From 21f91ad27c96703f17938effc0e337eb6a370dd4 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 15 Jan 2026 05:39:52 +0100 Subject: [PATCH 01/28] feat(postgres, server): add studio models and get account by ID endpoint Studio migration support: - Add StudioSessionStatus and StudioToolStatus enums - Add StudioSession, StudioToolCall, StudioOperation models with New/Update variants - Add StudioSessionRepository, StudioToolCallRepository, StudioOperationRepository - Add constraint types for all studio tables - Add error handlers for studio constraint violations Account endpoint: - Add GET /accounts/{accountId}/ endpoint to retrieve account by ID - Add accounts_share_workspace query to check workspace membership - Users can only view accounts they share a workspace with --- README.md | 9 +- crates/nvisy-postgres/src/model/mod.rs | 40 +- .../src/model/studio_operation.rs | 186 +++++++++ .../src/model/studio_session.rs | 168 ++++++++ .../src/model/studio_tool_call.rs | 155 ++++++++ crates/nvisy-postgres/src/query/mod.rs | 39 +- .../src/query/studio_operation.rs | 371 ++++++++++++++++++ .../src/query/studio_session.rs | 297 ++++++++++++++ .../src/query/studio_tool_call.rs | 310 +++++++++++++++ .../src/query/workspace_member.rs | 44 +++ .../src/types/constraint/mod.rs | 107 +++-- .../src/types/constraint/studio_operations.rs | 61 +++ .../src/types/constraint/studio_sessions.rs | 61 +++ .../src/types/constraint/studio_tool_calls.rs | 58 +++ crates/nvisy-postgres/src/types/enums/mod.rs | 6 + .../src/types/enums/studio_session_status.rs | 92 +++++ .../src/types/enums/studio_tool_status.rs | 102 +++++ crates/nvisy-postgres/src/types/mod.rs | 5 +- crates/nvisy-server/src/handler/accounts.rs | 67 +++- crates/nvisy-server/src/handler/error/mod.rs | 1 + .../src/handler/error/pg_error.rs | 3 + .../src/handler/error/pg_studio.rs | 67 ++++ .../nvisy-server/src/handler/request/paths.rs | 12 + 23 files changed, 2181 insertions(+), 80 deletions(-) create mode 100644 crates/nvisy-postgres/src/model/studio_operation.rs create mode 100644 crates/nvisy-postgres/src/model/studio_session.rs create mode 100644 crates/nvisy-postgres/src/model/studio_tool_call.rs create mode 100644 crates/nvisy-postgres/src/query/studio_operation.rs create mode 100644 crates/nvisy-postgres/src/query/studio_session.rs create mode 100644 crates/nvisy-postgres/src/query/studio_tool_call.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/studio_operations.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/studio_sessions.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/studio_tool_calls.rs create mode 100644 crates/nvisy-postgres/src/types/enums/studio_session_status.rs create mode 100644 crates/nvisy-postgres/src/types/enums/studio_tool_status.rs create mode 100644 crates/nvisy-server/src/handler/error/pg_studio.rs diff --git a/README.md b/README.md index bbbbbde..28ad0f9 100644 --- a/README.md +++ b/README.md @@ -33,14 +33,13 @@ server/ ## Quick Start ```bash -# Install tools and generate keys +# Install tools and make scripts executable make install-all -make generate-keys -# Run database migrations -make generate-migrations +# Generate keys, env and migration files +make generate-all -# Start the server +# Start the server with dotenv feature cargo run --features dotenv ``` diff --git a/crates/nvisy-postgres/src/model/mod.rs b/crates/nvisy-postgres/src/model/mod.rs index 8e4d87d..f6f546e 100644 --- a/crates/nvisy-postgres/src/model/mod.rs +++ b/crates/nvisy-postgres/src/model/mod.rs @@ 
-3,23 +3,27 @@ //! This module contains Diesel model definitions for all database tables, //! including structs for querying, inserting, and updating records. -pub mod account; -pub mod account_action_token; -pub mod account_api_token; -pub mod account_notification; -pub mod document; -pub mod document_annotation; -pub mod document_chunk; -pub mod document_comment; -pub mod document_file; +mod account; +mod account_action_token; +mod account_api_token; +mod account_notification; +mod document; +mod document_annotation; +mod document_chunk; +mod document_comment; +mod document_file; -pub mod workspace; -pub mod workspace_activity; -pub mod workspace_integration; -pub mod workspace_integration_run; -pub mod workspace_invite; -pub mod workspace_member; -pub mod workspace_webhook; +mod workspace; +mod workspace_activity; +mod workspace_integration; +mod workspace_integration_run; +mod workspace_invite; +mod workspace_member; +mod workspace_webhook; + +mod studio_operation; +mod studio_session; +mod studio_tool_call; // Account models pub use account::{Account, NewAccount, UpdateAccount}; @@ -52,3 +56,7 @@ pub use workspace_integration_run::{ pub use workspace_invite::{NewWorkspaceInvite, UpdateWorkspaceInvite, WorkspaceInvite}; pub use workspace_member::{NewWorkspaceMember, UpdateWorkspaceMember, WorkspaceMember}; pub use workspace_webhook::{NewWorkspaceWebhook, UpdateWorkspaceWebhook, WorkspaceWebhook}; +// Studio models +pub use studio_operation::{NewStudioOperation, StudioOperation, UpdateStudioOperation}; +pub use studio_session::{NewStudioSession, StudioSession, UpdateStudioSession}; +pub use studio_tool_call::{NewStudioToolCall, StudioToolCall, UpdateStudioToolCall}; diff --git a/crates/nvisy-postgres/src/model/studio_operation.rs b/crates/nvisy-postgres/src/model/studio_operation.rs new file mode 100644 index 0000000..b4a932c --- /dev/null +++ b/crates/nvisy-postgres/src/model/studio_operation.rs @@ -0,0 +1,186 @@ +//! Studio operation model for PostgreSQL database operations. +//! +//! This module provides models for tracking document operations (diffs) produced +//! by tool calls. Operations represent the actual changes to be applied to documents, +//! supporting apply/revert functionality for undo capabilities. +//! +//! ## Models +//! +//! - [`StudioOperation`] - Main operation model with diff details +//! - [`NewStudioOperation`] - Data structure for creating new operations +//! - [`UpdateStudioOperation`] - Data structure for updating existing operations + +use diesel::prelude::*; +use jiff_diesel::Timestamp; +use uuid::Uuid; + +use crate::schema::studio_operations; +use crate::types::HasCreatedAt; + +/// Studio operation model representing a document operation (diff). +/// +/// This model tracks individual operations produced by tool calls that can be +/// applied to or reverted from documents. Operations store position-based diffs +/// rather than content, enabling efficient undo/redo functionality. +#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] +#[diesel(table_name = studio_operations)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct StudioOperation { + /// Unique operation identifier. + pub id: Uuid, + /// Reference to the tool call that produced this operation. + pub tool_call_id: Uuid, + /// Reference to the file being modified. + pub file_id: Uuid, + /// Optional reference to a specific chunk within the file. + pub chunk_id: Option, + /// Type of operation (insert, replace, delete, format, merge, split, etc.). 
+ pub operation_type: String, + /// The diff specification as JSON (positions, not content). + pub operation_diff: serde_json::Value, + /// Whether this operation has been applied to the document. + pub applied: bool, + /// Whether this operation was reverted by the user. + pub reverted: bool, + /// Timestamp when the operation was created. + pub created_at: Timestamp, + /// Timestamp when the operation was applied. + pub applied_at: Option, +} + +/// Data structure for creating a new studio operation. +/// +/// Contains all the information necessary to record a new document operation. +/// Operations are created as unapplied by default and can be applied later. +#[derive(Debug, Clone, Insertable)] +#[diesel(table_name = studio_operations)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct NewStudioOperation { + /// Reference to the tool call that produced this operation. + pub tool_call_id: Uuid, + /// Reference to the file being modified. + pub file_id: Uuid, + /// Optional reference to a specific chunk. + pub chunk_id: Option, + /// Type of operation. + pub operation_type: String, + /// The diff specification as JSON. + pub operation_diff: Option, + /// Optional initial applied state. + pub applied: Option, + /// Optional initial reverted state. + pub reverted: Option, +} + +/// Data structure for updating an existing studio operation. +/// +/// Contains optional fields for modifying operation properties. Primarily +/// used to mark operations as applied or reverted. +#[derive(Debug, Clone, Default, AsChangeset)] +#[diesel(table_name = studio_operations)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct UpdateStudioOperation { + /// Updated applied state. + pub applied: Option, + /// Updated reverted state. + pub reverted: Option, + /// Updated applied timestamp. + pub applied_at: Option>, +} + +impl StudioOperation { + /// Returns whether the operation has been applied. + #[inline] + pub fn is_applied(&self) -> bool { + self.applied + } + + /// Returns whether the operation has been reverted. + #[inline] + pub fn is_reverted(&self) -> bool { + self.reverted + } + + /// Returns whether the operation is pending (not yet applied). + #[inline] + pub fn is_pending(&self) -> bool { + !self.applied + } + + /// Returns whether the operation can be applied. + #[inline] + pub fn can_apply(&self) -> bool { + !self.applied + } + + /// Returns whether the operation can be reverted. + #[inline] + pub fn can_revert(&self) -> bool { + self.applied && !self.reverted + } + + /// Returns whether the operation targets a specific chunk. + #[inline] + pub fn has_chunk(&self) -> bool { + self.chunk_id.is_some() + } + + /// Returns whether the operation has diff data. + pub fn has_diff(&self) -> bool { + !self + .operation_diff + .as_object() + .is_none_or(|obj| obj.is_empty()) + } + + /// Returns the time between creation and application, if applied. + pub fn time_to_apply(&self) -> Option { + self.applied_at.map(|applied| { + let created: jiff::Timestamp = self.created_at.into(); + let applied: jiff::Timestamp = applied.into(); + applied.since(created).unwrap_or_default() + }) + } + + /// Returns whether this is an insert operation. + #[inline] + pub fn is_insert(&self) -> bool { + self.operation_type == "insert" + } + + /// Returns whether this is a replace operation. + #[inline] + pub fn is_replace(&self) -> bool { + self.operation_type == "replace" + } + + /// Returns whether this is a delete operation. 
+ #[inline] + pub fn is_delete(&self) -> bool { + self.operation_type == "delete" + } + + /// Returns whether this is a format operation. + #[inline] + pub fn is_format(&self) -> bool { + self.operation_type == "format" + } + + /// Returns whether this is a merge operation. + #[inline] + pub fn is_merge(&self) -> bool { + self.operation_type == "merge" + } + + /// Returns whether this is a split operation. + #[inline] + pub fn is_split(&self) -> bool { + self.operation_type == "split" + } +} + +impl HasCreatedAt for StudioOperation { + fn created_at(&self) -> jiff::Timestamp { + self.created_at.into() + } +} diff --git a/crates/nvisy-postgres/src/model/studio_session.rs b/crates/nvisy-postgres/src/model/studio_session.rs new file mode 100644 index 0000000..970b6ec --- /dev/null +++ b/crates/nvisy-postgres/src/model/studio_session.rs @@ -0,0 +1,168 @@ +//! Studio session model for PostgreSQL database operations. +//! +//! This module provides models for managing LLM-assisted document editing sessions. +//! Sessions track the interaction between users and AI models during document editing, +//! including message counts, token usage, and model configuration. +//! +//! ## Models +//! +//! - [`StudioSession`] - Main session model with full configuration and status +//! - [`NewStudioSession`] - Data structure for creating new sessions +//! - [`UpdateStudioSession`] - Data structure for updating existing sessions + +use diesel::prelude::*; +use jiff_diesel::Timestamp; +use uuid::Uuid; + +use crate::schema::studio_sessions; +use crate::types::{HasCreatedAt, HasOwnership, HasUpdatedAt, StudioSessionStatus}; + +/// Studio session model representing an LLM-assisted document editing session. +/// +/// This model manages the lifecycle of editing sessions where users interact with +/// AI models to edit documents. Each session tracks the primary file being edited, +/// model configuration, and usage statistics like message and token counts. +#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] +#[diesel(table_name = studio_sessions)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct StudioSession { + /// Unique session identifier. + pub id: Uuid, + /// Reference to the workspace this session belongs to. + pub workspace_id: Uuid, + /// Account that created and owns this session. + pub account_id: Uuid, + /// Primary file being edited in this session. + pub primary_file_id: Uuid, + /// User-friendly session name. + pub display_name: String, + /// Current lifecycle status of the session. + pub session_status: StudioSessionStatus, + /// LLM configuration (model, temperature, max tokens, etc.). + pub model_config: serde_json::Value, + /// Total number of messages exchanged in this session. + pub message_count: i32, + /// Total tokens used in this session. + pub token_count: i32, + /// Timestamp when this session was created. + pub created_at: Timestamp, + /// Timestamp when this session was last modified. + pub updated_at: Timestamp, +} + +/// Data structure for creating a new studio session. +/// +/// Contains all the information necessary to create a new editing session. +/// Most fields have sensible defaults, allowing sessions to be created with +/// minimal required information. +#[derive(Debug, Clone, Insertable)] +#[diesel(table_name = studio_sessions)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct NewStudioSession { + /// Reference to the workspace this session will belong to. + pub workspace_id: Uuid, + /// Account creating this session. 
+ pub account_id: Uuid, + /// Primary file to be edited in this session. + pub primary_file_id: Uuid, + /// Optional user-friendly session name. + pub display_name: Option, + /// Optional initial session status. + pub session_status: Option, + /// Optional LLM configuration. + pub model_config: Option, +} + +/// Data structure for updating an existing studio session. +/// +/// Contains optional fields for modifying session properties. Only the +/// fields that need to be changed should be set to Some(value), while +/// unchanged fields remain None to preserve their current values. +#[derive(Debug, Clone, Default, AsChangeset)] +#[diesel(table_name = studio_sessions)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct UpdateStudioSession { + /// Updated session display name. + pub display_name: Option, + /// Updated session status. + pub session_status: Option, + /// Updated LLM configuration. + pub model_config: Option, + /// Updated message count. + pub message_count: Option, + /// Updated token count. + pub token_count: Option, +} + +impl StudioSession { + /// Returns whether the session is currently active. + #[inline] + pub fn is_active(&self) -> bool { + self.session_status.is_active() + } + + /// Returns whether the session is paused. + #[inline] + pub fn is_paused(&self) -> bool { + self.session_status.is_paused() + } + + /// Returns whether the session is archived. + #[inline] + pub fn is_archived(&self) -> bool { + self.session_status.is_archived() + } + + /// Returns whether the session can accept new input. + #[inline] + pub fn can_accept_input(&self) -> bool { + self.session_status.can_accept_input() + } + + /// Returns whether the session has any messages. + #[inline] + pub fn has_messages(&self) -> bool { + self.message_count > 0 + } + + /// Returns whether the session has used any tokens. + #[inline] + pub fn has_token_usage(&self) -> bool { + self.token_count > 0 + } + + /// Returns whether the session has model configuration. + pub fn has_model_config(&self) -> bool { + !self + .model_config + .as_object() + .is_none_or(|obj| obj.is_empty()) + } + + /// Returns the average tokens per message, if any messages exist. + pub fn avg_tokens_per_message(&self) -> Option { + if self.message_count > 0 { + Some(self.token_count as f64 / self.message_count as f64) + } else { + None + } + } +} + +impl HasCreatedAt for StudioSession { + fn created_at(&self) -> jiff::Timestamp { + self.created_at.into() + } +} + +impl HasUpdatedAt for StudioSession { + fn updated_at(&self) -> jiff::Timestamp { + self.updated_at.into() + } +} + +impl HasOwnership for StudioSession { + fn created_by(&self) -> Uuid { + self.account_id + } +} diff --git a/crates/nvisy-postgres/src/model/studio_tool_call.rs b/crates/nvisy-postgres/src/model/studio_tool_call.rs new file mode 100644 index 0000000..38386c4 --- /dev/null +++ b/crates/nvisy-postgres/src/model/studio_tool_call.rs @@ -0,0 +1,155 @@ +//! Studio tool call model for PostgreSQL database operations. +//! +//! This module provides models for tracking tool invocations within studio sessions. +//! Tool calls represent individual operations performed by the LLM, such as +//! merging, splitting, redacting, or translating document content. +//! +//! ## Models +//! +//! - [`StudioToolCall`] - Main tool call model with execution details +//! - [`NewStudioToolCall`] - Data structure for creating new tool calls +//! 
- [`UpdateStudioToolCall`] - Data structure for updating existing tool calls + +use diesel::prelude::*; +use jiff_diesel::Timestamp; +use uuid::Uuid; + +use crate::schema::studio_tool_calls; +use crate::types::{HasCreatedAt, StudioToolStatus}; + +/// Studio tool call model representing a tool invocation within a session. +/// +/// This model tracks individual tool calls made during editing sessions, +/// including the tool name, input parameters, output results, and execution +/// status. Tool calls are linked to specific files and optionally to chunks. +#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] +#[diesel(table_name = studio_tool_calls)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct StudioToolCall { + /// Unique tool call identifier. + pub id: Uuid, + /// Reference to the studio session this tool call belongs to. + pub session_id: Uuid, + /// Reference to the file being operated on. + pub file_id: Uuid, + /// Optional reference to a specific chunk within the file. + pub chunk_id: Option, + /// Name of the tool being invoked. + pub tool_name: String, + /// Tool input parameters as JSON. + pub tool_input: serde_json::Value, + /// Tool output results as JSON. + pub tool_output: serde_json::Value, + /// Current execution status of the tool call. + pub tool_status: StudioToolStatus, + /// Timestamp when the tool call was created/started. + pub started_at: Timestamp, + /// Timestamp when the tool execution completed. + pub completed_at: Option, +} + +/// Data structure for creating a new studio tool call. +/// +/// Contains all the information necessary to record a new tool invocation. +/// The tool status defaults to pending, and output is populated upon completion. +#[derive(Debug, Clone, Insertable)] +#[diesel(table_name = studio_tool_calls)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct NewStudioToolCall { + /// Reference to the studio session. + pub session_id: Uuid, + /// Reference to the file being operated on. + pub file_id: Uuid, + /// Optional reference to a specific chunk. + pub chunk_id: Option, + /// Name of the tool being invoked. + pub tool_name: String, + /// Tool input parameters as JSON. + pub tool_input: Option, + /// Optional initial tool output. + pub tool_output: Option, + /// Optional initial tool status. + pub tool_status: Option, +} + +/// Data structure for updating an existing studio tool call. +/// +/// Contains optional fields for modifying tool call properties. Primarily +/// used to update the status and output upon completion or cancellation. +#[derive(Debug, Clone, Default, AsChangeset)] +#[diesel(table_name = studio_tool_calls)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct UpdateStudioToolCall { + /// Updated tool output results. + pub tool_output: Option, + /// Updated execution status. + pub tool_status: Option, + /// Updated completion timestamp. + pub completed_at: Option>, +} + +impl StudioToolCall { + /// Returns whether the tool call is pending execution. + #[inline] + pub fn is_pending(&self) -> bool { + self.tool_status.is_pending() + } + + /// Returns whether the tool is currently running. + #[inline] + pub fn is_running(&self) -> bool { + self.tool_status.is_running() + } + + /// Returns whether the tool execution completed successfully. + #[inline] + pub fn is_completed(&self) -> bool { + self.tool_status.is_completed() + } + + /// Returns whether the tool execution was cancelled. 
+ #[inline] + pub fn is_cancelled(&self) -> bool { + self.tool_status.is_cancelled() + } + + /// Returns whether the tool is in a final state. + #[inline] + pub fn is_final(&self) -> bool { + self.tool_status.is_final() + } + + /// Returns whether the tool call targets a specific chunk. + #[inline] + pub fn has_chunk(&self) -> bool { + self.chunk_id.is_some() + } + + /// Returns whether the tool has input parameters. + pub fn has_input(&self) -> bool { + !self.tool_input.as_object().is_none_or(|obj| obj.is_empty()) + } + + /// Returns whether the tool has output results. + pub fn has_output(&self) -> bool { + !self + .tool_output + .as_object() + .is_none_or(|obj| obj.is_empty()) + } + + /// Returns the execution duration if the tool has completed. + pub fn execution_duration(&self) -> Option { + self.completed_at.map(|completed| { + let started: jiff::Timestamp = self.started_at.into(); + let completed: jiff::Timestamp = completed.into(); + completed.since(started).unwrap_or_default() + }) + } +} + +impl HasCreatedAt for StudioToolCall { + fn created_at(&self) -> jiff::Timestamp { + self.started_at.into() + } +} diff --git a/crates/nvisy-postgres/src/query/mod.rs b/crates/nvisy-postgres/src/query/mod.rs index 1e727ea..b3aeae2 100644 --- a/crates/nvisy-postgres/src/query/mod.rs +++ b/crates/nvisy-postgres/src/query/mod.rs @@ -13,24 +13,28 @@ //! [`CursorPagination`]: crate::types::CursorPagination //! [`OffsetPagination`]: crate::types::OffsetPagination -pub mod account; -pub mod account_action_token; -pub mod account_api_token; -pub mod account_notification; +mod account; +mod account_action_token; +mod account_api_token; +mod account_notification; -pub mod document; -pub mod document_annotation; -pub mod document_chunk; -pub mod document_comment; -pub mod document_file; +mod document; +mod document_annotation; +mod document_chunk; +mod document_comment; +mod document_file; -pub mod workspace; -pub mod workspace_activity; -pub mod workspace_integration; -pub mod workspace_integration_run; -pub mod workspace_invite; -pub mod workspace_member; -pub mod workspace_webhook; +mod workspace; +mod workspace_activity; +mod workspace_integration; +mod workspace_integration_run; +mod workspace_invite; +mod workspace_member; +mod workspace_webhook; + +mod studio_operation; +mod studio_session; +mod studio_tool_call; pub use account::AccountRepository; pub use account_action_token::AccountActionTokenRepository; @@ -41,6 +45,9 @@ pub use document_annotation::DocumentAnnotationRepository; pub use document_chunk::DocumentChunkRepository; pub use document_comment::DocumentCommentRepository; pub use document_file::DocumentFileRepository; +pub use studio_operation::{FileOperationCounts, StudioOperationRepository}; +pub use studio_session::StudioSessionRepository; +pub use studio_tool_call::StudioToolCallRepository; pub use workspace::WorkspaceRepository; pub use workspace_activity::WorkspaceActivityRepository; pub use workspace_integration::WorkspaceIntegrationRepository; diff --git a/crates/nvisy-postgres/src/query/studio_operation.rs b/crates/nvisy-postgres/src/query/studio_operation.rs new file mode 100644 index 0000000..715cb01 --- /dev/null +++ b/crates/nvisy-postgres/src/query/studio_operation.rs @@ -0,0 +1,371 @@ +//! Studio operation repository for managing document operations (diffs). 
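+//!
+//! # Example
+//!
+//! A minimal usage sketch (not compiled as a doc test): it assumes an open
+//! [`PgConnection`] named `conn`, existing `tool_call_id`/`file_id` values, and a
+//! placeholder diff payload — only the field and method names come from this patch.
+//!
+//! ```ignore
+//! use crate::model::NewStudioOperation;
+//! use crate::query::StudioOperationRepository;
+//!
+//! // Record a pending operation produced by a tool call.
+//! let op = conn
+//!     .create_studio_operation(NewStudioOperation {
+//!         tool_call_id,
+//!         file_id,
+//!         chunk_id: None,
+//!         operation_type: "insert".to_owned(),
+//!         operation_diff: Some(serde_json::json!({ "at": 0, "len": 42 })),
+//!         applied: None,
+//!         reverted: None,
+//!     })
+//!     .await?;
+//!
+//! // Later, mark it as applied; this also stamps `applied_at`.
+//! let op = conn.apply_studio_operation(op.id).await?;
+//! assert!(op.can_revert());
+//! ```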
+ +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use uuid::Uuid; + +use crate::model::{NewStudioOperation, StudioOperation, UpdateStudioOperation}; +use crate::types::{CursorPage, CursorPagination, OffsetPagination}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for studio operation database operations. +/// +/// Handles document operation tracking including CRUD operations, apply/revert +/// state management, and querying by tool call or file. +pub trait StudioOperationRepository { + /// Creates a new studio operation. + fn create_studio_operation( + &mut self, + operation: NewStudioOperation, + ) -> impl Future> + Send; + + /// Creates multiple studio operations in a batch. + fn create_studio_operations( + &mut self, + operations: Vec, + ) -> impl Future>> + Send; + + /// Finds a studio operation by its unique identifier. + fn find_studio_operation_by_id( + &mut self, + operation_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates an existing studio operation. + fn update_studio_operation( + &mut self, + operation_id: Uuid, + changes: UpdateStudioOperation, + ) -> impl Future> + Send; + + /// Deletes a studio operation. + fn delete_studio_operation( + &mut self, + operation_id: Uuid, + ) -> impl Future> + Send; + + /// Lists operations for a tool call. + fn list_tool_call_operations( + &mut self, + tool_call_id: Uuid, + ) -> impl Future>> + Send; + + /// Lists operations for a file with offset pagination. + fn offset_list_file_operations( + &mut self, + file_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists operations for a file with cursor pagination. + fn cursor_list_file_operations( + &mut self, + file_id: Uuid, + pagination: CursorPagination, + ) -> impl Future>> + Send; + + /// Lists pending (unapplied) operations for a file. + fn list_pending_file_operations( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Marks an operation as applied. + fn apply_studio_operation( + &mut self, + operation_id: Uuid, + ) -> impl Future> + Send; + + /// Marks multiple operations as applied. + fn apply_studio_operations( + &mut self, + operation_ids: Vec, + ) -> impl Future>> + Send; + + /// Marks an operation as reverted. + fn revert_studio_operation( + &mut self, + operation_id: Uuid, + ) -> impl Future> + Send; + + /// Counts operations by status for a file. + fn count_file_operations( + &mut self, + file_id: Uuid, + ) -> impl Future> + Send; +} + +/// Counts of operations by status for a file. +#[derive(Debug, Clone, Default)] +pub struct FileOperationCounts { + /// Total number of operations. + pub total: i64, + /// Number of applied operations. + pub applied: i64, + /// Number of pending (unapplied) operations. + pub pending: i64, + /// Number of reverted operations. 
+ pub reverted: i64, +} + +impl StudioOperationRepository for PgConnection { + async fn create_studio_operation( + &mut self, + operation: NewStudioOperation, + ) -> PgResult { + use schema::studio_operations; + + let operation = diesel::insert_into(studio_operations::table) + .values(&operation) + .returning(StudioOperation::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(operation) + } + + async fn create_studio_operations( + &mut self, + operations: Vec, + ) -> PgResult> { + use schema::studio_operations; + + let operations = diesel::insert_into(studio_operations::table) + .values(&operations) + .returning(StudioOperation::as_returning()) + .get_results(self) + .await + .map_err(PgError::from)?; + + Ok(operations) + } + + async fn find_studio_operation_by_id( + &mut self, + operation_id: Uuid, + ) -> PgResult> { + use schema::studio_operations::dsl::*; + + let operation = studio_operations + .filter(id.eq(operation_id)) + .select(StudioOperation::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(operation) + } + + async fn update_studio_operation( + &mut self, + operation_id: Uuid, + changes: UpdateStudioOperation, + ) -> PgResult { + use schema::studio_operations::dsl::*; + + let operation = diesel::update(studio_operations) + .filter(id.eq(operation_id)) + .set(&changes) + .returning(StudioOperation::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(operation) + } + + async fn delete_studio_operation(&mut self, operation_id: Uuid) -> PgResult<()> { + use schema::studio_operations::dsl::*; + + diesel::delete(studio_operations) + .filter(id.eq(operation_id)) + .execute(self) + .await + .map_err(PgError::from)?; + + Ok(()) + } + + async fn list_tool_call_operations(&mut self, tc_id: Uuid) -> PgResult> { + use schema::studio_operations::{self, dsl}; + + let operations = studio_operations::table + .filter(dsl::tool_call_id.eq(tc_id)) + .select(StudioOperation::as_select()) + .order(dsl::created_at.asc()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(operations) + } + + async fn offset_list_file_operations( + &mut self, + f_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::studio_operations::{self, dsl}; + + let operations = studio_operations::table + .filter(dsl::file_id.eq(f_id)) + .select(StudioOperation::as_select()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(operations) + } + + async fn cursor_list_file_operations( + &mut self, + f_id: Uuid, + pagination: CursorPagination, + ) -> PgResult> { + use schema::studio_operations::{self, dsl}; + + let total = if pagination.include_count { + Some( + studio_operations::table + .filter(dsl::file_id.eq(f_id)) + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + studio_operations::table + .filter(dsl::file_id.eq(f_id)) + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(StudioOperation::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? 
+ } else { + studio_operations::table + .filter(dsl::file_id.eq(f_id)) + .select(StudioOperation::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |op: &StudioOperation| (op.created_at.into(), op.id), + )) + } + + async fn list_pending_file_operations(&mut self, f_id: Uuid) -> PgResult> { + use schema::studio_operations::{self, dsl}; + + let operations = studio_operations::table + .filter(dsl::file_id.eq(f_id)) + .filter(dsl::applied.eq(false)) + .select(StudioOperation::as_select()) + .order(dsl::created_at.asc()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(operations) + } + + async fn apply_studio_operation(&mut self, operation_id: Uuid) -> PgResult { + let changes = UpdateStudioOperation { + applied: Some(true), + applied_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))), + ..Default::default() + }; + + self.update_studio_operation(operation_id, changes).await + } + + async fn apply_studio_operations( + &mut self, + operation_ids: Vec, + ) -> PgResult> { + use schema::studio_operations::dsl::*; + + let now = jiff_diesel::Timestamp::from(jiff::Timestamp::now()); + + let operations = diesel::update(studio_operations) + .filter(id.eq_any(&operation_ids)) + .set((applied.eq(true), applied_at.eq(Some(now)))) + .returning(StudioOperation::as_returning()) + .get_results(self) + .await + .map_err(PgError::from)?; + + Ok(operations) + } + + async fn revert_studio_operation(&mut self, operation_id: Uuid) -> PgResult { + let changes = UpdateStudioOperation { + reverted: Some(true), + ..Default::default() + }; + + self.update_studio_operation(operation_id, changes).await + } + + async fn count_file_operations(&mut self, f_id: Uuid) -> PgResult { + use diesel::dsl::count_star; + use schema::studio_operations::{self, dsl}; + + let total = studio_operations::table + .filter(dsl::file_id.eq(f_id)) + .select(count_star()) + .get_result::(self) + .await + .map_err(PgError::from)?; + + let applied_count = studio_operations::table + .filter(dsl::file_id.eq(f_id)) + .filter(dsl::applied.eq(true)) + .select(count_star()) + .get_result::(self) + .await + .map_err(PgError::from)?; + + let reverted_count = studio_operations::table + .filter(dsl::file_id.eq(f_id)) + .filter(dsl::reverted.eq(true)) + .select(count_star()) + .get_result::(self) + .await + .map_err(PgError::from)?; + + Ok(FileOperationCounts { + total, + applied: applied_count, + pending: total - applied_count, + reverted: reverted_count, + }) + } +} diff --git a/crates/nvisy-postgres/src/query/studio_session.rs b/crates/nvisy-postgres/src/query/studio_session.rs new file mode 100644 index 0000000..6fe0501 --- /dev/null +++ b/crates/nvisy-postgres/src/query/studio_session.rs @@ -0,0 +1,297 @@ +//! Studio session repository for managing LLM-assisted editing sessions. + +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use uuid::Uuid; + +use crate::model::{NewStudioSession, StudioSession, UpdateStudioSession}; +use crate::types::{CursorPage, CursorPagination, OffsetPagination, StudioSessionStatus}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for studio session database operations. +/// +/// Handles LLM-assisted editing session management including CRUD operations, +/// status tracking, and usage statistics updates. 
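+///
+/// # Example
+///
+/// A minimal usage sketch (not compiled as a doc test): `conn` is an open
+/// [`PgConnection`], and the ids and display name are placeholders.
+///
+/// ```ignore
+/// let session = conn
+///     .create_studio_session(NewStudioSession {
+///         workspace_id,
+///         account_id,
+///         primary_file_id,
+///         display_name: Some("Contract cleanup".to_owned()),
+///         session_status: None,
+///         model_config: None,
+///     })
+///     .await?;
+///
+/// // After an exchange with the model, record the usage on the session.
+/// let session = conn
+///     .increment_studio_session_usage(session.id, 2, 1_250)
+///     .await?;
+/// assert!(session.has_token_usage());
+/// ```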
+pub trait StudioSessionRepository { + /// Creates a new studio session with the provided configuration. + fn create_studio_session( + &mut self, + session: NewStudioSession, + ) -> impl Future> + Send; + + /// Finds a studio session by its unique identifier. + fn find_studio_session_by_id( + &mut self, + session_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates an existing studio session. + fn update_studio_session( + &mut self, + session_id: Uuid, + changes: UpdateStudioSession, + ) -> impl Future> + Send; + + /// Deletes a studio session by archiving it. + fn delete_studio_session( + &mut self, + session_id: Uuid, + ) -> impl Future> + Send; + + /// Lists studio sessions for a workspace with offset pagination. + fn offset_list_studio_sessions( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists studio sessions for a workspace with cursor pagination. + fn cursor_list_studio_sessions( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + ) -> impl Future>> + Send; + + /// Lists studio sessions for an account with offset pagination. + fn offset_list_account_studio_sessions( + &mut self, + account_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists active studio sessions for a file. + fn list_file_studio_sessions( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates the status of a studio session. + fn update_studio_session_status( + &mut self, + session_id: Uuid, + new_status: StudioSessionStatus, + ) -> impl Future> + Send; + + /// Increments the message and token counts for a session. + fn increment_studio_session_usage( + &mut self, + session_id: Uuid, + messages: i32, + tokens: i32, + ) -> impl Future> + Send; +} + +impl StudioSessionRepository for PgConnection { + async fn create_studio_session( + &mut self, + session: NewStudioSession, + ) -> PgResult { + use schema::studio_sessions; + + let session = diesel::insert_into(studio_sessions::table) + .values(&session) + .returning(StudioSession::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(session) + } + + async fn find_studio_session_by_id( + &mut self, + session_id: Uuid, + ) -> PgResult> { + use schema::studio_sessions::dsl::*; + + let session = studio_sessions + .filter(id.eq(session_id)) + .select(StudioSession::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(session) + } + + async fn update_studio_session( + &mut self, + session_id: Uuid, + changes: UpdateStudioSession, + ) -> PgResult { + use schema::studio_sessions::dsl::*; + + let session = diesel::update(studio_sessions) + .filter(id.eq(session_id)) + .set(&changes) + .returning(StudioSession::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(session) + } + + async fn delete_studio_session(&mut self, session_id: Uuid) -> PgResult<()> { + use schema::studio_sessions::dsl::*; + + diesel::update(studio_sessions) + .filter(id.eq(session_id)) + .set(session_status.eq(StudioSessionStatus::Archived)) + .execute(self) + .await + .map_err(PgError::from)?; + + Ok(()) + } + + async fn offset_list_studio_sessions( + &mut self, + ws_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::studio_sessions::{self, dsl}; + + let sessions = studio_sessions::table + .filter(dsl::workspace_id.eq(ws_id)) + .select(StudioSession::as_select()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .load(self) + 
.await + .map_err(PgError::from)?; + + Ok(sessions) + } + + async fn cursor_list_studio_sessions( + &mut self, + ws_id: Uuid, + pagination: CursorPagination, + ) -> PgResult> { + use schema::studio_sessions::{self, dsl}; + + let total = if pagination.include_count { + Some( + studio_sessions::table + .filter(dsl::workspace_id.eq(ws_id)) + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + studio_sessions::table + .filter(dsl::workspace_id.eq(ws_id)) + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(StudioSession::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + studio_sessions::table + .filter(dsl::workspace_id.eq(ws_id)) + .select(StudioSession::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |s: &StudioSession| (s.created_at.into(), s.id), + )) + } + + async fn offset_list_account_studio_sessions( + &mut self, + acc_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::studio_sessions::{self, dsl}; + + let sessions = studio_sessions::table + .filter(dsl::account_id.eq(acc_id)) + .select(StudioSession::as_select()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(sessions) + } + + async fn list_file_studio_sessions(&mut self, file_id: Uuid) -> PgResult> { + use schema::studio_sessions::{self, dsl}; + + let sessions = studio_sessions::table + .filter(dsl::primary_file_id.eq(file_id)) + .filter(dsl::session_status.ne(StudioSessionStatus::Archived)) + .select(StudioSession::as_select()) + .order(dsl::created_at.desc()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(sessions) + } + + async fn update_studio_session_status( + &mut self, + session_id: Uuid, + new_status: StudioSessionStatus, + ) -> PgResult { + let changes = UpdateStudioSession { + session_status: Some(new_status), + ..Default::default() + }; + + self.update_studio_session(session_id, changes).await + } + + async fn increment_studio_session_usage( + &mut self, + session_id: Uuid, + messages: i32, + tokens: i32, + ) -> PgResult { + use schema::studio_sessions::dsl::*; + + let session = diesel::update(studio_sessions) + .filter(id.eq(session_id)) + .set(( + message_count.eq(message_count + messages), + token_count.eq(token_count + tokens), + )) + .returning(StudioSession::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(session) + } +} diff --git a/crates/nvisy-postgres/src/query/studio_tool_call.rs b/crates/nvisy-postgres/src/query/studio_tool_call.rs new file mode 100644 index 0000000..fa91afe --- /dev/null +++ b/crates/nvisy-postgres/src/query/studio_tool_call.rs @@ -0,0 +1,310 @@ +//! Studio tool call repository for managing tool invocations within sessions. 
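+//!
+//! # Example
+//!
+//! A rough sketch of the intended flow (not compiled as a doc test): `conn` is an
+//! open [`PgConnection`], and the tool name and JSON payloads are placeholders.
+//!
+//! ```ignore
+//! let call = conn
+//!     .create_studio_tool_call(NewStudioToolCall {
+//!         session_id,
+//!         file_id,
+//!         chunk_id: None,
+//!         tool_name: "redact".to_owned(),
+//!         tool_input: Some(serde_json::json!({ "pattern": "email" })),
+//!         tool_output: None,
+//!         tool_status: None,
+//!     })
+//!     .await?;
+//!
+//! // ... execute the tool, then persist its result.
+//! let call = conn
+//!     .complete_studio_tool_call(call.id, serde_json::json!({ "matches": 3 }))
+//!     .await?;
+//! assert!(call.is_completed());
+//! ```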
+
+use std::future::Future;
+
+use diesel::prelude::*;
+use diesel_async::RunQueryDsl;
+use uuid::Uuid;
+
+use crate::model::{NewStudioToolCall, StudioToolCall, UpdateStudioToolCall};
+use crate::types::{CursorPage, CursorPagination, OffsetPagination, StudioToolStatus};
+use crate::{PgConnection, PgError, PgResult, schema};
+
+/// Repository for studio tool call database operations.
+///
+/// Handles tool invocation tracking including CRUD operations, status updates,
+/// and querying by session, file, or status.
+pub trait StudioToolCallRepository {
+    /// Creates a new studio tool call.
+    fn create_studio_tool_call(
+        &mut self,
+        tool_call: NewStudioToolCall,
+    ) -> impl Future<Output = PgResult<StudioToolCall>> + Send;
+
+    /// Finds a studio tool call by its unique identifier.
+    fn find_studio_tool_call_by_id(
+        &mut self,
+        tool_call_id: Uuid,
+    ) -> impl Future<Output = PgResult<Option<StudioToolCall>>> + Send;
+
+    /// Updates an existing studio tool call.
+    fn update_studio_tool_call(
+        &mut self,
+        tool_call_id: Uuid,
+        changes: UpdateStudioToolCall,
+    ) -> impl Future<Output = PgResult<StudioToolCall>> + Send;
+
+    /// Deletes a studio tool call.
+    fn delete_studio_tool_call(
+        &mut self,
+        tool_call_id: Uuid,
+    ) -> impl Future<Output = PgResult<()>> + Send;
+
+    /// Lists tool calls for a session with offset pagination.
+    fn offset_list_session_tool_calls(
+        &mut self,
+        session_id: Uuid,
+        pagination: OffsetPagination,
+    ) -> impl Future<Output = PgResult<Vec<StudioToolCall>>> + Send;
+
+    /// Lists tool calls for a session with cursor pagination.
+    fn cursor_list_session_tool_calls(
+        &mut self,
+        session_id: Uuid,
+        pagination: CursorPagination,
+    ) -> impl Future<Output = PgResult<CursorPage<StudioToolCall>>> + Send;
+
+    /// Lists tool calls for a file with offset pagination.
+    fn offset_list_file_tool_calls(
+        &mut self,
+        file_id: Uuid,
+        pagination: OffsetPagination,
+    ) -> impl Future<Output = PgResult<Vec<StudioToolCall>>> + Send;
+
+    /// Lists pending or running tool calls for a session.
+    fn list_active_session_tool_calls(
+        &mut self,
+        session_id: Uuid,
+    ) -> impl Future<Output = PgResult<Vec<StudioToolCall>>> + Send;
+
+    /// Updates the status of a tool call.
+    fn update_studio_tool_call_status(
+        &mut self,
+        tool_call_id: Uuid,
+        new_status: StudioToolStatus,
+    ) -> impl Future<Output = PgResult<StudioToolCall>> + Send;
+
+    /// Marks a tool call as completed with the given output.
+    fn complete_studio_tool_call(
+        &mut self,
+        tool_call_id: Uuid,
+        output: serde_json::Value,
+    ) -> impl Future<Output = PgResult<StudioToolCall>> + Send;
+
+    /// Cancels a pending or running tool call.
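+    ///
+    /// For illustration only (not compiled): cancelling everything still in
+    /// flight for a session can be combined with
+    /// [`list_active_session_tool_calls`](Self::list_active_session_tool_calls).
+    ///
+    /// ```ignore
+    /// for call in conn.list_active_session_tool_calls(session_id).await? {
+    ///     conn.cancel_studio_tool_call(call.id).await?;
+    /// }
+    /// ```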
+ fn cancel_studio_tool_call( + &mut self, + tool_call_id: Uuid, + ) -> impl Future> + Send; +} + +impl StudioToolCallRepository for PgConnection { + async fn create_studio_tool_call( + &mut self, + tool_call: NewStudioToolCall, + ) -> PgResult { + use schema::studio_tool_calls; + + let tool_call = diesel::insert_into(studio_tool_calls::table) + .values(&tool_call) + .returning(StudioToolCall::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(tool_call) + } + + async fn find_studio_tool_call_by_id( + &mut self, + tool_call_id: Uuid, + ) -> PgResult> { + use schema::studio_tool_calls::dsl::*; + + let tool_call = studio_tool_calls + .filter(id.eq(tool_call_id)) + .select(StudioToolCall::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(tool_call) + } + + async fn update_studio_tool_call( + &mut self, + tool_call_id: Uuid, + changes: UpdateStudioToolCall, + ) -> PgResult { + use schema::studio_tool_calls::dsl::*; + + let tool_call = diesel::update(studio_tool_calls) + .filter(id.eq(tool_call_id)) + .set(&changes) + .returning(StudioToolCall::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(tool_call) + } + + async fn delete_studio_tool_call(&mut self, tool_call_id: Uuid) -> PgResult<()> { + use schema::studio_tool_calls::dsl::*; + + diesel::delete(studio_tool_calls) + .filter(id.eq(tool_call_id)) + .execute(self) + .await + .map_err(PgError::from)?; + + Ok(()) + } + + async fn offset_list_session_tool_calls( + &mut self, + sess_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::studio_tool_calls::{self, dsl}; + + let tool_calls = studio_tool_calls::table + .filter(dsl::session_id.eq(sess_id)) + .select(StudioToolCall::as_select()) + .order(dsl::started_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(tool_calls) + } + + async fn cursor_list_session_tool_calls( + &mut self, + sess_id: Uuid, + pagination: CursorPagination, + ) -> PgResult> { + use schema::studio_tool_calls::{self, dsl}; + + let total = if pagination.include_count { + Some( + studio_tool_calls::table + .filter(dsl::session_id.eq(sess_id)) + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + studio_tool_calls::table + .filter(dsl::session_id.eq(sess_id)) + .filter( + dsl::started_at + .lt(&cursor_time) + .or(dsl::started_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(StudioToolCall::as_select()) + .order((dsl::started_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + studio_tool_calls::table + .filter(dsl::session_id.eq(sess_id)) + .select(StudioToolCall::as_select()) + .order((dsl::started_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? 
+ }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |tc: &StudioToolCall| (tc.started_at.into(), tc.id), + )) + } + + async fn offset_list_file_tool_calls( + &mut self, + f_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::studio_tool_calls::{self, dsl}; + + let tool_calls = studio_tool_calls::table + .filter(dsl::file_id.eq(f_id)) + .select(StudioToolCall::as_select()) + .order(dsl::started_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(tool_calls) + } + + async fn list_active_session_tool_calls( + &mut self, + sess_id: Uuid, + ) -> PgResult> { + use schema::studio_tool_calls::{self, dsl}; + + let tool_calls = studio_tool_calls::table + .filter(dsl::session_id.eq(sess_id)) + .filter( + dsl::tool_status + .eq(StudioToolStatus::Pending) + .or(dsl::tool_status.eq(StudioToolStatus::Running)), + ) + .select(StudioToolCall::as_select()) + .order(dsl::started_at.asc()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(tool_calls) + } + + async fn update_studio_tool_call_status( + &mut self, + tool_call_id: Uuid, + new_status: StudioToolStatus, + ) -> PgResult { + let changes = UpdateStudioToolCall { + tool_status: Some(new_status), + ..Default::default() + }; + + self.update_studio_tool_call(tool_call_id, changes).await + } + + async fn complete_studio_tool_call( + &mut self, + tool_call_id: Uuid, + output: serde_json::Value, + ) -> PgResult { + let changes = UpdateStudioToolCall { + tool_output: Some(output), + tool_status: Some(StudioToolStatus::Completed), + completed_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))), + }; + + self.update_studio_tool_call(tool_call_id, changes).await + } + + async fn cancel_studio_tool_call(&mut self, tool_call_id: Uuid) -> PgResult { + let changes = UpdateStudioToolCall { + tool_status: Some(StudioToolStatus::Cancelled), + completed_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))), + ..Default::default() + }; + + self.update_studio_tool_call(tool_call_id, changes).await + } +} diff --git a/crates/nvisy-postgres/src/query/workspace_member.rs b/crates/nvisy-postgres/src/query/workspace_member.rs index 7f7c91a..c6bbe36 100644 --- a/crates/nvisy-postgres/src/query/workspace_member.rs +++ b/crates/nvisy-postgres/src/query/workspace_member.rs @@ -149,6 +149,16 @@ pub trait WorkspaceMemberRepository { workspace_id: Uuid, email: &str, ) -> impl Future>> + Send; + + /// Checks if two accounts share at least one common workspace. + /// + /// Returns true if both accounts are members of at least one common workspace. + /// This is an optimized query that stops at the first match. 
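+    ///
+    /// The check compiles down to an `EXISTS` over a self-join of
+    /// `workspace_members`. A typical guard (illustrative, not compiled; the
+    /// error constructor is a placeholder):
+    ///
+    /// ```ignore
+    /// if !conn.accounts_share_workspace(viewer_id, target_id).await? {
+    ///     return Err(not_found()); // placeholder: choose the handler's error type
+    /// }
+    /// ```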
+ fn accounts_share_workspace( + &mut self, + account_id_a: Uuid, + account_id_b: Uuid, + ) -> impl Future> + Send; } impl WorkspaceMemberRepository for PgConnection { @@ -680,4 +690,38 @@ impl WorkspaceMemberRepository for PgConnection { Ok(result) } + + async fn accounts_share_workspace( + &mut self, + account_id_a: Uuid, + account_id_b: Uuid, + ) -> PgResult { + use diesel::dsl::exists; + use schema::workspace_members; + + // Self-check: an account always "shares" with itself + if account_id_a == account_id_b { + return Ok(true); + } + + // Use EXISTS with a self-join to find any common workspace + // This is optimized to stop at the first match + let wm_a = workspace_members::table; + let wm_b = diesel::alias!(workspace_members as wm_b); + + let shares = diesel::select(exists( + wm_a.inner_join( + wm_b.on(wm_b + .field(workspace_members::workspace_id) + .eq(workspace_members::workspace_id)), + ) + .filter(workspace_members::account_id.eq(account_id_a)) + .filter(wm_b.field(workspace_members::account_id).eq(account_id_b)), + )) + .get_result::(self) + .await + .map_err(PgError::from)?; + + Ok(shares) + } } diff --git a/crates/nvisy-postgres/src/types/constraint/mod.rs b/crates/nvisy-postgres/src/types/constraint/mod.rs index a53c34e..06e4824 100644 --- a/crates/nvisy-postgres/src/types/constraint/mod.rs +++ b/crates/nvisy-postgres/src/types/constraint/mod.rs @@ -4,51 +4,57 @@ //! organized into logical groups for better maintainability. // Account-related constraint modules -pub mod account_action_tokens; -pub mod account_api_tokens; -pub mod account_notifications; -pub mod accounts; +mod account_action_tokens; +mod account_api_tokens; +mod account_notifications; +mod accounts; // Workspace-related constraint modules -pub mod workspace_activities; -pub mod workspace_integrations; -pub mod workspace_invites; -pub mod workspace_members; -pub mod workspace_webhooks; -pub mod workspaces; +mod workspace_activities; +mod workspace_integration_runs; +mod workspace_integrations; +mod workspace_invites; +mod workspace_members; +mod workspace_webhooks; +mod workspaces; // Document-related constraint modules -pub mod document_annotations; -pub mod document_chunks; -pub mod document_comments; -pub mod document_files; -pub mod document_versions; -pub mod documents; - -// Workspace run constraint modules -pub mod workspace_integration_runs; +mod document_annotations; +mod document_chunks; +mod document_comments; +mod document_files; +mod document_versions; +mod documents; + +// Studio-related constraint modules +mod studio_operations; +mod studio_sessions; +mod studio_tool_calls; use std::fmt; -pub use account_action_tokens::AccountActionTokenConstraints; -pub use account_api_tokens::AccountApiTokenConstraints; -// Re-export all constraint types for convenience -pub use account_notifications::AccountNotificationConstraints; -pub use accounts::AccountConstraints; -pub use document_annotations::DocumentAnnotationConstraints; -pub use document_chunks::DocumentChunkConstraints; -pub use document_comments::DocumentCommentConstraints; -pub use document_files::DocumentFileConstraints; -pub use document_versions::DocumentVersionConstraints; -pub use documents::DocumentConstraints; use serde::{Deserialize, Serialize}; -pub use workspace_activities::WorkspaceActivitiesConstraints; -pub use workspace_integration_runs::WorkspaceIntegrationRunConstraints; -pub use workspace_integrations::WorkspaceIntegrationConstraints; -pub use workspace_invites::WorkspaceInviteConstraints; -pub use 
workspace_members::WorkspaceMemberConstraints; -pub use workspace_webhooks::WorkspaceWebhookConstraints; -pub use workspaces::WorkspaceConstraints; + +pub use self::account_action_tokens::AccountActionTokenConstraints; +pub use self::account_api_tokens::AccountApiTokenConstraints; +pub use self::account_notifications::AccountNotificationConstraints; +pub use self::accounts::AccountConstraints; +pub use self::document_annotations::DocumentAnnotationConstraints; +pub use self::document_chunks::DocumentChunkConstraints; +pub use self::document_comments::DocumentCommentConstraints; +pub use self::document_files::DocumentFileConstraints; +pub use self::document_versions::DocumentVersionConstraints; +pub use self::documents::DocumentConstraints; +pub use self::studio_operations::StudioOperationConstraints; +pub use self::studio_sessions::StudioSessionConstraints; +pub use self::studio_tool_calls::StudioToolCallConstraints; +pub use self::workspace_activities::WorkspaceActivitiesConstraints; +pub use self::workspace_integration_runs::WorkspaceIntegrationRunConstraints; +pub use self::workspace_integrations::WorkspaceIntegrationConstraints; +pub use self::workspace_invites::WorkspaceInviteConstraints; +pub use self::workspace_members::WorkspaceMemberConstraints; +pub use self::workspace_webhooks::WorkspaceWebhookConstraints; +pub use self::workspaces::WorkspaceConstraints; /// Unified constraint violation enum that can represent any database constraint. /// @@ -80,6 +86,11 @@ pub enum ConstraintViolation { DocumentComment(DocumentCommentConstraints), DocumentFile(DocumentFileConstraints), DocumentVersion(DocumentVersionConstraints), + + // Studio-related constraints + StudioSession(StudioSessionConstraints), + StudioToolCall(StudioToolCallConstraints), + StudioOperation(StudioOperationConstraints), } /// Categories of database constraint violations. @@ -156,6 +167,11 @@ impl ConstraintViolation { DocumentFileConstraints::new => DocumentFile, DocumentVersionConstraints::new => DocumentVersion, }, + "studio" => try_parse! 
{ + StudioSessionConstraints::new => StudioSession, + StudioToolCallConstraints::new => StudioToolCall, + StudioOperationConstraints::new => StudioOperation, + }, _ => None, } } @@ -187,6 +203,11 @@ impl ConstraintViolation { ConstraintViolation::DocumentComment(_) => "document_comments", ConstraintViolation::DocumentFile(_) => "document_files", ConstraintViolation::DocumentVersion(_) => "document_versions", + + // Studio-related tables + ConstraintViolation::StudioSession(_) => "studio_sessions", + ConstraintViolation::StudioToolCall(_) => "studio_tool_calls", + ConstraintViolation::StudioOperation(_) => "studio_operations", } } @@ -215,6 +236,10 @@ impl ConstraintViolation { | ConstraintViolation::DocumentComment(_) | ConstraintViolation::DocumentFile(_) | ConstraintViolation::DocumentVersion(_) => "documents", + + ConstraintViolation::StudioSession(_) + | ConstraintViolation::StudioToolCall(_) + | ConstraintViolation::StudioOperation(_) => "studio", } } @@ -242,6 +267,10 @@ impl ConstraintViolation { ConstraintViolation::DocumentComment(c) => c.categorize(), ConstraintViolation::DocumentFile(c) => c.categorize(), ConstraintViolation::DocumentVersion(c) => c.categorize(), + + ConstraintViolation::StudioSession(c) => c.categorize(), + ConstraintViolation::StudioToolCall(c) => c.categorize(), + ConstraintViolation::StudioOperation(c) => c.categorize(), } } @@ -274,6 +303,10 @@ impl fmt::Display for ConstraintViolation { ConstraintViolation::DocumentComment(c) => write!(f, "{}", c), ConstraintViolation::DocumentFile(c) => write!(f, "{}", c), ConstraintViolation::DocumentVersion(c) => write!(f, "{}", c), + + ConstraintViolation::StudioSession(c) => write!(f, "{}", c), + ConstraintViolation::StudioToolCall(c) => write!(f, "{}", c), + ConstraintViolation::StudioOperation(c) => write!(f, "{}", c), } } } diff --git a/crates/nvisy-postgres/src/types/constraint/studio_operations.rs b/crates/nvisy-postgres/src/types/constraint/studio_operations.rs new file mode 100644 index 0000000..041fd1a --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/studio_operations.rs @@ -0,0 +1,61 @@ +//! Studio operations table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Studio operations table constraint violations. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum StudioOperationConstraints { + // Operation validation constraints + #[strum(serialize = "studio_operations_operation_type_length")] + OperationTypeLength, + #[strum(serialize = "studio_operations_operation_diff_size")] + OperationDiffSize, + + // Operation business logic constraints + #[strum(serialize = "studio_operations_revert_requires_applied")] + RevertRequiresApplied, + + // Operation chronological constraints + #[strum(serialize = "studio_operations_applied_after_created")] + AppliedAfterCreated, +} + +impl StudioOperationConstraints { + /// Creates a new [`StudioOperationConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. 
+ pub fn categorize(&self) -> ConstraintCategory { + match self { + StudioOperationConstraints::OperationTypeLength + | StudioOperationConstraints::OperationDiffSize => ConstraintCategory::Validation, + + StudioOperationConstraints::RevertRequiresApplied => ConstraintCategory::BusinessLogic, + + StudioOperationConstraints::AppliedAfterCreated => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: StudioOperationConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for StudioOperationConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/studio_sessions.rs b/crates/nvisy-postgres/src/types/constraint/studio_sessions.rs new file mode 100644 index 0000000..a3f5802 --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/studio_sessions.rs @@ -0,0 +1,61 @@ +//! Studio sessions table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Studio sessions table constraint violations. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum StudioSessionConstraints { + // Session validation constraints + #[strum(serialize = "studio_sessions_display_name_length")] + DisplayNameLength, + #[strum(serialize = "studio_sessions_model_config_size")] + ModelConfigSize, + #[strum(serialize = "studio_sessions_message_count_min")] + MessageCountMin, + #[strum(serialize = "studio_sessions_token_count_min")] + TokenCountMin, + + // Session chronological constraints + #[strum(serialize = "studio_sessions_updated_after_created")] + UpdatedAfterCreated, +} + +impl StudioSessionConstraints { + /// Creates a new [`StudioSessionConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + StudioSessionConstraints::DisplayNameLength + | StudioSessionConstraints::ModelConfigSize + | StudioSessionConstraints::MessageCountMin + | StudioSessionConstraints::TokenCountMin => ConstraintCategory::Validation, + + StudioSessionConstraints::UpdatedAfterCreated => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: StudioSessionConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for StudioSessionConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/studio_tool_calls.rs b/crates/nvisy-postgres/src/types/constraint/studio_tool_calls.rs new file mode 100644 index 0000000..7a4dfe4 --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/studio_tool_calls.rs @@ -0,0 +1,58 @@ +//! Studio tool calls table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Studio tool calls table constraint violations. 
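// Illustrative sketch (assumes the `StudioSessionConstraints` API defined above): mapping a raw
// Postgres constraint name back to a typed studio-session violation and its category.
// The constraint string in the comment below is one of the serialized names from this file.
fn categorize_session_constraint(name: &str) -> Option<ConstraintCategory> {
    // `new` returns None when the name does not belong to the studio_sessions table.
    StudioSessionConstraints::new(name).map(|c| c.categorize())
}
// e.g. categorize_session_constraint("studio_sessions_display_name_length")
//          == Some(ConstraintCategory::Validation)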
+#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum StudioToolCallConstraints { + // Tool call validation constraints + #[strum(serialize = "studio_tool_calls_tool_name_length")] + ToolNameLength, + #[strum(serialize = "studio_tool_calls_tool_input_size")] + ToolInputSize, + #[strum(serialize = "studio_tool_calls_tool_output_size")] + ToolOutputSize, + + // Tool call chronological constraints + #[strum(serialize = "studio_tool_calls_completed_after_started")] + CompletedAfterStarted, +} + +impl StudioToolCallConstraints { + /// Creates a new [`StudioToolCallConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + StudioToolCallConstraints::ToolNameLength + | StudioToolCallConstraints::ToolInputSize + | StudioToolCallConstraints::ToolOutputSize => ConstraintCategory::Validation, + + StudioToolCallConstraints::CompletedAfterStarted => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: StudioToolCallConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for StudioToolCallConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/enums/mod.rs b/crates/nvisy-postgres/src/types/enums/mod.rs index 2308fd9..32c76ba 100644 --- a/crates/nvisy-postgres/src/types/enums/mod.rs +++ b/crates/nvisy-postgres/src/types/enums/mod.rs @@ -26,6 +26,10 @@ pub mod content_segmentation; pub mod processing_status; pub mod require_mode; +// Studio-related enumerations +pub mod studio_session_status; +pub mod studio_tool_status; + pub use action_token_type::ActionTokenType; pub use activity_type::{ActivityCategory, ActivityType}; pub use annotation_type::AnnotationType; @@ -38,6 +42,8 @@ pub use notification_event::NotificationEvent; pub use processing_status::ProcessingStatus; pub use require_mode::RequireMode; pub use run_type::RunType; +pub use studio_session_status::StudioSessionStatus; +pub use studio_tool_status::StudioToolStatus; pub use webhook_event::WebhookEvent; pub use webhook_status::WebhookStatus; pub use webhook_type::WebhookType; diff --git a/crates/nvisy-postgres/src/types/enums/studio_session_status.rs b/crates/nvisy-postgres/src/types/enums/studio_session_status.rs new file mode 100644 index 0000000..1522a0d --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/studio_session_status.rs @@ -0,0 +1,92 @@ +//! Studio session status enumeration for LLM-assisted editing sessions. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines the lifecycle status of a studio editing session. +/// +/// This enumeration corresponds to the `STUDIO_SESSION_STATUS` PostgreSQL enum and is used +/// to track the state of LLM-assisted document editing sessions as they progress through +/// their lifecycle from active use to archival. 
+#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::StudioSessionStatus"] +pub enum StudioSessionStatus { + /// Session is currently active and in use + #[db_rename = "active"] + #[serde(rename = "active")] + #[default] + Active, + + /// Session is temporarily paused but can be resumed + #[db_rename = "paused"] + #[serde(rename = "paused")] + Paused, + + /// Session has been archived and is no longer active + #[db_rename = "archived"] + #[serde(rename = "archived")] + Archived, +} + +impl StudioSessionStatus { + /// Returns whether the session is currently active. + #[inline] + pub fn is_active(self) -> bool { + matches!(self, StudioSessionStatus::Active) + } + + /// Returns whether the session is paused. + #[inline] + pub fn is_paused(self) -> bool { + matches!(self, StudioSessionStatus::Paused) + } + + /// Returns whether the session is archived. + #[inline] + pub fn is_archived(self) -> bool { + matches!(self, StudioSessionStatus::Archived) + } + + /// Returns whether the session can accept new messages or tool calls. + #[inline] + pub fn can_accept_input(self) -> bool { + matches!(self, StudioSessionStatus::Active) + } + + /// Returns whether the session can be resumed. + #[inline] + pub fn can_resume(self) -> bool { + matches!(self, StudioSessionStatus::Paused) + } + + /// Returns whether the session can be paused. + #[inline] + pub fn can_pause(self) -> bool { + matches!(self, StudioSessionStatus::Active) + } + + /// Returns whether the session can be archived. + #[inline] + pub fn can_archive(self) -> bool { + matches!( + self, + StudioSessionStatus::Active | StudioSessionStatus::Paused + ) + } + + /// Returns whether the session is in a final state. + #[inline] + pub fn is_final(self) -> bool { + matches!(self, StudioSessionStatus::Archived) + } + + /// Returns session statuses that are considered active (not archived). + pub fn active_statuses() -> &'static [StudioSessionStatus] { + &[StudioSessionStatus::Active, StudioSessionStatus::Paused] + } +} diff --git a/crates/nvisy-postgres/src/types/enums/studio_tool_status.rs b/crates/nvisy-postgres/src/types/enums/studio_tool_status.rs new file mode 100644 index 0000000..9ee5ed7 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/studio_tool_status.rs @@ -0,0 +1,102 @@ +//! Studio tool status enumeration for tool execution tracking. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines the execution status of a studio tool call. +/// +/// This enumeration corresponds to the `STUDIO_TOOL_STATUS` PostgreSQL enum and is used +/// to track the state of tool invocations within studio sessions as they progress +/// from pending through execution to completion or cancellation. 
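// Illustrative sketch (assumes the `StudioSessionStatus` helpers defined above): a guard that
// only archives sessions whose current status permits the transition.
fn archive_session(status: StudioSessionStatus) -> Result<StudioSessionStatus, &'static str> {
    if status.can_archive() {
        Ok(StudioSessionStatus::Archived)
    } else {
        // `can_archive` is false only for sessions that are already archived.
        Err("session is already archived")
    }
}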
+#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::StudioToolStatus"] +pub enum StudioToolStatus { + /// Tool call is queued and waiting to be executed + #[db_rename = "pending"] + #[serde(rename = "pending")] + #[default] + Pending, + + /// Tool is currently being executed + #[db_rename = "running"] + #[serde(rename = "running")] + Running, + + /// Tool execution completed successfully + #[db_rename = "completed"] + #[serde(rename = "completed")] + Completed, + + /// Tool execution was cancelled + #[db_rename = "cancelled"] + #[serde(rename = "cancelled")] + Cancelled, +} + +impl StudioToolStatus { + /// Returns whether the tool call is pending execution. + #[inline] + pub fn is_pending(self) -> bool { + matches!(self, StudioToolStatus::Pending) + } + + /// Returns whether the tool is currently running. + #[inline] + pub fn is_running(self) -> bool { + matches!(self, StudioToolStatus::Running) + } + + /// Returns whether the tool execution completed successfully. + #[inline] + pub fn is_completed(self) -> bool { + matches!(self, StudioToolStatus::Completed) + } + + /// Returns whether the tool execution was cancelled. + #[inline] + pub fn is_cancelled(self) -> bool { + matches!(self, StudioToolStatus::Cancelled) + } + + /// Returns whether the tool is in a final state. + #[inline] + pub fn is_final(self) -> bool { + matches!( + self, + StudioToolStatus::Completed | StudioToolStatus::Cancelled + ) + } + + /// Returns whether the tool can be started. + #[inline] + pub fn can_start(self) -> bool { + matches!(self, StudioToolStatus::Pending) + } + + /// Returns whether the tool can be cancelled. + #[inline] + pub fn can_cancel(self) -> bool { + matches!(self, StudioToolStatus::Pending | StudioToolStatus::Running) + } + + /// Returns whether the tool execution is active (not final). + #[inline] + pub fn is_active(self) -> bool { + matches!(self, StudioToolStatus::Pending | StudioToolStatus::Running) + } + + /// Returns tool statuses that are considered active (not final). + pub fn active_statuses() -> &'static [StudioToolStatus] { + &[StudioToolStatus::Pending, StudioToolStatus::Running] + } + + /// Returns tool statuses that represent final states. 
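// Illustrative sketch (assumes the `StudioToolStatus` helpers defined above): pending or running
// tool calls may be cancelled, while calls already in a final state are left unchanged.
fn cancel_tool_call(status: StudioToolStatus) -> StudioToolStatus {
    if status.can_cancel() {
        StudioToolStatus::Cancelled
    } else {
        // Already completed or cancelled (`is_final` holds here).
        status
    }
}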
+ pub fn final_statuses() -> &'static [StudioToolStatus] { + &[StudioToolStatus::Completed, StudioToolStatus::Cancelled] + } +} diff --git a/crates/nvisy-postgres/src/types/mod.rs b/crates/nvisy-postgres/src/types/mod.rs index 77d614f..6841ff5 100644 --- a/crates/nvisy-postgres/src/types/mod.rs +++ b/crates/nvisy-postgres/src/types/mod.rs @@ -13,6 +13,7 @@ pub use constraint::{ AccountNotificationConstraints, ConstraintCategory, ConstraintViolation, DocumentAnnotationConstraints, DocumentChunkConstraints, DocumentCommentConstraints, DocumentConstraints, DocumentFileConstraints, DocumentVersionConstraints, + StudioOperationConstraints, StudioSessionConstraints, StudioToolCallConstraints, WorkspaceActivitiesConstraints, WorkspaceConstraints, WorkspaceIntegrationConstraints, WorkspaceIntegrationRunConstraints, WorkspaceInviteConstraints, WorkspaceMemberConstraints, WorkspaceWebhookConstraints, @@ -20,8 +21,8 @@ pub use constraint::{ pub use enums::{ ActionTokenType, ActivityCategory, ActivityType, AnnotationType, ApiTokenType, ContentSegmentation, IntegrationStatus, IntegrationType, InviteStatus, NotificationEvent, - ProcessingStatus, RequireMode, RunType, WebhookEvent, WebhookStatus, WebhookType, - WorkspaceRole, + ProcessingStatus, RequireMode, RunType, StudioSessionStatus, StudioToolStatus, WebhookEvent, + WebhookStatus, WebhookType, WorkspaceRole, }; pub use filtering::{FileFilter, FileFormat, InviteFilter, MemberFilter}; pub use pagination::{Cursor, CursorPage, CursorPagination, OffsetPage, OffsetPagination}; diff --git a/crates/nvisy-server/src/handler/accounts.rs b/crates/nvisy-server/src/handler/accounts.rs index 4eb8003..9ddde9c 100644 --- a/crates/nvisy-server/src/handler/accounts.rs +++ b/crates/nvisy-server/src/handler/accounts.rs @@ -11,12 +11,14 @@ use axum::extract::State; use axum::http::StatusCode; use nvisy_postgres::PgClient; use nvisy_postgres::model::Account as AccountModel; -use nvisy_postgres::query::{AccountNotificationRepository, AccountRepository}; +use nvisy_postgres::query::{ + AccountNotificationRepository, AccountRepository, WorkspaceMemberRepository, +}; use uuid::Uuid; -use super::request::{CursorPagination, UpdateAccount}; +use super::request::{AccountPathParams, CursorPagination, UpdateAccount}; use super::response::{Account, ErrorResponse, Notification, NotificationsPage, UnreadStatus}; -use crate::extract::{AuthState, Json, Query, ValidateJson}; +use crate::extract::{AuthState, Json, Path, Query, ValidateJson}; use crate::handler::{ErrorKind, Result}; use crate::service::{PasswordHasher, PasswordStrength, ServiceState}; @@ -43,12 +45,65 @@ async fn get_own_account( } fn get_own_account_docs(op: TransformOperation) -> TransformOperation { - op.summary("Get account") + op.summary("Get own account") .description("Returns the authenticated user's account details.") .response::<200, Json>() .response::<401, Json>() } +/// Retrieves an account by ID. +/// +/// The requester must share at least one workspace with the target account. 
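// Illustrative client-side sketch (hypothetical host and bearer token; the real auth scheme and
// base URL are deployment-specific): calling the new GET /accounts/{accountId}/ route described above.
async fn fetch_account(account_id: uuid::Uuid, token: &str) -> reqwest::Result<serde_json::Value> {
    let url = format!("https://api.example.com/accounts/{account_id}/");
    reqwest::Client::new()
        .get(url)
        .bearer_auth(token)
        .send()
        .await?
        // 403 when the accounts share no workspace, 404 when the account does not exist.
        .error_for_status()?
        .json()
        .await
}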
+#[tracing::instrument( + skip_all, + fields( + requester_id = %auth_claims.account_id, + target_id = %path_params.account_id, + ) +)] +async fn get_account( + State(pg_client): State<PgClient>, + AuthState(auth_claims): AuthState, + Path(path_params): Path<AccountPathParams>, +) -> Result<(StatusCode, Json<Account>)> { + tracing::debug!(target: TRACING_TARGET, "Reading account by ID"); + + let mut conn = pg_client.get_connection().await?; + + // Check if requester shares a workspace with target account + let shares_workspace = conn + .accounts_share_workspace(auth_claims.account_id, path_params.account_id) + .await?; + + if !shares_workspace { + tracing::warn!( + target: TRACING_TARGET, + "Access denied: accounts do not share a workspace" + ); + return Err(ErrorKind::Forbidden + .with_message("You do not have access to this account") + .with_resource("account")); + } + + let account = find_account(&mut conn, path_params.account_id).await?; + + tracing::info!(target: TRACING_TARGET, "Account read by ID"); + + Ok((StatusCode::OK, Json(Account::from_model(account)))) +} + +fn get_account_docs(op: TransformOperation) -> TransformOperation { + op.summary("Get account by ID") + .description( + "Returns an account's details by ID. \ + The requester must share at least one workspace with the target account.", + ) + .response::<200, Json<Account>>() + .response::<401, Json<ErrorResponse>>() + .response::<403, Json<ErrorResponse>>() + .response::<404, Json<ErrorResponse>>() +} + /// Updates the authenticated account. #[tracing::instrument( skip_all, @@ -262,6 +317,10 @@ pub fn routes(_state: ServiceState) -> ApiRouter { .patch_with(update_own_account, update_own_account_docs) .delete_with(delete_own_account, delete_own_account_docs), ) + .api_route( + "/accounts/{accountId}/", + get_with(get_account, get_account_docs), + ) .api_route( "/notifications/", get_with(list_notifications, list_notifications_docs), diff --git a/crates/nvisy-server/src/handler/error/mod.rs b/crates/nvisy-server/src/handler/error/mod.rs index 196759c..591b0f6 100644 --- a/crates/nvisy-server/src/handler/error/mod.rs +++ b/crates/nvisy-server/src/handler/error/mod.rs @@ -5,6 +5,7 @@ mod nats_error; mod pg_account; mod pg_document; mod pg_error; +mod pg_studio; mod pg_workspace; mod service_error; diff --git a/crates/nvisy-server/src/handler/error/pg_error.rs b/crates/nvisy-server/src/handler/error/pg_error.rs index b3a92e6..a5ee229 100644 --- a/crates/nvisy-server/src/handler/error/pg_error.rs +++ b/crates/nvisy-server/src/handler/error/pg_error.rs @@ -34,6 +34,9 @@ impl From<ConstraintViolation> for Error<'static> { ConstraintViolation::DocumentFile(c) => c.into(), ConstraintViolation::DocumentVersion(c) => c.into(), ConstraintViolation::WorkspaceWebhook(c) => c.into(), + ConstraintViolation::StudioSession(c) => c.into(), + ConstraintViolation::StudioToolCall(c) => c.into(), + ConstraintViolation::StudioOperation(c) => c.into(), } } } diff --git a/crates/nvisy-server/src/handler/error/pg_studio.rs b/crates/nvisy-server/src/handler/error/pg_studio.rs new file mode 100644 index 0000000..f43f75b --- /dev/null +++ b/crates/nvisy-server/src/handler/error/pg_studio.rs @@ -0,0 +1,67 @@ +//! Studio-related constraint violation error handlers.
+ +use nvisy_postgres::types::{ + StudioOperationConstraints, StudioSessionConstraints, StudioToolCallConstraints, +}; + +use crate::handler::{Error, ErrorKind}; + +impl From<StudioSessionConstraints> for Error<'static> { + fn from(c: StudioSessionConstraints) -> Self { + let error = match c { + StudioSessionConstraints::DisplayNameLength => ErrorKind::BadRequest + .with_message("Session name must be between 1 and 255 characters long"), + StudioSessionConstraints::ModelConfigSize => { + ErrorKind::BadRequest.with_message("Model configuration size is invalid") + } + StudioSessionConstraints::MessageCountMin => { + ErrorKind::InternalServerError.into_error() + } + StudioSessionConstraints::TokenCountMin => ErrorKind::InternalServerError.into_error(), + StudioSessionConstraints::UpdatedAfterCreated => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("studio_session") + } +} + +impl From<StudioToolCallConstraints> for Error<'static> { + fn from(c: StudioToolCallConstraints) -> Self { + let error = match c { + StudioToolCallConstraints::ToolNameLength => ErrorKind::BadRequest + .with_message("Tool name must be between 1 and 128 characters long"), + StudioToolCallConstraints::ToolInputSize => { + ErrorKind::BadRequest.with_message("Tool input size exceeds maximum allowed") + } + StudioToolCallConstraints::ToolOutputSize => { + ErrorKind::BadRequest.with_message("Tool output size exceeds maximum allowed") + } + StudioToolCallConstraints::CompletedAfterStarted => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("studio_tool_call") + } +} + +impl From<StudioOperationConstraints> for Error<'static> { + fn from(c: StudioOperationConstraints) -> Self { + let error = match c { + StudioOperationConstraints::OperationTypeLength => ErrorKind::BadRequest + .with_message("Operation type must be between 1 and 64 characters long"), + StudioOperationConstraints::OperationDiffSize => { + ErrorKind::BadRequest.with_message("Operation diff size exceeds maximum allowed") + } + StudioOperationConstraints::RevertRequiresApplied => ErrorKind::BadRequest + .with_message("Cannot revert an operation that has not been applied"), + StudioOperationConstraints::AppliedAfterCreated => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("studio_operation") + } +} diff --git a/crates/nvisy-server/src/handler/request/paths.rs b/crates/nvisy-server/src/handler/request/paths.rs index 7295901..7a60af2 100644 --- a/crates/nvisy-server/src/handler/request/paths.rs +++ b/crates/nvisy-server/src/handler/request/paths.rs @@ -157,3 +157,15 @@ pub struct TokenPathParams { /// Unique identifier of the API token. pub token_id: Uuid, } + +/// Path parameters for account operations. +/// +/// Used when retrieving account information by ID. Access is granted +/// if the requester shares at least one workspace with the target account. +#[must_use] +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct AccountPathParams { + /// Unique identifier of the account.
+ pub account_id: Uuid, +} From 07e0669b7b400322b50e27685890dd6a4d4cf136 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Fri, 16 Jan 2026 04:42:05 +0100 Subject: [PATCH 02/28] feat(postgres, server): add file versioning, chat handler, docs, remove websocket - Add version_number column to document_files for file versioning - Add version query methods: list_file_versions, find_latest_version, get_next_version_number - Rename studio to chat throughout codebase (migrations, models, handlers) - Add chat handler with SSE streaming support - Add FILE_SOURCE enum (uploaded, imported, generated) - Update REQUIRE_MODE to content types (document, image, spreadsheet, etc.) - Remove WebSocket handler and workspace event streams - Remove ws features from axum and aide - Add EMBEDDING_DIMENSIONS constant (1536) - Upgrade rig-core from 0.12 to 0.28 - Add platform vision documentation in /docs - Clean up unused constants --- .gitignore | 2 +- Cargo.lock | 153 ++-- Cargo.toml | 3 +- README.md | 2 +- crates/nvisy-nats/src/client/nats_client.rs | 35 +- crates/nvisy-nats/src/stream/mod.rs | 18 - .../nvisy-nats/src/stream/workspace_event.rs | 657 -------------- .../src/stream/workspace_event_pub.rs | 55 -- .../src/stream/workspace_event_sub.rs | 53 -- .../src/model/account_api_token.rs | 6 +- .../src/model/account_notification.rs | 6 +- ...{studio_operation.rs => chat_operation.rs} | 32 +- .../{studio_session.rs => chat_session.rs} | 44 +- ...{studio_tool_call.rs => chat_tool_call.rs} | 46 +- .../src/model/document_comment.rs | 4 +- .../nvisy-postgres/src/model/document_file.rs | 31 +- crates/nvisy-postgres/src/model/mod.rs | 14 +- .../src/model/workspace_invite.rs | 4 +- ...{studio_operation.rs => chat_operation.rs} | 188 ++-- .../nvisy-postgres/src/query/chat_session.rs | 291 +++++++ ...{studio_tool_call.rs => chat_tool_call.rs} | 174 ++-- .../nvisy-postgres/src/query/document_file.rs | 74 ++ crates/nvisy-postgres/src/query/mod.rs | 12 +- .../src/query/studio_session.rs | 297 ------- crates/nvisy-postgres/src/schema.rs | 53 +- crates/nvisy-postgres/src/types/constants.rs | 150 +--- .../src/types/constraint/chat_operations.rs | 61 ++ .../src/types/constraint/chat_sessions.rs | 61 ++ .../src/types/constraint/chat_tool_calls.rs | 58 ++ .../src/types/constraint/mod.rs | 56 +- .../src/types/constraint/studio_operations.rs | 61 -- .../src/types/constraint/studio_sessions.rs | 61 -- .../src/types/constraint/studio_tool_calls.rs | 58 -- ...ssion_status.rs => chat_session_status.rs} | 35 +- ...dio_tool_status.rs => chat_tool_status.rs} | 41 +- .../src/types/enums/file_source.rs | 66 ++ crates/nvisy-postgres/src/types/enums/mod.rs | 12 +- .../src/types/enums/require_mode.rs | 163 ++-- crates/nvisy-postgres/src/types/mod.rs | 17 +- crates/nvisy-rig/src/provider/embedding.rs | 61 +- crates/nvisy-rig/src/rag/indexer/mod.rs | 2 +- crates/nvisy-rig/src/rag/searcher/mod.rs | 2 +- crates/nvisy-server/Cargo.toml | 5 +- crates/nvisy-server/src/handler/chat.rs | 420 +++++++++ crates/nvisy-server/src/handler/error/mod.rs | 2 +- .../nvisy-server/src/handler/error/pg_chat.rs | 65 ++ .../src/handler/error/pg_error.rs | 6 +- .../src/handler/error/pg_studio.rs | 67 -- crates/nvisy-server/src/handler/mod.rs | 6 +- .../nvisy-server/src/handler/request/chat.rs | 74 ++ .../nvisy-server/src/handler/request/mod.rs | 2 + .../nvisy-server/src/handler/request/paths.rs | 12 + .../nvisy-server/src/handler/response/chat.rs | 94 ++ .../src/handler/response/files.rs | 8 +- .../nvisy-server/src/handler/response/mod.rs | 2 + 
crates/nvisy-server/src/handler/websocket.rs | 820 ------------------ .../src/middleware/specification.rs | 4 +- docs/ARCHITECTURE.md | 84 ++ docs/INTELLIGENCE.md | 67 ++ docs/README.md | 13 + docs/VISION.md | 50 ++ .../2025-05-27-011852_documents/down.sql | 1 + migrations/2025-05-27-011852_documents/up.sql | 24 +- migrations/2026-01-09-002114_chat/down.sql | 10 + migrations/2026-01-09-002114_chat/up.sql | 202 +++++ migrations/2026-01-09-002114_studio/down.sql | 10 - migrations/2026-01-09-002114_studio/up.sql | 202 ----- 67 files changed, 2402 insertions(+), 3067 deletions(-) delete mode 100644 crates/nvisy-nats/src/stream/workspace_event.rs delete mode 100644 crates/nvisy-nats/src/stream/workspace_event_pub.rs delete mode 100644 crates/nvisy-nats/src/stream/workspace_event_sub.rs rename crates/nvisy-postgres/src/model/{studio_operation.rs => chat_operation.rs} (86%) rename crates/nvisy-postgres/src/model/{studio_session.rs => chat_session.rs} (80%) rename crates/nvisy-postgres/src/model/{studio_tool_call.rs => chat_tool_call.rs} (79%) rename crates/nvisy-postgres/src/query/{studio_operation.rs => chat_operation.rs} (58%) create mode 100644 crates/nvisy-postgres/src/query/chat_session.rs rename crates/nvisy-postgres/src/query/{studio_tool_call.rs => chat_tool_call.rs} (54%) delete mode 100644 crates/nvisy-postgres/src/query/studio_session.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/chat_operations.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/chat_sessions.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/chat_tool_calls.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/studio_operations.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/studio_sessions.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/studio_tool_calls.rs rename crates/nvisy-postgres/src/types/enums/{studio_session_status.rs => chat_session_status.rs} (66%) rename crates/nvisy-postgres/src/types/enums/{studio_tool_status.rs => chat_tool_status.rs} (62%) create mode 100644 crates/nvisy-postgres/src/types/enums/file_source.rs create mode 100644 crates/nvisy-server/src/handler/chat.rs create mode 100644 crates/nvisy-server/src/handler/error/pg_chat.rs delete mode 100644 crates/nvisy-server/src/handler/error/pg_studio.rs create mode 100644 crates/nvisy-server/src/handler/request/chat.rs create mode 100644 crates/nvisy-server/src/handler/response/chat.rs delete mode 100644 crates/nvisy-server/src/handler/websocket.rs create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/INTELLIGENCE.md create mode 100644 docs/README.md create mode 100644 docs/VISION.md create mode 100644 migrations/2026-01-09-002114_chat/down.sql create mode 100644 migrations/2026-01-09-002114_chat/up.sql delete mode 100644 migrations/2026-01-09-002114_studio/down.sql delete mode 100644 migrations/2026-01-09-002114_studio/up.sql diff --git a/.gitignore b/.gitignore index c72f062..f8a71f2 100644 --- a/.gitignore +++ b/.gitignore @@ -50,4 +50,4 @@ temp/ .ignore*/ LLM.md .claude -pgtrgm/ +CLAUDE.md diff --git a/Cargo.lock b/Cargo.lock index f0792c1..2e82068 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -145,7 +145,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -156,7 +156,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" 
dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -198,6 +198,12 @@ dependencies = [ "password-hash", ] +[[package]] +name = "as-any" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" + [[package]] name = "async-compression" version = "0.4.36" @@ -336,7 +342,6 @@ checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ "axum-core", "axum-macros", - "base64", "bytes", "form_urlencoded", "futures-util", @@ -356,10 +361,8 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sha1", "sync_wrapper", "tokio", - "tokio-tungstenite", "tower", "tower-layer", "tower-service", @@ -1357,7 +1360,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "eventsource-stream" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" +dependencies = [ + "futures-core", + "nom", + "pin-project-lite", ] [[package]] @@ -1549,6 +1563,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.31" @@ -2116,7 +2136,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -2347,6 +2367,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -2400,13 +2426,23 @@ dependencies = [ "signatory", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2654,6 +2690,7 @@ dependencies = [ "tempfile", "thiserror 2.0.17", "tokio", + "tokio-stream", "tokio-util", "tower", "tower-http", @@ -2718,9 +2755,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "ordered-float" -version = "4.6.0" +version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" dependencies = [ "num-traits", ] @@ -3289,23 +3326,33 @@ dependencies = [ [[package]] name = "rig-core" -version = "0.12.0" +version = "0.28.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed332151c57f658a89fc58cbd274aa6743ae2ad3026a17630ec2bfc77eff96ac" +checksum = "5b1a48121c1ecd6f6ce59d64ec353c791aac6fc07bf4aa353380e8185659e6eb" dependencies = [ + "as-any", "async-stream", "base64", "bytes", + "eventsource-stream", + "fastrand", "futures", + "futures-timer", "glob", + "http", + "mime", "mime_guess", "ordered-float", + "pin-project-lite", "reqwest", - "schemars 0.8.22", + "schemars 1.2.0", "serde", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.17", + "tokio", "tracing", + "tracing-futures", + "url", ] [[package]] @@ -3368,7 +3415,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3463,37 +3510,38 @@ dependencies = [ [[package]] name = "schemars" -version = "0.8.22" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" dependencies = [ "dyn-clone", - "schemars_derive 0.8.22", + "indexmap", + "jiff", + "ref-cast", + "schemars_derive 0.9.0", "serde", "serde_json", + "uuid", ] [[package]] name = "schemars" -version = "0.9.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2" dependencies = [ "dyn-clone", - "indexmap", - "jiff", "ref-cast", - "schemars_derive 0.9.0", + "schemars_derive 1.2.0", "serde", "serde_json", - "uuid", ] [[package]] name = "schemars_derive" -version = "0.8.22" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +checksum = "5016d94c77c6d32f0b8e08b781f7dc8a90c2007d4e77472cc2807bc10a8438fe" dependencies = [ "proc-macro2", "quote", @@ -3503,9 +3551,9 @@ dependencies = [ [[package]] name = "schemars_derive" -version = "0.9.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5016d94c77c6d32f0b8e08b781f7dc8a90c2007d4e77472cc2807bc10a8438fe" +checksum = "4908ad288c5035a8eb12cfdf0d49270def0a268ee162b75eeee0f85d155a7c45" dependencies = [ "proc-macro2", "quote", @@ -3942,7 +3990,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4142,18 +4190,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-tungstenite" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857" -dependencies = [ - "futures-util", - "log", - "tokio", - "tungstenite", -] - [[package]] name = "tokio-util" version = "0.7.18" @@ -4315,6 +4351,18 @@ dependencies = [ "valuable", ] +[[package]] +name = "tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "futures", + "futures-task", + "pin-project", + "tracing", +] + [[package]] name = "tracing-log" version = "0.2.0" @@ -4373,23 +4421,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tungstenite" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" -dependencies = [ - "bytes", - "data-encoding", - "http", - "httparse", - "log", - "rand 0.9.2", - "sha1", - "thiserror 2.0.17", - "utf-8", -] - [[package]] name = "typeid" version = "1.0.3" @@ -4496,12 +4527,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "utf-8" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" - [[package]] name = "utf8_iter" version = "1.0.4" diff --git a/Cargo.toml b/Cargo.toml index 63aa3ec..bbd309b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ clap = { version = "4.5", features = [] } # Async runtime tokio = { version = "1.49", features = [] } tokio-util = { version = "0.7", features = ["io"] } +tokio-stream = { version = "0.1", features = [] } futures = { version = "0.3", features = [] } futures-util = { version = "0.3", features = [] } async-stream = { version = "0.3", features = [] } @@ -123,7 +124,7 @@ text-splitter = { version = "0.29", features = [] } woothee = { version = "0.13", features = [] } # AI/ML frameworks -rig-core = { version = "0.12", default-features = false, features = ["reqwest-rustls"] } +rig-core = { version = "0.28", default-features = false, features = ["reqwest-rustls"] } # Archive/Compression tar = { version = "0.4", features = [] } diff --git a/README.md b/README.md index 28ad0f9..d3b48f7 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ High-performance backend server for the Nvisy document processing platform. - **High-Performance** - Async HTTP server with Axum and Tokio - **LLM Annotations** - AI-driven document edits via structured annotations - **RAG Pipeline** - Build knowledge bases with document embeddings and semantic search -- **Real-Time Updates** - Live collaboration via NATS pub/sub and WebSocket +- **Real-Time Updates** - AI streaming via SSE and job processing via NATS - **Interactive Docs** - Auto-generated OpenAPI with Scalar UI ## Architecture diff --git a/crates/nvisy-nats/src/client/nats_client.rs b/crates/nvisy-nats/src/client/nats_client.rs index 5454b35..0493f1c 100644 --- a/crates/nvisy-nats/src/client/nats_client.rs +++ b/crates/nvisy-nats/src/client/nats_client.rs @@ -39,10 +39,7 @@ use tokio::time::timeout; use super::nats_config::NatsConfig; use crate::kv::{ApiTokenStore, CacheStore, ChatHistoryStore}; use crate::object::{DocumentBucket, DocumentStore}; -use crate::stream::{ - DocumentJobPublisher, DocumentJobSubscriber, Stage, WorkspaceEventPublisher, - WorkspaceEventSubscriber, -}; +use crate::stream::{DocumentJobPublisher, DocumentJobSubscriber, Stage}; use crate::{Error, Result, TRACING_TARGET_CLIENT, TRACING_TARGET_CONNECTION}; /// NATS client wrapper with connection management. @@ -188,36 +185,6 @@ impl NatsClient { DocumentJobSubscriber::new(&self.inner.jetstream, consumer_name).await } - /// Create a workspace event publisher. - #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn workspace_event_publisher(&self) -> Result { - WorkspaceEventPublisher::new(&self.inner.jetstream).await - } - - /// Create a workspace event subscriber. - #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn workspace_event_subscriber( - &self, - consumer_name: &str, - ) -> Result { - WorkspaceEventSubscriber::new(&self.inner.jetstream, consumer_name).await - } - - /// Create a workspace event subscriber filtered to a specific workspace. 
- #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn workspace_event_subscriber_for_workspace( - &self, - consumer_name: &str, - workspace_id: uuid::Uuid, - ) -> Result { - WorkspaceEventSubscriber::new_for_workspace( - &self.inner.jetstream, - consumer_name, - workspace_id, - ) - .await - } - /// Get or create a CacheStore for a specific namespace #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] pub async fn cache_store( diff --git a/crates/nvisy-nats/src/stream/mod.rs b/crates/nvisy-nats/src/stream/mod.rs index 5115c07..603c2a7 100644 --- a/crates/nvisy-nats/src/stream/mod.rs +++ b/crates/nvisy-nats/src/stream/mod.rs @@ -3,7 +3,6 @@ //! This module provides type-safe streaming capabilities for: //! //! - Document processing jobs -//! - Workspace event jobs // Base types mod event; @@ -16,11 +15,6 @@ mod document_job_pub; mod document_job_sub; mod document_task; -// Workspace event -mod workspace_event; -mod workspace_event_pub; -mod workspace_event_sub; - pub use document_job::{ CompressionLevel, DocumentJob, PostprocessingData, PreprocessingData, ProcessingData, ProcessingQuality, STREAM_NAME as DOCUMENT_JOB_STREAM, Stage, @@ -31,15 +25,3 @@ pub use document_task::{GenerateInfoType, InsertValue, MergeOrder, PredefinedTas pub use event::EventPriority; pub use publisher::StreamPublisher; pub use subscriber::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; -pub use workspace_event::{ - DocumentCreatedEvent, DocumentDeletedEvent, DocumentUpdateEvent, ErrorEvent, - FilePostprocessedEvent, FilePreprocessedEvent, FileTransformedEvent, JobCompletedEvent, - JobFailedEvent, JobProgressEvent, JobStage, JoinEvent, LeaveEvent, MemberAddedEvent, - MemberPresenceEvent, MemberRemovedEvent, PostprocessingType, PreprocessingType, - TransformationType, TypingEvent, WorkspaceEvent, WorkspaceUpdatedEvent, WorkspaceWsMessage, -}; -pub use workspace_event_pub::WorkspaceEventPublisher; -pub use workspace_event_sub::{ - WorkspaceEventBatchStream, WorkspaceEventMessage, WorkspaceEventStream, - WorkspaceEventSubscriber, -}; diff --git a/crates/nvisy-nats/src/stream/workspace_event.rs b/crates/nvisy-nats/src/stream/workspace_event.rs deleted file mode 100644 index 75a81b2..0000000 --- a/crates/nvisy-nats/src/stream/workspace_event.rs +++ /dev/null @@ -1,657 +0,0 @@ -//! Workspace event stream for real-time WebSocket communication. -//! -//! This module provides NATS-based pub/sub for workspace WebSocket messages, -//! enabling distributed real-time communication across multiple server instances. - -use jiff::Timestamp; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Member joined the workspace event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct JoinEvent { - pub account_id: Uuid, - pub display_name: String, - pub timestamp: Timestamp, -} - -/// Member left the workspace event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct LeaveEvent { - pub account_id: Uuid, - pub timestamp: Timestamp, -} - -/// Document content update event. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct DocumentUpdateEvent { - pub document_id: Uuid, - pub version: u32, - pub updated_by: Uuid, - pub timestamp: Timestamp, -} - -/// Document created event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct DocumentCreatedEvent { - pub document_id: Uuid, - pub display_name: String, - pub created_by: Uuid, - pub timestamp: Timestamp, -} - -/// Document deleted event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct DocumentDeletedEvent { - pub document_id: Uuid, - pub deleted_by: Uuid, - pub timestamp: Timestamp, -} - -/// Type of preprocessing operation completed. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum PreprocessingType { - /// File metadata validation completed. - Validation, - /// OCR text extraction completed. - Ocr, - /// Embeddings generation completed. - Embeddings, - /// Thumbnail generation completed. - Thumbnails, - /// All preprocessing steps completed. - Complete, -} - -/// File preprocessing completed event. -/// -/// Emitted when a preprocessing step (validation, OCR, embeddings) completes. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct FilePreprocessedEvent { - pub file_id: Uuid, - pub document_id: Uuid, - pub preprocessing_type: PreprocessingType, - #[serde(skip_serializing_if = "Option::is_none")] - pub details: Option, - pub timestamp: Timestamp, -} - -/// Type of transformation applied to the file. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum TransformationType { - /// Content was redacted. - Redaction, - /// Content was summarized. - Summary, - /// Content was translated. - Translation, - /// Information was extracted. - Extraction, - /// Information was inserted. - Insertion, - /// Content was reformatted. - Reformat, - /// Content was proofread. - Proofread, - /// Table of contents was generated. - TableOfContents, - /// File was split into multiple files. - Split, - /// Multiple files were merged. - Merge, - /// Custom VLM-based transformation. - Custom, -} - -/// File transformed event. -/// -/// Emitted when a document processing transformation completes. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct FileTransformedEvent { - pub file_id: Uuid, - pub document_id: Uuid, - pub transformation_type: TransformationType, - /// For split operations, the resulting file IDs. - #[serde(skip_serializing_if = "Option::is_none")] - pub result_file_ids: Option>, - /// Human-readable summary of the transformation. - #[serde(skip_serializing_if = "Option::is_none")] - pub summary: Option, - pub transformed_by: Uuid, - pub timestamp: Timestamp, -} - -/// Type of postprocessing operation completed. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum PostprocessingType { - /// Format conversion completed. 
- Conversion, - /// Compression completed. - Compression, - /// Annotations flattened into document. - FlattenAnnotations, - /// All postprocessing steps completed. - Complete, -} - -/// File postprocessed event. -/// -/// Emitted when a postprocessing step (conversion, compression) completes. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct FilePostprocessedEvent { - pub file_id: Uuid, - pub document_id: Uuid, - pub postprocessing_type: PostprocessingType, - /// The output format if conversion was performed. - #[serde(skip_serializing_if = "Option::is_none")] - pub output_format: Option, - pub timestamp: Timestamp, -} - -/// Job processing stage for progress tracking. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "snake_case")] -pub enum JobStage { - Preprocessing, - Processing, - Postprocessing, -} - -/// Job progress event. -/// -/// Emitted periodically during long-running jobs to indicate progress. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct JobProgressEvent { - pub job_id: Uuid, - pub file_id: Uuid, - pub document_id: Uuid, - pub stage: JobStage, - /// Progress percentage (0-100). - pub progress: u8, - /// Current operation being performed. - #[serde(skip_serializing_if = "Option::is_none")] - pub current_operation: Option, - pub timestamp: Timestamp, -} - -/// Job completed event. -/// -/// Emitted when an entire document processing job completes successfully. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct JobCompletedEvent { - pub job_id: Uuid, - pub file_id: Uuid, - pub document_id: Uuid, - /// The final output file ID (may differ from input if transformations created new files). - #[serde(skip_serializing_if = "Option::is_none")] - pub output_file_id: Option, - pub timestamp: Timestamp, -} - -/// Job failed event. -/// -/// Emitted when a document processing job fails. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct JobFailedEvent { - pub job_id: Uuid, - pub file_id: Uuid, - pub document_id: Uuid, - pub stage: JobStage, - pub error_code: String, - pub error_message: String, - pub timestamp: Timestamp, -} - -/// Member presence update event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct MemberPresenceEvent { - pub account_id: Uuid, - pub is_online: bool, - pub timestamp: Timestamp, -} - -/// Member added to workspace event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct MemberAddedEvent { - pub account_id: Uuid, - pub display_name: String, - pub member_role: String, - pub added_by: Uuid, - pub timestamp: Timestamp, -} - -/// Member removed from workspace event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct MemberRemovedEvent { - pub account_id: Uuid, - pub removed_by: Uuid, - pub timestamp: Timestamp, -} - -/// Workspace settings updated event. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct WorkspaceUpdatedEvent { - pub display_name: Option, - pub updated_by: Uuid, - pub timestamp: Timestamp, -} - -/// Typing indicator event. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct TypingEvent { - pub account_id: Uuid, - pub document_id: Option, - pub timestamp: Timestamp, -} - -/// Error event from server. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub struct ErrorEvent { - pub code: String, - pub message: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub details: Option, -} - -/// WebSocket message types for workspace communication. -/// -/// All messages are serialized as JSON with a `type` field that identifies -/// the message variant. This enables type-safe message handling on both -/// client and server. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(tag = "type", rename_all = "camelCase")] -pub enum WorkspaceWsMessage { - /// Client announces presence in the workspace. - Join(JoinEvent), - - /// Client leaves the workspace. - Leave(LeaveEvent), - - /// Document content update notification. - DocumentUpdate(DocumentUpdateEvent), - - /// Document creation notification. - DocumentCreated(DocumentCreatedEvent), - - /// Document deletion notification. - DocumentDeleted(DocumentDeletedEvent), - - /// File preprocessing step completed (validation, OCR, embeddings). - FilePreprocessed(FilePreprocessedEvent), - - /// File transformation completed (redaction, translation, etc.). - FileTransformed(FileTransformedEvent), - - /// File postprocessing step completed (conversion, compression). - FilePostprocessed(FilePostprocessedEvent), - - /// Job progress update. - JobProgress(JobProgressEvent), - - /// Job completed successfully. - JobCompleted(JobCompletedEvent), - - /// Job failed. - JobFailed(JobFailedEvent), - - /// Member presence update. - MemberPresence(MemberPresenceEvent), - - /// Member added to workspace. - MemberAdded(MemberAddedEvent), - - /// Member removed from workspace. - MemberRemoved(MemberRemovedEvent), - - /// Workspace settings updated. - WorkspaceUpdated(WorkspaceUpdatedEvent), - - /// Typing indicator. - Typing(TypingEvent), - - /// Error message from server. - Error(ErrorEvent), -} - -impl WorkspaceWsMessage { - /// Creates an error message with the given code and message. - #[inline] - pub fn error(code: impl Into, message: impl Into) -> Self { - Self::Error(ErrorEvent { - code: code.into(), - message: message.into(), - details: None, - }) - } - - /// Creates an error message with additional details. - #[inline] - pub fn error_with_details( - code: impl Into, - message: impl Into, - details: impl Into, - ) -> Self { - Self::Error(ErrorEvent { - code: code.into(), - message: message.into(), - details: Some(details.into()), - }) - } - - /// Get the account ID associated with this message, if any. 
- pub fn account_id(&self) -> Option { - match self { - Self::Join(e) => Some(e.account_id), - Self::Leave(e) => Some(e.account_id), - Self::DocumentUpdate(e) => Some(e.updated_by), - Self::DocumentCreated(e) => Some(e.created_by), - Self::DocumentDeleted(e) => Some(e.deleted_by), - Self::FilePreprocessed(_) => None, - Self::FileTransformed(e) => Some(e.transformed_by), - Self::FilePostprocessed(_) => None, - Self::JobProgress(_) => None, - Self::JobCompleted(_) => None, - Self::JobFailed(_) => None, - Self::MemberPresence(e) => Some(e.account_id), - Self::MemberAdded(e) => Some(e.account_id), - Self::MemberRemoved(e) => Some(e.account_id), - Self::WorkspaceUpdated(e) => Some(e.updated_by), - Self::Typing(e) => Some(e.account_id), - Self::Error(_) => None, - } - } - - /// Get the timestamp of this message. - pub fn timestamp(&self) -> Option { - match self { - Self::Join(e) => Some(e.timestamp), - Self::Leave(e) => Some(e.timestamp), - Self::DocumentUpdate(e) => Some(e.timestamp), - Self::DocumentCreated(e) => Some(e.timestamp), - Self::DocumentDeleted(e) => Some(e.timestamp), - Self::FilePreprocessed(e) => Some(e.timestamp), - Self::FileTransformed(e) => Some(e.timestamp), - Self::FilePostprocessed(e) => Some(e.timestamp), - Self::JobProgress(e) => Some(e.timestamp), - Self::JobCompleted(e) => Some(e.timestamp), - Self::JobFailed(e) => Some(e.timestamp), - Self::MemberPresence(e) => Some(e.timestamp), - Self::MemberAdded(e) => Some(e.timestamp), - Self::MemberRemoved(e) => Some(e.timestamp), - Self::WorkspaceUpdated(e) => Some(e.timestamp), - Self::Typing(e) => Some(e.timestamp), - Self::Error(_) => None, - } - } - - /// Create a join event. - pub fn join(account_id: Uuid, display_name: impl Into) -> Self { - Self::Join(JoinEvent { - account_id, - display_name: display_name.into(), - timestamp: Timestamp::now(), - }) - } - - /// Create a leave event. - pub fn leave(account_id: Uuid) -> Self { - Self::Leave(LeaveEvent { - account_id, - timestamp: Timestamp::now(), - }) - } - - /// Create a typing event. - pub fn typing(account_id: Uuid, document_id: Option) -> Self { - Self::Typing(TypingEvent { - account_id, - document_id, - timestamp: Timestamp::now(), - }) - } - - /// Create a document update event. - pub fn document_update(document_id: Uuid, version: u32, updated_by: Uuid) -> Self { - Self::DocumentUpdate(DocumentUpdateEvent { - document_id, - version, - updated_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a document created event. - pub fn document_created( - document_id: Uuid, - display_name: impl Into, - created_by: Uuid, - ) -> Self { - Self::DocumentCreated(DocumentCreatedEvent { - document_id, - display_name: display_name.into(), - created_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a document deleted event. - pub fn document_deleted(document_id: Uuid, deleted_by: Uuid) -> Self { - Self::DocumentDeleted(DocumentDeletedEvent { - document_id, - deleted_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a file preprocessed event. - pub fn file_preprocessed( - file_id: Uuid, - document_id: Uuid, - preprocessing_type: PreprocessingType, - ) -> Self { - Self::FilePreprocessed(FilePreprocessedEvent { - file_id, - document_id, - preprocessing_type, - details: None, - timestamp: Timestamp::now(), - }) - } - - /// Create a file transformed event. 
- pub fn file_transformed( - file_id: Uuid, - document_id: Uuid, - transformation_type: TransformationType, - transformed_by: Uuid, - ) -> Self { - Self::FileTransformed(FileTransformedEvent { - file_id, - document_id, - transformation_type, - result_file_ids: None, - summary: None, - transformed_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a file postprocessed event. - pub fn file_postprocessed( - file_id: Uuid, - document_id: Uuid, - postprocessing_type: PostprocessingType, - ) -> Self { - Self::FilePostprocessed(FilePostprocessedEvent { - file_id, - document_id, - postprocessing_type, - output_format: None, - timestamp: Timestamp::now(), - }) - } - - /// Create a job progress event. - pub fn job_progress( - job_id: Uuid, - file_id: Uuid, - document_id: Uuid, - stage: JobStage, - progress: u8, - ) -> Self { - Self::JobProgress(JobProgressEvent { - job_id, - file_id, - document_id, - stage, - progress: progress.min(100), - current_operation: None, - timestamp: Timestamp::now(), - }) - } - - /// Create a job completed event. - pub fn job_completed(job_id: Uuid, file_id: Uuid, document_id: Uuid) -> Self { - Self::JobCompleted(JobCompletedEvent { - job_id, - file_id, - document_id, - output_file_id: None, - timestamp: Timestamp::now(), - }) - } - - /// Create a job failed event. - pub fn job_failed( - job_id: Uuid, - file_id: Uuid, - document_id: Uuid, - stage: JobStage, - error_code: impl Into, - error_message: impl Into, - ) -> Self { - Self::JobFailed(JobFailedEvent { - job_id, - file_id, - document_id, - stage, - error_code: error_code.into(), - error_message: error_message.into(), - timestamp: Timestamp::now(), - }) - } - - /// Create a member presence event. - pub fn member_presence(account_id: Uuid, is_online: bool) -> Self { - Self::MemberPresence(MemberPresenceEvent { - account_id, - is_online, - timestamp: Timestamp::now(), - }) - } - - /// Create a member added event. - pub fn member_added( - account_id: Uuid, - display_name: impl Into, - member_role: impl Into, - added_by: Uuid, - ) -> Self { - Self::MemberAdded(MemberAddedEvent { - account_id, - display_name: display_name.into(), - member_role: member_role.into(), - added_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a member removed event. - pub fn member_removed(account_id: Uuid, removed_by: Uuid) -> Self { - Self::MemberRemoved(MemberRemovedEvent { - account_id, - removed_by, - timestamp: Timestamp::now(), - }) - } - - /// Create a workspace updated event. - pub fn workspace_updated(display_name: Option, updated_by: Uuid) -> Self { - Self::WorkspaceUpdated(WorkspaceUpdatedEvent { - display_name, - updated_by, - timestamp: Timestamp::now(), - }) - } -} - -/// Workspace event envelope for NATS publishing. -/// -/// Wraps the WebSocket message with metadata for routing and filtering. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct WorkspaceEvent { - /// The workspace this event belongs to. - pub workspace_id: Uuid, - - /// The WebSocket message payload. - pub message: WorkspaceWsMessage, - - /// When this event was created. - pub created_at: Timestamp, -} - -impl WorkspaceEvent { - /// Create a new workspace event. 
- pub fn new(workspace_id: Uuid, message: WorkspaceWsMessage) -> Self { - Self { - workspace_id, - message, - created_at: Timestamp::now(), - } - } -} diff --git a/crates/nvisy-nats/src/stream/workspace_event_pub.rs b/crates/nvisy-nats/src/stream/workspace_event_pub.rs deleted file mode 100644 index 3152eeb..0000000 --- a/crates/nvisy-nats/src/stream/workspace_event_pub.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! Workspace event stream publisher for real-time WebSocket communication. - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; -use uuid::Uuid; - -use super::publisher::StreamPublisher; -use super::workspace_event::{WorkspaceEvent, WorkspaceWsMessage}; -use crate::Result; - -/// Workspace event publisher for broadcasting WebSocket messages. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct WorkspaceEventPublisher { - #[deref] - #[deref_mut] - publisher: StreamPublisher, -} - -impl WorkspaceEventPublisher { - /// Create a new workspace event publisher. - pub async fn new(jetstream: &Context) -> Result { - let publisher = StreamPublisher::new(jetstream, "PROJECT_EVENTS").await?; - Ok(Self { publisher }) - } - - /// Publish a WebSocket message to a specific workspace. - /// - /// Messages are published to the subject `PROJECT_EVENTS.{workspace_id}`. - pub async fn publish_message( - &self, - workspace_id: Uuid, - message: WorkspaceWsMessage, - ) -> Result<()> { - let event = WorkspaceEvent::new(workspace_id, message); - let subject = workspace_id.to_string(); - self.publisher.publish(&subject, &event).await - } - - /// Publish multiple messages to a workspace in batch. - pub async fn publish_batch( - &self, - workspace_id: Uuid, - messages: Vec, - ) -> Result<()> { - let events: Vec = messages - .into_iter() - .map(|msg| WorkspaceEvent::new(workspace_id, msg)) - .collect(); - - let subject = workspace_id.to_string(); - self.publisher - .publish_batch_parallel(&subject, &events, 10) - .await - } -} diff --git a/crates/nvisy-nats/src/stream/workspace_event_sub.rs b/crates/nvisy-nats/src/stream/workspace_event_sub.rs deleted file mode 100644 index e05fce9..0000000 --- a/crates/nvisy-nats/src/stream/workspace_event_sub.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! Workspace event stream subscriber for real-time WebSocket communication. - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; -use uuid::Uuid; - -use super::subscriber::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; -use super::workspace_event::WorkspaceEvent; -use crate::Result; - -/// Workspace event subscriber for receiving WebSocket messages. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct WorkspaceEventSubscriber { - #[deref] - #[deref_mut] - subscriber: StreamSubscriber, -} - -impl WorkspaceEventSubscriber { - /// Create a new workspace event subscriber. - /// - /// # Arguments - /// - /// * `jetstream` - JetStream context - /// * `consumer_name` - Unique name for this consumer (e.g., "server-instance-1") - pub async fn new(jetstream: &Context, consumer_name: &str) -> Result { - let subscriber = StreamSubscriber::new(jetstream, "PROJECT_EVENTS", consumer_name).await?; - Ok(Self { subscriber }) - } - - /// Create a subscriber filtered to a specific workspace. - /// - /// Only receives events for the specified workspace ID. - pub async fn new_for_workspace( - jetstream: &Context, - consumer_name: &str, - workspace_id: Uuid, - ) -> Result { - let subscriber = StreamSubscriber::new(jetstream, "PROJECT_EVENTS", consumer_name) - .await? 
- .with_filter_subject(format!("PROJECT_EVENTS.{}", workspace_id)); - Ok(Self { subscriber }) - } -} - -/// Type alias for workspace event batch stream. -pub type WorkspaceEventBatchStream = TypedBatchStream; - -/// Type alias for workspace event message. -pub type WorkspaceEventMessage = TypedMessage; - -/// Type alias for workspace event message stream. -pub type WorkspaceEventStream = TypedMessageStream; diff --git a/crates/nvisy-postgres/src/model/account_api_token.rs b/crates/nvisy-postgres/src/model/account_api_token.rs index 67ede27..d3ed6c1 100644 --- a/crates/nvisy-postgres/src/model/account_api_token.rs +++ b/crates/nvisy-postgres/src/model/account_api_token.rs @@ -6,8 +6,8 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::account_api_tokens; -use crate::types::constants::token; use crate::types::{ApiTokenType, HasCreatedAt, HasExpiresAt, HasSecurityContext}; +use crate::types::{EXPIRY_WARNING_MINUTES, LONG_LIVED_THRESHOLD_HOURS}; /// Account API token model representing an authentication token. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] @@ -139,7 +139,7 @@ impl AccountApiToken { /// Returns whether the token is about to expire (within warning threshold). pub fn is_expiring_soon_default(&self) -> bool { - self.is_expiring_soon(token::EXPIRY_WARNING_MINUTES) + self.is_expiring_soon(EXPIRY_WARNING_MINUTES) } /// Returns whether this is a web token. @@ -159,7 +159,7 @@ impl AccountApiToken { /// Returns whether the token is long-lived (active for more than 24 hours). pub fn is_long_lived(&self) -> bool { - i64::from(self.token_duration().get_hours()) > token::LONG_LIVED_THRESHOLD_HOURS + i64::from(self.token_duration().get_hours()) > LONG_LIVED_THRESHOLD_HOURS } /// Returns a shortened version of the token ID for logging/display. diff --git a/crates/nvisy-postgres/src/model/account_notification.rs b/crates/nvisy-postgres/src/model/account_notification.rs index 8521db8..78e044c 100644 --- a/crates/nvisy-postgres/src/model/account_notification.rs +++ b/crates/nvisy-postgres/src/model/account_notification.rs @@ -5,7 +5,7 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::account_notifications; -use crate::types::constants::notification; +use crate::types::DEFAULT_RETENTION_DAYS; use crate::types::{HasCreatedAt, HasExpiresAt, NotificationEvent}; /// Account notification model representing a notification sent to a user. @@ -186,9 +186,7 @@ impl HasExpiresAt for AccountNotification { Some( self.expires_at.map(Into::into).unwrap_or( jiff::Timestamp::now() - .checked_add( - jiff::Span::new().hours(notification::DEFAULT_RETENTION_DAYS as i64 * 24), - ) + .checked_add(jiff::Span::new().hours(DEFAULT_RETENTION_DAYS as i64 * 24)) .expect("valid notification expiry"), ), ) diff --git a/crates/nvisy-postgres/src/model/studio_operation.rs b/crates/nvisy-postgres/src/model/chat_operation.rs similarity index 86% rename from crates/nvisy-postgres/src/model/studio_operation.rs rename to crates/nvisy-postgres/src/model/chat_operation.rs index b4a932c..4a1584a 100644 --- a/crates/nvisy-postgres/src/model/studio_operation.rs +++ b/crates/nvisy-postgres/src/model/chat_operation.rs @@ -1,4 +1,4 @@ -//! Studio operation model for PostgreSQL database operations. +//! Chat operation model for PostgreSQL database operations. //! //! This module provides models for tracking document operations (diffs) produced //! by tool calls. Operations represent the actual changes to be applied to documents, @@ -6,26 +6,26 @@ //! //! ## Models //! -//! 
- [`StudioOperation`] - Main operation model with diff details -//! - [`NewStudioOperation`] - Data structure for creating new operations -//! - [`UpdateStudioOperation`] - Data structure for updating existing operations +//! - [`ChatOperation`] - Main operation model with diff details +//! - [`NewChatOperation`] - Data structure for creating new operations +//! - [`UpdateChatOperation`] - Data structure for updating existing operations use diesel::prelude::*; use jiff_diesel::Timestamp; use uuid::Uuid; -use crate::schema::studio_operations; +use crate::schema::chat_operations; use crate::types::HasCreatedAt; -/// Studio operation model representing a document operation (diff). +/// Chat operation model representing a document operation (diff). /// /// This model tracks individual operations produced by tool calls that can be /// applied to or reverted from documents. Operations store position-based diffs /// rather than content, enabling efficient undo/redo functionality. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = studio_operations)] +#[diesel(table_name = chat_operations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct StudioOperation { +pub struct ChatOperation { /// Unique operation identifier. pub id: Uuid, /// Reference to the tool call that produced this operation. @@ -48,14 +48,14 @@ pub struct StudioOperation { pub applied_at: Option, } -/// Data structure for creating a new studio operation. +/// Data structure for creating a new chat operation. /// /// Contains all the information necessary to record a new document operation. /// Operations are created as unapplied by default and can be applied later. #[derive(Debug, Clone, Insertable)] -#[diesel(table_name = studio_operations)] +#[diesel(table_name = chat_operations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewStudioOperation { +pub struct NewChatOperation { /// Reference to the tool call that produced this operation. pub tool_call_id: Uuid, /// Reference to the file being modified. @@ -72,14 +72,14 @@ pub struct NewStudioOperation { pub reverted: Option, } -/// Data structure for updating an existing studio operation. +/// Data structure for updating an existing chat operation. /// /// Contains optional fields for modifying operation properties. Primarily /// used to mark operations as applied or reverted. #[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = studio_operations)] +#[diesel(table_name = chat_operations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateStudioOperation { +pub struct UpdateChatOperation { /// Updated applied state. pub applied: Option, /// Updated reverted state. @@ -88,7 +88,7 @@ pub struct UpdateStudioOperation { pub applied_at: Option>, } -impl StudioOperation { +impl ChatOperation { /// Returns whether the operation has been applied. #[inline] pub fn is_applied(&self) -> bool { @@ -179,7 +179,7 @@ impl StudioOperation { } } -impl HasCreatedAt for StudioOperation { +impl HasCreatedAt for ChatOperation { fn created_at(&self) -> jiff::Timestamp { self.created_at.into() } diff --git a/crates/nvisy-postgres/src/model/studio_session.rs b/crates/nvisy-postgres/src/model/chat_session.rs similarity index 80% rename from crates/nvisy-postgres/src/model/studio_session.rs rename to crates/nvisy-postgres/src/model/chat_session.rs index 970b6ec..e7e9591 100644 --- a/crates/nvisy-postgres/src/model/studio_session.rs +++ b/crates/nvisy-postgres/src/model/chat_session.rs @@ -1,4 +1,4 @@ -//! 
Studio session model for PostgreSQL database operations. +//! Chat session model for PostgreSQL database operations. //! //! This module provides models for managing LLM-assisted document editing sessions. //! Sessions track the interaction between users and AI models during document editing, @@ -6,26 +6,26 @@ //! //! ## Models //! -//! - [`StudioSession`] - Main session model with full configuration and status -//! - [`NewStudioSession`] - Data structure for creating new sessions -//! - [`UpdateStudioSession`] - Data structure for updating existing sessions +//! - [`ChatSession`] - Main session model with full configuration and status +//! - [`NewChatSession`] - Data structure for creating new sessions +//! - [`UpdateChatSession`] - Data structure for updating existing sessions use diesel::prelude::*; use jiff_diesel::Timestamp; use uuid::Uuid; -use crate::schema::studio_sessions; -use crate::types::{HasCreatedAt, HasOwnership, HasUpdatedAt, StudioSessionStatus}; +use crate::schema::chat_sessions; +use crate::types::{ChatSessionStatus, HasCreatedAt, HasOwnership, HasUpdatedAt}; -/// Studio session model representing an LLM-assisted document editing session. +/// Chat session model representing an LLM-assisted document editing session. /// /// This model manages the lifecycle of editing sessions where users interact with /// AI models to edit documents. Each session tracks the primary file being edited, /// model configuration, and usage statistics like message and token counts. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = studio_sessions)] +#[diesel(table_name = chat_sessions)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct StudioSession { +pub struct ChatSession { /// Unique session identifier. pub id: Uuid, /// Reference to the workspace this session belongs to. @@ -37,7 +37,7 @@ pub struct StudioSession { /// User-friendly session name. pub display_name: String, /// Current lifecycle status of the session. - pub session_status: StudioSessionStatus, + pub session_status: ChatSessionStatus, /// LLM configuration (model, temperature, max tokens, etc.). pub model_config: serde_json::Value, /// Total number of messages exchanged in this session. @@ -50,15 +50,15 @@ pub struct StudioSession { pub updated_at: Timestamp, } -/// Data structure for creating a new studio session. +/// Data structure for creating a new chat session. /// /// Contains all the information necessary to create a new editing session. /// Most fields have sensible defaults, allowing sessions to be created with /// minimal required information. #[derive(Debug, Clone, Insertable)] -#[diesel(table_name = studio_sessions)] +#[diesel(table_name = chat_sessions)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewStudioSession { +pub struct NewChatSession { /// Reference to the workspace this session will belong to. pub workspace_id: Uuid, /// Account creating this session. @@ -68,24 +68,24 @@ pub struct NewStudioSession { /// Optional user-friendly session name. pub display_name: Option, /// Optional initial session status. - pub session_status: Option, + pub session_status: Option, /// Optional LLM configuration. pub model_config: Option, } -/// Data structure for updating an existing studio session. +/// Data structure for updating an existing chat session. /// /// Contains optional fields for modifying session properties. Only the /// fields that need to be changed should be set to Some(value), while /// unchanged fields remain None to preserve their current values. 
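A minimal sketch of the partial-update pattern described above, assuming the `nvisy_postgres` crate publicly exposes the `model`, `query`, and `types` modules referenced in this patch (the exact re-export layout is an assumption) and using the `ChatSessionRepository` trait added later in the same patch; only the populated fields are written, everything else keeps its current value:

```rust
use nvisy_postgres::model::UpdateChatSession;
use nvisy_postgres::query::ChatSessionRepository;
use nvisy_postgres::types::ChatSessionStatus;
use nvisy_postgres::{PgConnection, PgResult};
use uuid::Uuid;

/// Rename a session and archive it in one statement; fields left as `None`
/// are skipped by the derived `AsChangeset`, so counts and config are untouched.
async fn archive_session(conn: &mut PgConnection, session_id: Uuid) -> PgResult<()> {
    let changes = UpdateChatSession {
        display_name: Some("Archived: contract review".to_owned()),
        session_status: Some(ChatSessionStatus::Archived),
        ..Default::default()
    };
    conn.update_chat_session(session_id, changes).await?;
    Ok(())
}
```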
#[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = studio_sessions)] +#[diesel(table_name = chat_sessions)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateStudioSession { +pub struct UpdateChatSession { /// Updated session display name. pub display_name: Option, /// Updated session status. - pub session_status: Option, + pub session_status: Option, /// Updated LLM configuration. pub model_config: Option, /// Updated message count. @@ -94,7 +94,7 @@ pub struct UpdateStudioSession { pub token_count: Option, } -impl StudioSession { +impl ChatSession { /// Returns whether the session is currently active. #[inline] pub fn is_active(&self) -> bool { @@ -149,19 +149,19 @@ impl StudioSession { } } -impl HasCreatedAt for StudioSession { +impl HasCreatedAt for ChatSession { fn created_at(&self) -> jiff::Timestamp { self.created_at.into() } } -impl HasUpdatedAt for StudioSession { +impl HasUpdatedAt for ChatSession { fn updated_at(&self) -> jiff::Timestamp { self.updated_at.into() } } -impl HasOwnership for StudioSession { +impl HasOwnership for ChatSession { fn created_by(&self) -> Uuid { self.account_id } diff --git a/crates/nvisy-postgres/src/model/studio_tool_call.rs b/crates/nvisy-postgres/src/model/chat_tool_call.rs similarity index 79% rename from crates/nvisy-postgres/src/model/studio_tool_call.rs rename to crates/nvisy-postgres/src/model/chat_tool_call.rs index 38386c4..aaaf164 100644 --- a/crates/nvisy-postgres/src/model/studio_tool_call.rs +++ b/crates/nvisy-postgres/src/model/chat_tool_call.rs @@ -1,34 +1,34 @@ -//! Studio tool call model for PostgreSQL database operations. +//! Chat tool call model for PostgreSQL database operations. //! -//! This module provides models for tracking tool invocations within studio sessions. +//! This module provides models for tracking tool invocations within chat sessions. //! Tool calls represent individual operations performed by the LLM, such as //! merging, splitting, redacting, or translating document content. //! //! ## Models //! -//! - [`StudioToolCall`] - Main tool call model with execution details -//! - [`NewStudioToolCall`] - Data structure for creating new tool calls -//! - [`UpdateStudioToolCall`] - Data structure for updating existing tool calls +//! - [`ChatToolCall`] - Main tool call model with execution details +//! - [`NewChatToolCall`] - Data structure for creating new tool calls +//! - [`UpdateChatToolCall`] - Data structure for updating existing tool calls use diesel::prelude::*; use jiff_diesel::Timestamp; use uuid::Uuid; -use crate::schema::studio_tool_calls; -use crate::types::{HasCreatedAt, StudioToolStatus}; +use crate::schema::chat_tool_calls; +use crate::types::{ChatToolStatus, HasCreatedAt}; -/// Studio tool call model representing a tool invocation within a session. +/// Chat tool call model representing a tool invocation within a session. /// /// This model tracks individual tool calls made during editing sessions, /// including the tool name, input parameters, output results, and execution /// status. Tool calls are linked to specific files and optionally to chunks. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = studio_tool_calls)] +#[diesel(table_name = chat_tool_calls)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct StudioToolCall { +pub struct ChatToolCall { /// Unique tool call identifier. pub id: Uuid, - /// Reference to the studio session this tool call belongs to. + /// Reference to the chat session this tool call belongs to. 
pub session_id: Uuid, /// Reference to the file being operated on. pub file_id: Uuid, @@ -41,22 +41,22 @@ pub struct StudioToolCall { /// Tool output results as JSON. pub tool_output: serde_json::Value, /// Current execution status of the tool call. - pub tool_status: StudioToolStatus, + pub tool_status: ChatToolStatus, /// Timestamp when the tool call was created/started. pub started_at: Timestamp, /// Timestamp when the tool execution completed. pub completed_at: Option, } -/// Data structure for creating a new studio tool call. +/// Data structure for creating a new chat tool call. /// /// Contains all the information necessary to record a new tool invocation. /// The tool status defaults to pending, and output is populated upon completion. #[derive(Debug, Clone, Insertable)] -#[diesel(table_name = studio_tool_calls)] +#[diesel(table_name = chat_tool_calls)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewStudioToolCall { - /// Reference to the studio session. +pub struct NewChatToolCall { + /// Reference to the chat session. pub session_id: Uuid, /// Reference to the file being operated on. pub file_id: Uuid, @@ -69,26 +69,26 @@ pub struct NewStudioToolCall { /// Optional initial tool output. pub tool_output: Option, /// Optional initial tool status. - pub tool_status: Option, + pub tool_status: Option, } -/// Data structure for updating an existing studio tool call. +/// Data structure for updating an existing chat tool call. /// /// Contains optional fields for modifying tool call properties. Primarily /// used to update the status and output upon completion or cancellation. #[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = studio_tool_calls)] +#[diesel(table_name = chat_tool_calls)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateStudioToolCall { +pub struct UpdateChatToolCall { /// Updated tool output results. pub tool_output: Option, /// Updated execution status. - pub tool_status: Option, + pub tool_status: Option, /// Updated completion timestamp. pub completed_at: Option>, } -impl StudioToolCall { +impl ChatToolCall { /// Returns whether the tool call is pending execution. #[inline] pub fn is_pending(&self) -> bool { @@ -148,7 +148,7 @@ impl StudioToolCall { } } -impl HasCreatedAt for StudioToolCall { +impl HasCreatedAt for ChatToolCall { fn created_at(&self) -> jiff::Timestamp { self.started_at.into() } diff --git a/crates/nvisy-postgres/src/model/document_comment.rs b/crates/nvisy-postgres/src/model/document_comment.rs index f67e27e..91d2c20 100644 --- a/crates/nvisy-postgres/src/model/document_comment.rs +++ b/crates/nvisy-postgres/src/model/document_comment.rs @@ -5,7 +5,7 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::document_comments; -use crate::types::constants::comment; +use crate::types::EDIT_GRACE_PERIOD_SECONDS; use crate::types::{HasCreatedAt, HasDeletedAt, HasUpdatedAt}; /// Document comment model representing user discussions on files. 
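As an aside on the `UpdateChatToolCall` changeset introduced above: marking a call finished is a matter of filling in all three fields. A sketch, with crate name and module paths assumed; it mirrors what the repository's `complete_chat_tool_call` helper added later in this patch does:

```rust
use jiff_diesel::Timestamp;
use nvisy_postgres::model::UpdateChatToolCall;
use nvisy_postgres::types::ChatToolStatus;

/// Builds the changeset for a successfully finished tool call: store the
/// output JSON, flip the status to `Completed`, and stamp the completion time.
fn completion_changes(output: serde_json::Value) -> UpdateChatToolCall {
    UpdateChatToolCall {
        tool_output: Some(output),
        tool_status: Some(ChatToolStatus::Completed),
        completed_at: Some(Some(Timestamp::from(jiff::Timestamp::now()))),
    }
}
```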
@@ -99,7 +99,7 @@ impl DocumentComment { pub fn is_edited(&self) -> bool { let duration = jiff::Timestamp::from(self.updated_at) - jiff::Timestamp::from(self.created_at); - duration.get_seconds() > comment::EDIT_GRACE_PERIOD_SECONDS + duration.get_seconds() > EDIT_GRACE_PERIOD_SECONDS } } diff --git a/crates/nvisy-postgres/src/model/document_file.rs b/crates/nvisy-postgres/src/model/document_file.rs index c4fa53e..b407d3e 100644 --- a/crates/nvisy-postgres/src/model/document_file.rs +++ b/crates/nvisy-postgres/src/model/document_file.rs @@ -5,9 +5,10 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::document_files; -use crate::types::constants::file; +use crate::types::RECENTLY_UPLOADED_HOURS; use crate::types::{ - ContentSegmentation, HasCreatedAt, HasDeletedAt, HasUpdatedAt, ProcessingStatus, RequireMode, + ContentSegmentation, FileSource, HasCreatedAt, HasDeletedAt, HasUpdatedAt, ProcessingStatus, + RequireMode, }; /// Document file model representing a file attached to a document. @@ -23,8 +24,10 @@ pub struct DocumentFile { pub document_id: Option, /// Reference to the account that owns this file. pub account_id: Uuid, - /// Parent file reference for hierarchical relationships. + /// Parent file reference for hierarchical relationships or version chains. pub parent_id: Option, + /// Version number (1 for original, increments for new versions). + pub version_number: i32, /// Human-readable file name for display. pub display_name: String, /// Original filename when uploaded. @@ -33,6 +36,8 @@ pub struct DocumentFile { pub file_extension: String, /// Classification tags. pub tags: Vec>, + /// How the file was created (uploaded, imported, generated). + pub source: FileSource, /// Processing mode requirements. pub require_mode: RequireMode, /// Processing priority (higher numbers = higher priority). @@ -74,8 +79,10 @@ pub struct NewDocumentFile { pub document_id: Option, /// Account ID. pub account_id: Uuid, - /// Parent file ID. + /// Parent file ID (for derived files or version chains). pub parent_id: Option, + /// Version number (defaults to 1). + pub version_number: Option, /// Display name. pub display_name: Option, /// Original filename. @@ -84,6 +91,8 @@ pub struct NewDocumentFile { pub file_extension: Option, /// Tags pub tags: Option>>, + /// How the file was created. + pub source: Option, /// Require mode pub require_mode: Option, /// Processing priority @@ -124,6 +133,8 @@ pub struct UpdateDocumentFile { pub parent_id: Option>, /// Tags pub tags: Option>>, + /// How the file was created. + pub source: Option, /// Require mode pub require_mode: Option, /// Processing priority @@ -149,7 +160,7 @@ pub struct UpdateDocumentFile { impl DocumentFile { /// Returns whether the file was uploaded recently. pub fn is_recently_uploaded(&self) -> bool { - self.was_created_within(jiff::Span::new().hours(file::RECENTLY_UPLOADED_HOURS)) + self.was_created_within(jiff::Span::new().hours(RECENTLY_UPLOADED_HOURS)) } /// Returns whether the file is deleted. @@ -250,6 +261,16 @@ impl DocumentFile { _ => "Minimal", } } + + /// Returns whether this is the original version (version 1). + pub fn is_original_version(&self) -> bool { + self.version_number == 1 + } + + /// Returns whether this file is a newer version of another file. 
+ pub fn is_version_of(&self, other: &DocumentFile) -> bool { + self.parent_id == Some(other.id) && self.version_number > other.version_number + } } impl HasCreatedAt for DocumentFile { diff --git a/crates/nvisy-postgres/src/model/mod.rs b/crates/nvisy-postgres/src/model/mod.rs index f6f546e..2f5b02e 100644 --- a/crates/nvisy-postgres/src/model/mod.rs +++ b/crates/nvisy-postgres/src/model/mod.rs @@ -21,9 +21,9 @@ mod workspace_invite; mod workspace_member; mod workspace_webhook; -mod studio_operation; -mod studio_session; -mod studio_tool_call; +mod chat_operation; +mod chat_session; +mod chat_tool_call; // Account models pub use account::{Account, NewAccount, UpdateAccount}; @@ -56,7 +56,7 @@ pub use workspace_integration_run::{ pub use workspace_invite::{NewWorkspaceInvite, UpdateWorkspaceInvite, WorkspaceInvite}; pub use workspace_member::{NewWorkspaceMember, UpdateWorkspaceMember, WorkspaceMember}; pub use workspace_webhook::{NewWorkspaceWebhook, UpdateWorkspaceWebhook, WorkspaceWebhook}; -// Studio models -pub use studio_operation::{NewStudioOperation, StudioOperation, UpdateStudioOperation}; -pub use studio_session::{NewStudioSession, StudioSession, UpdateStudioSession}; -pub use studio_tool_call::{NewStudioToolCall, StudioToolCall, UpdateStudioToolCall}; +// Chat models +pub use chat_operation::{ChatOperation, NewChatOperation, UpdateChatOperation}; +pub use chat_session::{ChatSession, NewChatSession, UpdateChatSession}; +pub use chat_tool_call::{ChatToolCall, NewChatToolCall, UpdateChatToolCall}; diff --git a/crates/nvisy-postgres/src/model/workspace_invite.rs b/crates/nvisy-postgres/src/model/workspace_invite.rs index 844b5bb..6dd8624 100644 --- a/crates/nvisy-postgres/src/model/workspace_invite.rs +++ b/crates/nvisy-postgres/src/model/workspace_invite.rs @@ -5,7 +5,7 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::workspace_invites; -use crate::types::constants::invite; +use crate::types::RECENTLY_SENT_HOURS; use crate::types::{HasCreatedAt, HasUpdatedAt, InviteStatus, WorkspaceRole}; /// Workspace invitation model representing an invitation to join a workspace. @@ -122,7 +122,7 @@ impl WorkspaceInvite { /// Returns whether the invitation was sent recently. pub fn is_recently_sent(&self) -> bool { - self.was_created_within(jiff::Span::new().hours(invite::RECENTLY_SENT_HOURS)) + self.was_created_within(jiff::Span::new().hours(RECENTLY_SENT_HOURS)) } /// Returns the time remaining until expiration. diff --git a/crates/nvisy-postgres/src/query/studio_operation.rs b/crates/nvisy-postgres/src/query/chat_operation.rs similarity index 58% rename from crates/nvisy-postgres/src/query/studio_operation.rs rename to crates/nvisy-postgres/src/query/chat_operation.rs index 715cb01..af2107f 100644 --- a/crates/nvisy-postgres/src/query/studio_operation.rs +++ b/crates/nvisy-postgres/src/query/chat_operation.rs @@ -1,4 +1,4 @@ -//! Studio operation repository for managing document operations (diffs). +//! Chat operation repository for managing document operations (diffs). use std::future::Future; @@ -6,42 +6,42 @@ use diesel::prelude::*; use diesel_async::RunQueryDsl; use uuid::Uuid; -use crate::model::{NewStudioOperation, StudioOperation, UpdateStudioOperation}; +use crate::model::{ChatOperation, NewChatOperation, UpdateChatOperation}; use crate::types::{CursorPage, CursorPagination, OffsetPagination}; use crate::{PgConnection, PgError, PgResult, schema}; -/// Repository for studio operation database operations. +/// Repository for chat operation database operations. 
/// /// Handles document operation tracking including CRUD operations, apply/revert /// state management, and querying by tool call or file. -pub trait StudioOperationRepository { - /// Creates a new studio operation. - fn create_studio_operation( +pub trait ChatOperationRepository { + /// Creates a new chat operation. + fn create_chat_operation( &mut self, - operation: NewStudioOperation, - ) -> impl Future> + Send; + operation: NewChatOperation, + ) -> impl Future> + Send; - /// Creates multiple studio operations in a batch. - fn create_studio_operations( + /// Creates multiple chat operations in a batch. + fn create_chat_operations( &mut self, - operations: Vec, - ) -> impl Future>> + Send; + operations: Vec, + ) -> impl Future>> + Send; - /// Finds a studio operation by its unique identifier. - fn find_studio_operation_by_id( + /// Finds a chat operation by its unique identifier. + fn find_chat_operation_by_id( &mut self, operation_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Updates an existing studio operation. - fn update_studio_operation( + /// Updates an existing chat operation. + fn update_chat_operation( &mut self, operation_id: Uuid, - changes: UpdateStudioOperation, - ) -> impl Future> + Send; + changes: UpdateChatOperation, + ) -> impl Future> + Send; - /// Deletes a studio operation. - fn delete_studio_operation( + /// Deletes a chat operation. + fn delete_chat_operation( &mut self, operation_id: Uuid, ) -> impl Future> + Send; @@ -50,45 +50,45 @@ pub trait StudioOperationRepository { fn list_tool_call_operations( &mut self, tool_call_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists operations for a file with offset pagination. fn offset_list_file_operations( &mut self, file_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists operations for a file with cursor pagination. fn cursor_list_file_operations( &mut self, file_id: Uuid, pagination: CursorPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists pending (unapplied) operations for a file. fn list_pending_file_operations( &mut self, file_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Marks an operation as applied. - fn apply_studio_operation( + fn apply_chat_operation( &mut self, operation_id: Uuid, - ) -> impl Future> + Send; + ) -> impl Future> + Send; /// Marks multiple operations as applied. - fn apply_studio_operations( + fn apply_chat_operations( &mut self, operation_ids: Vec, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Marks an operation as reverted. - fn revert_studio_operation( + fn revert_chat_operation( &mut self, operation_id: Uuid, - ) -> impl Future> + Send; + ) -> impl Future> + Send; /// Counts operations by status for a file. 
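A short sketch of the apply flow the repository trait above is built for: fetch whatever is still pending for a file and apply it in one batch. Crate and module paths are assumptions based on the re-exports elsewhere in this patch:

```rust
use nvisy_postgres::query::ChatOperationRepository;
use nvisy_postgres::{PgConnection, PgResult};
use uuid::Uuid;

/// Applies every not-yet-applied operation for a file in a single batch
/// update and reports how many rows were touched.
async fn apply_pending(conn: &mut PgConnection, file_id: Uuid) -> PgResult<usize> {
    let pending = conn.list_pending_file_operations(file_id).await?;
    let ids: Vec<Uuid> = pending.iter().map(|op| op.id).collect();
    let applied = conn.apply_chat_operations(ids).await?;
    Ok(applied.len())
}
```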
fn count_file_operations( @@ -110,16 +110,16 @@ pub struct FileOperationCounts { pub reverted: i64, } -impl StudioOperationRepository for PgConnection { - async fn create_studio_operation( +impl ChatOperationRepository for PgConnection { + async fn create_chat_operation( &mut self, - operation: NewStudioOperation, - ) -> PgResult { - use schema::studio_operations; + operation: NewChatOperation, + ) -> PgResult { + use schema::chat_operations; - let operation = diesel::insert_into(studio_operations::table) + let operation = diesel::insert_into(chat_operations::table) .values(&operation) - .returning(StudioOperation::as_returning()) + .returning(ChatOperation::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -127,15 +127,15 @@ impl StudioOperationRepository for PgConnection { Ok(operation) } - async fn create_studio_operations( + async fn create_chat_operations( &mut self, - operations: Vec, - ) -> PgResult> { - use schema::studio_operations; + operations: Vec, + ) -> PgResult> { + use schema::chat_operations; - let operations = diesel::insert_into(studio_operations::table) + let operations = diesel::insert_into(chat_operations::table) .values(&operations) - .returning(StudioOperation::as_returning()) + .returning(ChatOperation::as_returning()) .get_results(self) .await .map_err(PgError::from)?; @@ -143,15 +143,15 @@ impl StudioOperationRepository for PgConnection { Ok(operations) } - async fn find_studio_operation_by_id( + async fn find_chat_operation_by_id( &mut self, operation_id: Uuid, - ) -> PgResult> { - use schema::studio_operations::dsl::*; + ) -> PgResult> { + use schema::chat_operations::dsl::*; - let operation = studio_operations + let operation = chat_operations .filter(id.eq(operation_id)) - .select(StudioOperation::as_select()) + .select(ChatOperation::as_select()) .first(self) .await .optional() @@ -160,17 +160,17 @@ impl StudioOperationRepository for PgConnection { Ok(operation) } - async fn update_studio_operation( + async fn update_chat_operation( &mut self, operation_id: Uuid, - changes: UpdateStudioOperation, - ) -> PgResult { - use schema::studio_operations::dsl::*; + changes: UpdateChatOperation, + ) -> PgResult { + use schema::chat_operations::dsl::*; - let operation = diesel::update(studio_operations) + let operation = diesel::update(chat_operations) .filter(id.eq(operation_id)) .set(&changes) - .returning(StudioOperation::as_returning()) + .returning(ChatOperation::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -178,10 +178,10 @@ impl StudioOperationRepository for PgConnection { Ok(operation) } - async fn delete_studio_operation(&mut self, operation_id: Uuid) -> PgResult<()> { - use schema::studio_operations::dsl::*; + async fn delete_chat_operation(&mut self, operation_id: Uuid) -> PgResult<()> { + use schema::chat_operations::dsl::*; - diesel::delete(studio_operations) + diesel::delete(chat_operations) .filter(id.eq(operation_id)) .execute(self) .await @@ -190,12 +190,12 @@ impl StudioOperationRepository for PgConnection { Ok(()) } - async fn list_tool_call_operations(&mut self, tc_id: Uuid) -> PgResult> { - use schema::studio_operations::{self, dsl}; + async fn list_tool_call_operations(&mut self, tc_id: Uuid) -> PgResult> { + use schema::chat_operations::{self, dsl}; - let operations = studio_operations::table + let operations = chat_operations::table .filter(dsl::tool_call_id.eq(tc_id)) - .select(StudioOperation::as_select()) + .select(ChatOperation::as_select()) .order(dsl::created_at.asc()) .load(self) .await @@ 
-208,12 +208,12 @@ impl StudioOperationRepository for PgConnection { &mut self, f_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::studio_operations::{self, dsl}; + ) -> PgResult> { + use schema::chat_operations::{self, dsl}; - let operations = studio_operations::table + let operations = chat_operations::table .filter(dsl::file_id.eq(f_id)) - .select(StudioOperation::as_select()) + .select(ChatOperation::as_select()) .order(dsl::created_at.desc()) .limit(pagination.limit) .offset(pagination.offset) @@ -228,12 +228,12 @@ impl StudioOperationRepository for PgConnection { &mut self, f_id: Uuid, pagination: CursorPagination, - ) -> PgResult> { - use schema::studio_operations::{self, dsl}; + ) -> PgResult> { + use schema::chat_operations::{self, dsl}; let total = if pagination.include_count { Some( - studio_operations::table + chat_operations::table .filter(dsl::file_id.eq(f_id)) .count() .get_result::(self) @@ -246,26 +246,26 @@ impl StudioOperationRepository for PgConnection { let limit = pagination.limit + 1; - let items: Vec = if let Some(cursor) = &pagination.after { + let items: Vec = if let Some(cursor) = &pagination.after { let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); - studio_operations::table + chat_operations::table .filter(dsl::file_id.eq(f_id)) .filter( dsl::created_at .lt(&cursor_time) .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), ) - .select(StudioOperation::as_select()) + .select(ChatOperation::as_select()) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(limit) .load(self) .await .map_err(PgError::from)? } else { - studio_operations::table + chat_operations::table .filter(dsl::file_id.eq(f_id)) - .select(StudioOperation::as_select()) + .select(ChatOperation::as_select()) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(limit) .load(self) @@ -277,17 +277,17 @@ impl StudioOperationRepository for PgConnection { items, total, pagination.limit, - |op: &StudioOperation| (op.created_at.into(), op.id), + |op: &ChatOperation| (op.created_at.into(), op.id), )) } - async fn list_pending_file_operations(&mut self, f_id: Uuid) -> PgResult> { - use schema::studio_operations::{self, dsl}; + async fn list_pending_file_operations(&mut self, f_id: Uuid) -> PgResult> { + use schema::chat_operations::{self, dsl}; - let operations = studio_operations::table + let operations = chat_operations::table .filter(dsl::file_id.eq(f_id)) .filter(dsl::applied.eq(false)) - .select(StudioOperation::as_select()) + .select(ChatOperation::as_select()) .order(dsl::created_at.asc()) .load(self) .await @@ -296,28 +296,28 @@ impl StudioOperationRepository for PgConnection { Ok(operations) } - async fn apply_studio_operation(&mut self, operation_id: Uuid) -> PgResult { - let changes = UpdateStudioOperation { + async fn apply_chat_operation(&mut self, operation_id: Uuid) -> PgResult { + let changes = UpdateChatOperation { applied: Some(true), applied_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))), ..Default::default() }; - self.update_studio_operation(operation_id, changes).await + self.update_chat_operation(operation_id, changes).await } - async fn apply_studio_operations( + async fn apply_chat_operations( &mut self, operation_ids: Vec, - ) -> PgResult> { - use schema::studio_operations::dsl::*; + ) -> PgResult> { + use schema::chat_operations::dsl::*; let now = jiff_diesel::Timestamp::from(jiff::Timestamp::now()); - let operations = diesel::update(studio_operations) + let operations = diesel::update(chat_operations) 
.filter(id.eq_any(&operation_ids)) .set((applied.eq(true), applied_at.eq(Some(now)))) - .returning(StudioOperation::as_returning()) + .returning(ChatOperation::as_returning()) .get_results(self) .await .map_err(PgError::from)?; @@ -325,27 +325,27 @@ impl StudioOperationRepository for PgConnection { Ok(operations) } - async fn revert_studio_operation(&mut self, operation_id: Uuid) -> PgResult { - let changes = UpdateStudioOperation { + async fn revert_chat_operation(&mut self, operation_id: Uuid) -> PgResult { + let changes = UpdateChatOperation { reverted: Some(true), ..Default::default() }; - self.update_studio_operation(operation_id, changes).await + self.update_chat_operation(operation_id, changes).await } async fn count_file_operations(&mut self, f_id: Uuid) -> PgResult { use diesel::dsl::count_star; - use schema::studio_operations::{self, dsl}; + use schema::chat_operations::{self, dsl}; - let total = studio_operations::table + let total = chat_operations::table .filter(dsl::file_id.eq(f_id)) .select(count_star()) .get_result::(self) .await .map_err(PgError::from)?; - let applied_count = studio_operations::table + let applied_count = chat_operations::table .filter(dsl::file_id.eq(f_id)) .filter(dsl::applied.eq(true)) .select(count_star()) @@ -353,7 +353,7 @@ impl StudioOperationRepository for PgConnection { .await .map_err(PgError::from)?; - let reverted_count = studio_operations::table + let reverted_count = chat_operations::table .filter(dsl::file_id.eq(f_id)) .filter(dsl::reverted.eq(true)) .select(count_star()) diff --git a/crates/nvisy-postgres/src/query/chat_session.rs b/crates/nvisy-postgres/src/query/chat_session.rs new file mode 100644 index 0000000..5831af2 --- /dev/null +++ b/crates/nvisy-postgres/src/query/chat_session.rs @@ -0,0 +1,291 @@ +//! Chat session repository for managing LLM-assisted editing sessions. + +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use uuid::Uuid; + +use crate::model::{ChatSession, NewChatSession, UpdateChatSession}; +use crate::types::{ChatSessionStatus, CursorPage, CursorPagination, OffsetPagination}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for chat session database operations. +/// +/// Handles LLM-assisted editing session management including CRUD operations, +/// status tracking, and usage statistics updates. +pub trait ChatSessionRepository { + /// Creates a new chat session with the provided configuration. + fn create_chat_session( + &mut self, + session: NewChatSession, + ) -> impl Future> + Send; + + /// Finds a chat session by its unique identifier. + fn find_chat_session_by_id( + &mut self, + session_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates an existing chat session. + fn update_chat_session( + &mut self, + session_id: Uuid, + changes: UpdateChatSession, + ) -> impl Future> + Send; + + /// Deletes a chat session by archiving it. + fn delete_chat_session( + &mut self, + session_id: Uuid, + ) -> impl Future> + Send; + + /// Lists chat sessions for a workspace with offset pagination. + fn offset_list_chat_sessions( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists chat sessions for a workspace with cursor pagination. + fn cursor_list_chat_sessions( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + ) -> impl Future>> + Send; + + /// Lists chat sessions for an account with offset pagination. 
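Note that `delete_chat_session` above is a soft delete (it only flips the status to `Archived`), which the following sketch leans on. The pagination value is taken as a parameter since its construction isn't shown in this patch, and module paths are assumed as before:

```rust
use nvisy_postgres::model::ChatSession;
use nvisy_postgres::query::ChatSessionRepository;
use nvisy_postgres::types::OffsetPagination;
use nvisy_postgres::{PgConnection, PgResult};
use uuid::Uuid;

/// Archives the newest session on the requested page of a workspace's
/// history. The row itself (and its tool calls) stays in place.
async fn archive_newest(
    conn: &mut PgConnection,
    workspace_id: Uuid,
    page: OffsetPagination,
) -> PgResult<Option<ChatSession>> {
    let sessions = conn.offset_list_chat_sessions(workspace_id, page).await?;
    match sessions.into_iter().next() {
        Some(newest) => {
            conn.delete_chat_session(newest.id).await?;
            Ok(Some(newest))
        }
        None => Ok(None),
    }
}
```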
+ fn offset_list_account_chat_sessions( + &mut self, + account_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists active chat sessions for a file. + fn list_file_chat_sessions( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates the status of a chat session. + fn update_chat_session_status( + &mut self, + session_id: Uuid, + new_status: ChatSessionStatus, + ) -> impl Future> + Send; + + /// Increments the message and token counts for a session. + fn increment_chat_session_usage( + &mut self, + session_id: Uuid, + messages: i32, + tokens: i32, + ) -> impl Future> + Send; +} + +impl ChatSessionRepository for PgConnection { + async fn create_chat_session(&mut self, session: NewChatSession) -> PgResult { + use schema::chat_sessions; + + let session = diesel::insert_into(chat_sessions::table) + .values(&session) + .returning(ChatSession::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(session) + } + + async fn find_chat_session_by_id(&mut self, session_id: Uuid) -> PgResult> { + use schema::chat_sessions::dsl::*; + + let session = chat_sessions + .filter(id.eq(session_id)) + .select(ChatSession::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(session) + } + + async fn update_chat_session( + &mut self, + session_id: Uuid, + changes: UpdateChatSession, + ) -> PgResult { + use schema::chat_sessions::dsl::*; + + let session = diesel::update(chat_sessions) + .filter(id.eq(session_id)) + .set(&changes) + .returning(ChatSession::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(session) + } + + async fn delete_chat_session(&mut self, session_id: Uuid) -> PgResult<()> { + use schema::chat_sessions::dsl::*; + + diesel::update(chat_sessions) + .filter(id.eq(session_id)) + .set(session_status.eq(ChatSessionStatus::Archived)) + .execute(self) + .await + .map_err(PgError::from)?; + + Ok(()) + } + + async fn offset_list_chat_sessions( + &mut self, + ws_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::chat_sessions::{self, dsl}; + + let sessions = chat_sessions::table + .filter(dsl::workspace_id.eq(ws_id)) + .select(ChatSession::as_select()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(sessions) + } + + async fn cursor_list_chat_sessions( + &mut self, + ws_id: Uuid, + pagination: CursorPagination, + ) -> PgResult> { + use schema::chat_sessions::{self, dsl}; + + let total = if pagination.include_count { + Some( + chat_sessions::table + .filter(dsl::workspace_id.eq(ws_id)) + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + chat_sessions::table + .filter(dsl::workspace_id.eq(ws_id)) + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(ChatSession::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + chat_sessions::table + .filter(dsl::workspace_id.eq(ws_id)) + .select(ChatSession::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? 
+ }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |s: &ChatSession| (s.created_at.into(), s.id), + )) + } + + async fn offset_list_account_chat_sessions( + &mut self, + acc_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::chat_sessions::{self, dsl}; + + let sessions = chat_sessions::table + .filter(dsl::account_id.eq(acc_id)) + .select(ChatSession::as_select()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(sessions) + } + + async fn list_file_chat_sessions(&mut self, file_id: Uuid) -> PgResult> { + use schema::chat_sessions::{self, dsl}; + + let sessions = chat_sessions::table + .filter(dsl::primary_file_id.eq(file_id)) + .filter(dsl::session_status.ne(ChatSessionStatus::Archived)) + .select(ChatSession::as_select()) + .order(dsl::created_at.desc()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(sessions) + } + + async fn update_chat_session_status( + &mut self, + session_id: Uuid, + new_status: ChatSessionStatus, + ) -> PgResult { + let changes = UpdateChatSession { + session_status: Some(new_status), + ..Default::default() + }; + + self.update_chat_session(session_id, changes).await + } + + async fn increment_chat_session_usage( + &mut self, + session_id: Uuid, + messages: i32, + tokens: i32, + ) -> PgResult { + use schema::chat_sessions::dsl::*; + + let session = diesel::update(chat_sessions) + .filter(id.eq(session_id)) + .set(( + message_count.eq(message_count + messages), + token_count.eq(token_count + tokens), + )) + .returning(ChatSession::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(session) + } +} diff --git a/crates/nvisy-postgres/src/query/studio_tool_call.rs b/crates/nvisy-postgres/src/query/chat_tool_call.rs similarity index 54% rename from crates/nvisy-postgres/src/query/studio_tool_call.rs rename to crates/nvisy-postgres/src/query/chat_tool_call.rs index fa91afe..d1bf40b 100644 --- a/crates/nvisy-postgres/src/query/studio_tool_call.rs +++ b/crates/nvisy-postgres/src/query/chat_tool_call.rs @@ -1,4 +1,4 @@ -//! Studio tool call repository for managing tool invocations within sessions. +//! Chat tool call repository for managing tool invocations within sessions. use std::future::Future; @@ -6,36 +6,36 @@ use diesel::prelude::*; use diesel_async::RunQueryDsl; use uuid::Uuid; -use crate::model::{NewStudioToolCall, StudioToolCall, UpdateStudioToolCall}; -use crate::types::{CursorPage, CursorPagination, OffsetPagination, StudioToolStatus}; +use crate::model::{ChatToolCall, NewChatToolCall, UpdateChatToolCall}; +use crate::types::{ChatToolStatus, CursorPage, CursorPagination, OffsetPagination}; use crate::{PgConnection, PgError, PgResult, schema}; -/// Repository for studio tool call database operations. +/// Repository for chat tool call database operations. /// /// Handles tool invocation tracking including CRUD operations, status updates, /// and querying by session, file, or status. -pub trait StudioToolCallRepository { - /// Creates a new studio tool call. - fn create_studio_tool_call( +pub trait ChatToolCallRepository { + /// Creates a new chat tool call. + fn create_chat_tool_call( &mut self, - tool_call: NewStudioToolCall, - ) -> impl Future> + Send; + tool_call: NewChatToolCall, + ) -> impl Future> + Send; - /// Finds a studio tool call by its unique identifier. - fn find_studio_tool_call_by_id( + /// Finds a chat tool call by its unique identifier. 
+ fn find_chat_tool_call_by_id( &mut self, tool_call_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Updates an existing studio tool call. - fn update_studio_tool_call( + /// Updates an existing chat tool call. + fn update_chat_tool_call( &mut self, tool_call_id: Uuid, - changes: UpdateStudioToolCall, - ) -> impl Future> + Send; + changes: UpdateChatToolCall, + ) -> impl Future> + Send; - /// Deletes a studio tool call. - fn delete_studio_tool_call( + /// Deletes a chat tool call. + fn delete_chat_tool_call( &mut self, tool_call_id: Uuid, ) -> impl Future> + Send; @@ -45,59 +45,59 @@ pub trait StudioToolCallRepository { &mut self, session_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists tool calls for a session with cursor pagination. fn cursor_list_session_tool_calls( &mut self, session_id: Uuid, pagination: CursorPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists tool calls for a file with offset pagination. fn offset_list_file_tool_calls( &mut self, file_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists pending or running tool calls for a session. fn list_active_session_tool_calls( &mut self, session_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Updates the status of a tool call. - fn update_studio_tool_call_status( + fn update_chat_tool_call_status( &mut self, tool_call_id: Uuid, - new_status: StudioToolStatus, - ) -> impl Future> + Send; + new_status: ChatToolStatus, + ) -> impl Future> + Send; /// Marks a tool call as completed with the given output. - fn complete_studio_tool_call( + fn complete_chat_tool_call( &mut self, tool_call_id: Uuid, output: serde_json::Value, - ) -> impl Future> + Send; + ) -> impl Future> + Send; /// Cancels a pending or running tool call. 
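To illustrate the two "active" statuses tracked by the trait above, a sketch that sweeps a session when the user abandons it mid-generation; paths are assumed as in the earlier sketches:

```rust
use nvisy_postgres::query::ChatToolCallRepository;
use nvisy_postgres::{PgConnection, PgResult};
use uuid::Uuid;

/// Cancels every tool call for a session that is still `Pending` or
/// `Running`, e.g. when the editing session is closed before completion.
async fn cancel_active(conn: &mut PgConnection, session_id: Uuid) -> PgResult<usize> {
    let active = conn.list_active_session_tool_calls(session_id).await?;
    let mut cancelled = 0usize;
    for call in active {
        conn.cancel_chat_tool_call(call.id).await?;
        cancelled += 1;
    }
    Ok(cancelled)
}
```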
- fn cancel_studio_tool_call( + fn cancel_chat_tool_call( &mut self, tool_call_id: Uuid, - ) -> impl Future> + Send; + ) -> impl Future> + Send; } -impl StudioToolCallRepository for PgConnection { - async fn create_studio_tool_call( +impl ChatToolCallRepository for PgConnection { + async fn create_chat_tool_call( &mut self, - tool_call: NewStudioToolCall, - ) -> PgResult { - use schema::studio_tool_calls; + tool_call: NewChatToolCall, + ) -> PgResult { + use schema::chat_tool_calls; - let tool_call = diesel::insert_into(studio_tool_calls::table) + let tool_call = diesel::insert_into(chat_tool_calls::table) .values(&tool_call) - .returning(StudioToolCall::as_returning()) + .returning(ChatToolCall::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -105,15 +105,15 @@ impl StudioToolCallRepository for PgConnection { Ok(tool_call) } - async fn find_studio_tool_call_by_id( + async fn find_chat_tool_call_by_id( &mut self, tool_call_id: Uuid, - ) -> PgResult> { - use schema::studio_tool_calls::dsl::*; + ) -> PgResult> { + use schema::chat_tool_calls::dsl::*; - let tool_call = studio_tool_calls + let tool_call = chat_tool_calls .filter(id.eq(tool_call_id)) - .select(StudioToolCall::as_select()) + .select(ChatToolCall::as_select()) .first(self) .await .optional() @@ -122,17 +122,17 @@ impl StudioToolCallRepository for PgConnection { Ok(tool_call) } - async fn update_studio_tool_call( + async fn update_chat_tool_call( &mut self, tool_call_id: Uuid, - changes: UpdateStudioToolCall, - ) -> PgResult { - use schema::studio_tool_calls::dsl::*; + changes: UpdateChatToolCall, + ) -> PgResult { + use schema::chat_tool_calls::dsl::*; - let tool_call = diesel::update(studio_tool_calls) + let tool_call = diesel::update(chat_tool_calls) .filter(id.eq(tool_call_id)) .set(&changes) - .returning(StudioToolCall::as_returning()) + .returning(ChatToolCall::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -140,10 +140,10 @@ impl StudioToolCallRepository for PgConnection { Ok(tool_call) } - async fn delete_studio_tool_call(&mut self, tool_call_id: Uuid) -> PgResult<()> { - use schema::studio_tool_calls::dsl::*; + async fn delete_chat_tool_call(&mut self, tool_call_id: Uuid) -> PgResult<()> { + use schema::chat_tool_calls::dsl::*; - diesel::delete(studio_tool_calls) + diesel::delete(chat_tool_calls) .filter(id.eq(tool_call_id)) .execute(self) .await @@ -156,12 +156,12 @@ impl StudioToolCallRepository for PgConnection { &mut self, sess_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::studio_tool_calls::{self, dsl}; + ) -> PgResult> { + use schema::chat_tool_calls::{self, dsl}; - let tool_calls = studio_tool_calls::table + let tool_calls = chat_tool_calls::table .filter(dsl::session_id.eq(sess_id)) - .select(StudioToolCall::as_select()) + .select(ChatToolCall::as_select()) .order(dsl::started_at.desc()) .limit(pagination.limit) .offset(pagination.offset) @@ -176,12 +176,12 @@ impl StudioToolCallRepository for PgConnection { &mut self, sess_id: Uuid, pagination: CursorPagination, - ) -> PgResult> { - use schema::studio_tool_calls::{self, dsl}; + ) -> PgResult> { + use schema::chat_tool_calls::{self, dsl}; let total = if pagination.include_count { Some( - studio_tool_calls::table + chat_tool_calls::table .filter(dsl::session_id.eq(sess_id)) .count() .get_result::(self) @@ -194,26 +194,26 @@ impl StudioToolCallRepository for PgConnection { let limit = pagination.limit + 1; - let items: Vec = if let Some(cursor) = &pagination.after { + let items: Vec = if let 
Some(cursor) = &pagination.after { let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); - studio_tool_calls::table + chat_tool_calls::table .filter(dsl::session_id.eq(sess_id)) .filter( dsl::started_at .lt(&cursor_time) .or(dsl::started_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), ) - .select(StudioToolCall::as_select()) + .select(ChatToolCall::as_select()) .order((dsl::started_at.desc(), dsl::id.desc())) .limit(limit) .load(self) .await .map_err(PgError::from)? } else { - studio_tool_calls::table + chat_tool_calls::table .filter(dsl::session_id.eq(sess_id)) - .select(StudioToolCall::as_select()) + .select(ChatToolCall::as_select()) .order((dsl::started_at.desc(), dsl::id.desc())) .limit(limit) .load(self) @@ -225,7 +225,7 @@ impl StudioToolCallRepository for PgConnection { items, total, pagination.limit, - |tc: &StudioToolCall| (tc.started_at.into(), tc.id), + |tc: &ChatToolCall| (tc.started_at.into(), tc.id), )) } @@ -233,12 +233,12 @@ impl StudioToolCallRepository for PgConnection { &mut self, f_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::studio_tool_calls::{self, dsl}; + ) -> PgResult> { + use schema::chat_tool_calls::{self, dsl}; - let tool_calls = studio_tool_calls::table + let tool_calls = chat_tool_calls::table .filter(dsl::file_id.eq(f_id)) - .select(StudioToolCall::as_select()) + .select(ChatToolCall::as_select()) .order(dsl::started_at.desc()) .limit(pagination.limit) .offset(pagination.offset) @@ -252,17 +252,17 @@ impl StudioToolCallRepository for PgConnection { async fn list_active_session_tool_calls( &mut self, sess_id: Uuid, - ) -> PgResult> { - use schema::studio_tool_calls::{self, dsl}; + ) -> PgResult> { + use schema::chat_tool_calls::{self, dsl}; - let tool_calls = studio_tool_calls::table + let tool_calls = chat_tool_calls::table .filter(dsl::session_id.eq(sess_id)) .filter( dsl::tool_status - .eq(StudioToolStatus::Pending) - .or(dsl::tool_status.eq(StudioToolStatus::Running)), + .eq(ChatToolStatus::Pending) + .or(dsl::tool_status.eq(ChatToolStatus::Running)), ) - .select(StudioToolCall::as_select()) + .select(ChatToolCall::as_select()) .order(dsl::started_at.asc()) .load(self) .await @@ -271,40 +271,40 @@ impl StudioToolCallRepository for PgConnection { Ok(tool_calls) } - async fn update_studio_tool_call_status( + async fn update_chat_tool_call_status( &mut self, tool_call_id: Uuid, - new_status: StudioToolStatus, - ) -> PgResult { - let changes = UpdateStudioToolCall { + new_status: ChatToolStatus, + ) -> PgResult { + let changes = UpdateChatToolCall { tool_status: Some(new_status), ..Default::default() }; - self.update_studio_tool_call(tool_call_id, changes).await + self.update_chat_tool_call(tool_call_id, changes).await } - async fn complete_studio_tool_call( + async fn complete_chat_tool_call( &mut self, tool_call_id: Uuid, output: serde_json::Value, - ) -> PgResult { - let changes = UpdateStudioToolCall { + ) -> PgResult { + let changes = UpdateChatToolCall { tool_output: Some(output), - tool_status: Some(StudioToolStatus::Completed), + tool_status: Some(ChatToolStatus::Completed), completed_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))), }; - self.update_studio_tool_call(tool_call_id, changes).await + self.update_chat_tool_call(tool_call_id, changes).await } - async fn cancel_studio_tool_call(&mut self, tool_call_id: Uuid) -> PgResult { - let changes = UpdateStudioToolCall { - tool_status: Some(StudioToolStatus::Cancelled), + async fn cancel_chat_tool_call(&mut self, tool_call_id: Uuid) -> 
PgResult { + let changes = UpdateChatToolCall { + tool_status: Some(ChatToolStatus::Cancelled), completed_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))), ..Default::default() }; - self.update_studio_tool_call(tool_call_id, changes).await + self.update_chat_tool_call(tool_call_id, changes).await } } diff --git a/crates/nvisy-postgres/src/query/document_file.rs b/crates/nvisy-postgres/src/query/document_file.rs index 985afdd..0b92498 100644 --- a/crates/nvisy-postgres/src/query/document_file.rs +++ b/crates/nvisy-postgres/src/query/document_file.rs @@ -117,6 +117,29 @@ pub trait DocumentFileRepository { &mut self, file_ids: &[Uuid], ) -> impl Future>> + Send; + + /// Lists all versions of a file (the file itself and all files that have it as parent). + /// + /// Returns files ordered by version_number descending (newest first). + fn list_file_versions( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Finds the latest version of a file by traversing the version chain. + /// + /// Starting from a file, follows the chain of files where parent_id points + /// to the previous version and returns the one with the highest version_number. + fn find_latest_version( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Gets the next version number for creating a new version of a file. + fn get_next_version_number( + &mut self, + file_id: Uuid, + ) -> impl Future> + Send; } impl DocumentFileRepository for PgConnection { @@ -462,4 +485,55 @@ impl DocumentFileRepository for PgConnection { Ok(files) } + + async fn list_file_versions(&mut self, file_id: Uuid) -> PgResult> { + use schema::document_files::{self, dsl}; + + // Get the original file and all files that have it (or its descendants) as parent + // This query gets the file itself plus all files where parent_id = file_id + let files = document_files::table + .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) + .filter(dsl::deleted_at.is_null()) + .order(dsl::version_number.desc()) + .select(DocumentFile::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(files) + } + + async fn find_latest_version(&mut self, file_id: Uuid) -> PgResult> { + use schema::document_files::{self, dsl}; + + // Find the file with highest version_number that has file_id as parent, + // or the file itself if no newer versions exist + let latest = document_files::table + .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) + .filter(dsl::deleted_at.is_null()) + .order(dsl::version_number.desc()) + .select(DocumentFile::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(latest) + } + + async fn get_next_version_number(&mut self, file_id: Uuid) -> PgResult { + use diesel::dsl::max; + use schema::document_files::{self, dsl}; + + // Get the max version_number from the file and its versions + let max_version: Option = document_files::table + .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) + .filter(dsl::deleted_at.is_null()) + .select(max(dsl::version_number)) + .first(self) + .await + .map_err(PgError::from)?; + + Ok(max_version.unwrap_or(0) + 1) + } } diff --git a/crates/nvisy-postgres/src/query/mod.rs b/crates/nvisy-postgres/src/query/mod.rs index b3aeae2..a0830f4 100644 --- a/crates/nvisy-postgres/src/query/mod.rs +++ b/crates/nvisy-postgres/src/query/mod.rs @@ -32,22 +32,22 @@ mod workspace_invite; mod workspace_member; mod workspace_webhook; -mod studio_operation; -mod studio_session; -mod studio_tool_call; +mod chat_operation; +mod 
chat_session; +mod chat_tool_call; pub use account::AccountRepository; pub use account_action_token::AccountActionTokenRepository; pub use account_api_token::AccountApiTokenRepository; pub use account_notification::AccountNotificationRepository; +pub use chat_operation::{ChatOperationRepository, FileOperationCounts}; +pub use chat_session::ChatSessionRepository; +pub use chat_tool_call::ChatToolCallRepository; pub use document::DocumentRepository; pub use document_annotation::DocumentAnnotationRepository; pub use document_chunk::DocumentChunkRepository; pub use document_comment::DocumentCommentRepository; pub use document_file::DocumentFileRepository; -pub use studio_operation::{FileOperationCounts, StudioOperationRepository}; -pub use studio_session::StudioSessionRepository; -pub use studio_tool_call::StudioToolCallRepository; pub use workspace::WorkspaceRepository; pub use workspace_activity::WorkspaceActivityRepository; pub use workspace_integration::WorkspaceIntegrationRepository; diff --git a/crates/nvisy-postgres/src/query/studio_session.rs b/crates/nvisy-postgres/src/query/studio_session.rs deleted file mode 100644 index 6fe0501..0000000 --- a/crates/nvisy-postgres/src/query/studio_session.rs +++ /dev/null @@ -1,297 +0,0 @@ -//! Studio session repository for managing LLM-assisted editing sessions. - -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use uuid::Uuid; - -use crate::model::{NewStudioSession, StudioSession, UpdateStudioSession}; -use crate::types::{CursorPage, CursorPagination, OffsetPagination, StudioSessionStatus}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for studio session database operations. -/// -/// Handles LLM-assisted editing session management including CRUD operations, -/// status tracking, and usage statistics updates. -pub trait StudioSessionRepository { - /// Creates a new studio session with the provided configuration. - fn create_studio_session( - &mut self, - session: NewStudioSession, - ) -> impl Future> + Send; - - /// Finds a studio session by its unique identifier. - fn find_studio_session_by_id( - &mut self, - session_id: Uuid, - ) -> impl Future>> + Send; - - /// Updates an existing studio session. - fn update_studio_session( - &mut self, - session_id: Uuid, - changes: UpdateStudioSession, - ) -> impl Future> + Send; - - /// Deletes a studio session by archiving it. - fn delete_studio_session( - &mut self, - session_id: Uuid, - ) -> impl Future> + Send; - - /// Lists studio sessions for a workspace with offset pagination. - fn offset_list_studio_sessions( - &mut self, - workspace_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists studio sessions for a workspace with cursor pagination. - fn cursor_list_studio_sessions( - &mut self, - workspace_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists studio sessions for an account with offset pagination. - fn offset_list_account_studio_sessions( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists active studio sessions for a file. - fn list_file_studio_sessions( - &mut self, - file_id: Uuid, - ) -> impl Future>> + Send; - - /// Updates the status of a studio session. - fn update_studio_session_status( - &mut self, - session_id: Uuid, - new_status: StudioSessionStatus, - ) -> impl Future> + Send; - - /// Increments the message and token counts for a session. 
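The renamed repositories keep the same shape as their studio-prefixed predecessors, so call sites only swap trait and method names. A minimal sketch of how the new traits compose, assuming `PgConnection`, `PgResult`, and the repository traits are re-exported from the crate root as the internal imports suggest, and that the elided return types are `Vec<ChatToolCall>` and `i32` respectively:

```rust
use nvisy_postgres::query::{ChatToolCallRepository, DocumentFileRepository};
use nvisy_postgres::{PgConnection, PgResult};
use uuid::Uuid;

/// Marks a tool call as completed, then reports how many calls are still
/// active in its session and which version number a newly written copy of
/// the edited file should receive.
async fn finish_tool_call(
    conn: &mut PgConnection,
    session_id: Uuid,
    tool_call_id: Uuid,
    edited_file_id: Uuid,
) -> PgResult<(usize, i32)> {
    // Stores the JSON output and flips tool_status to `completed`.
    conn.complete_chat_tool_call(tool_call_id, serde_json::json!({ "status": "ok" }))
        .await?;

    // Pending and running calls for the same session, oldest first.
    let still_active = conn.list_active_session_tool_calls(session_id).await?;

    // Highest existing version_number in the file's chain, plus one.
    let next_version = conn.get_next_version_number(edited_file_id).await?;

    Ok((still_active.len(), next_version))
}
```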
- fn increment_studio_session_usage( - &mut self, - session_id: Uuid, - messages: i32, - tokens: i32, - ) -> impl Future> + Send; -} - -impl StudioSessionRepository for PgConnection { - async fn create_studio_session( - &mut self, - session: NewStudioSession, - ) -> PgResult { - use schema::studio_sessions; - - let session = diesel::insert_into(studio_sessions::table) - .values(&session) - .returning(StudioSession::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(session) - } - - async fn find_studio_session_by_id( - &mut self, - session_id: Uuid, - ) -> PgResult> { - use schema::studio_sessions::dsl::*; - - let session = studio_sessions - .filter(id.eq(session_id)) - .select(StudioSession::as_select()) - .first(self) - .await - .optional() - .map_err(PgError::from)?; - - Ok(session) - } - - async fn update_studio_session( - &mut self, - session_id: Uuid, - changes: UpdateStudioSession, - ) -> PgResult { - use schema::studio_sessions::dsl::*; - - let session = diesel::update(studio_sessions) - .filter(id.eq(session_id)) - .set(&changes) - .returning(StudioSession::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(session) - } - - async fn delete_studio_session(&mut self, session_id: Uuid) -> PgResult<()> { - use schema::studio_sessions::dsl::*; - - diesel::update(studio_sessions) - .filter(id.eq(session_id)) - .set(session_status.eq(StudioSessionStatus::Archived)) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(()) - } - - async fn offset_list_studio_sessions( - &mut self, - ws_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::studio_sessions::{self, dsl}; - - let sessions = studio_sessions::table - .filter(dsl::workspace_id.eq(ws_id)) - .select(StudioSession::as_select()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(sessions) - } - - async fn cursor_list_studio_sessions( - &mut self, - ws_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use schema::studio_sessions::{self, dsl}; - - let total = if pagination.include_count { - Some( - studio_sessions::table - .filter(dsl::workspace_id.eq(ws_id)) - .count() - .get_result::(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let limit = pagination.limit + 1; - - let items: Vec = if let Some(cursor) = &pagination.after { - let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); - - studio_sessions::table - .filter(dsl::workspace_id.eq(ws_id)) - .filter( - dsl::created_at - .lt(&cursor_time) - .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), - ) - .select(StudioSession::as_select()) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(limit) - .load(self) - .await - .map_err(PgError::from)? - } else { - studio_sessions::table - .filter(dsl::workspace_id.eq(ws_id)) - .select(StudioSession::as_select()) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(limit) - .load(self) - .await - .map_err(PgError::from)? 
- }; - - Ok(CursorPage::new( - items, - total, - pagination.limit, - |s: &StudioSession| (s.created_at.into(), s.id), - )) - } - - async fn offset_list_account_studio_sessions( - &mut self, - acc_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::studio_sessions::{self, dsl}; - - let sessions = studio_sessions::table - .filter(dsl::account_id.eq(acc_id)) - .select(StudioSession::as_select()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(sessions) - } - - async fn list_file_studio_sessions(&mut self, file_id: Uuid) -> PgResult> { - use schema::studio_sessions::{self, dsl}; - - let sessions = studio_sessions::table - .filter(dsl::primary_file_id.eq(file_id)) - .filter(dsl::session_status.ne(StudioSessionStatus::Archived)) - .select(StudioSession::as_select()) - .order(dsl::created_at.desc()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(sessions) - } - - async fn update_studio_session_status( - &mut self, - session_id: Uuid, - new_status: StudioSessionStatus, - ) -> PgResult { - let changes = UpdateStudioSession { - session_status: Some(new_status), - ..Default::default() - }; - - self.update_studio_session(session_id, changes).await - } - - async fn increment_studio_session_usage( - &mut self, - session_id: Uuid, - messages: i32, - tokens: i32, - ) -> PgResult { - use schema::studio_sessions::dsl::*; - - let session = diesel::update(studio_sessions) - .filter(id.eq(session_id)) - .set(( - message_count.eq(message_count + messages), - token_count.eq(token_count + tokens), - )) - .returning(StudioSession::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(session) - } -} diff --git a/crates/nvisy-postgres/src/schema.rs b/crates/nvisy-postgres/src/schema.rs index 2ee76a9..ca630c1 100644 --- a/crates/nvisy-postgres/src/schema.rs +++ b/crates/nvisy-postgres/src/schema.rs @@ -21,6 +21,10 @@ pub mod sql_types { #[diesel(postgres_type(name = "content_segmentation"))] pub struct ContentSegmentation; + #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] + #[diesel(postgres_type(name = "file_source"))] + pub struct FileSource; + #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] #[diesel(postgres_type(name = "integration_status"))] pub struct IntegrationStatus; @@ -50,12 +54,12 @@ pub mod sql_types { pub struct RunType; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "studio_session_status"))] - pub struct StudioSessionStatus; + #[diesel(postgres_type(name = "chat_session_status"))] + pub struct ChatSessionStatus; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "studio_tool_status"))] - pub struct StudioToolStatus; + #[diesel(postgres_type(name = "chat_tool_status"))] + pub struct ChatToolStatus; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] #[diesel(postgres_type(name = "webhook_event"))] @@ -218,6 +222,7 @@ diesel::table! { use super::sql_types::RequireMode; use super::sql_types::ProcessingStatus; use super::sql_types::ContentSegmentation; + use super::sql_types::FileSource; document_files (id) { id -> Uuid, @@ -225,10 +230,12 @@ diesel::table! 
{ document_id -> Nullable, account_id -> Uuid, parent_id -> Nullable, + version_number -> Int4, display_name -> Text, original_filename -> Text, file_extension -> Text, tags -> Array>, + source -> FileSource, require_mode -> RequireMode, processing_priority -> Int4, processing_status -> ProcessingStatus, @@ -268,7 +275,7 @@ diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; - studio_operations (id) { + chat_operations (id) { id -> Uuid, tool_call_id -> Uuid, file_id -> Uuid, @@ -285,15 +292,15 @@ diesel::table! { diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; - use super::sql_types::StudioSessionStatus; + use super::sql_types::ChatSessionStatus; - studio_sessions (id) { + chat_sessions (id) { id -> Uuid, workspace_id -> Uuid, account_id -> Uuid, primary_file_id -> Uuid, display_name -> Text, - session_status -> StudioSessionStatus, + session_status -> ChatSessionStatus, model_config -> Jsonb, message_count -> Int4, token_count -> Int4, @@ -305,9 +312,9 @@ diesel::table! { diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; - use super::sql_types::StudioToolStatus; + use super::sql_types::ChatToolStatus; - studio_tool_calls (id) { + chat_tool_calls (id) { id -> Uuid, session_id -> Uuid, file_id -> Uuid, @@ -315,7 +322,7 @@ diesel::table! { tool_name -> Text, tool_input -> Jsonb, tool_output -> Jsonb, - tool_status -> StudioToolStatus, + tool_status -> ChatToolStatus, started_at -> Timestamptz, completed_at -> Nullable, } @@ -483,15 +490,15 @@ diesel::joinable!(document_files -> documents (document_id)); diesel::joinable!(document_files -> workspaces (workspace_id)); diesel::joinable!(documents -> accounts (account_id)); diesel::joinable!(documents -> workspaces (workspace_id)); -diesel::joinable!(studio_operations -> document_chunks (chunk_id)); -diesel::joinable!(studio_operations -> document_files (file_id)); -diesel::joinable!(studio_operations -> studio_tool_calls (tool_call_id)); -diesel::joinable!(studio_sessions -> accounts (account_id)); -diesel::joinable!(studio_sessions -> document_files (primary_file_id)); -diesel::joinable!(studio_sessions -> workspaces (workspace_id)); -diesel::joinable!(studio_tool_calls -> document_chunks (chunk_id)); -diesel::joinable!(studio_tool_calls -> document_files (file_id)); -diesel::joinable!(studio_tool_calls -> studio_sessions (session_id)); +diesel::joinable!(chat_operations -> document_chunks (chunk_id)); +diesel::joinable!(chat_operations -> document_files (file_id)); +diesel::joinable!(chat_operations -> chat_tool_calls (tool_call_id)); +diesel::joinable!(chat_sessions -> accounts (account_id)); +diesel::joinable!(chat_sessions -> document_files (primary_file_id)); +diesel::joinable!(chat_sessions -> workspaces (workspace_id)); +diesel::joinable!(chat_tool_calls -> document_chunks (chunk_id)); +diesel::joinable!(chat_tool_calls -> document_files (file_id)); +diesel::joinable!(chat_tool_calls -> chat_sessions (session_id)); diesel::joinable!(workspace_activities -> accounts (account_id)); diesel::joinable!(workspace_activities -> workspaces (workspace_id)); diesel::joinable!(workspace_integration_runs -> accounts (account_id)); @@ -511,14 +518,14 @@ diesel::allow_tables_to_appear_in_same_query!( account_api_tokens, account_notifications, accounts, + chat_operations, + chat_sessions, + chat_tool_calls, document_annotations, document_chunks, document_comments, document_files, documents, - studio_operations, - studio_sessions, - studio_tool_calls, workspace_activities, 
workspace_integration_runs, workspace_integrations, diff --git a/crates/nvisy-postgres/src/types/constants.rs b/crates/nvisy-postgres/src/types/constants.rs index 4ad6ad4..843c1dc 100644 --- a/crates/nvisy-postgres/src/types/constants.rs +++ b/crates/nvisy-postgres/src/types/constants.rs @@ -1,115 +1,39 @@ //! Constants used throughout the application. -/// Database-related constants. -pub mod database { - /// Default pagination limit. - pub const DEFAULT_PAGE_SIZE: i64 = 50; - - /// Maximum pagination limit. - pub const MAX_PAGE_SIZE: i64 = 1000; -} - -/// Security-related constants. -pub mod security { - /// Default bcrypt cost for password hashing. - pub const DEFAULT_BCRYPT_COST: u32 = 12; - - /// Maximum number of active sessions per user. - pub const MAX_SESSIONS_PER_USER: i32 = 10; -} - -/// File and storage related constants. -pub mod storage { - /// Maximum file size in MB. - pub const MAX_FILE_SIZE_MB: i32 = 100; - - /// Maximum total storage per workspace in MB. - pub const MAX_PROJECT_STORAGE_MB: i32 = 1000; -} - -/// Notification and communication constants. -pub mod notification { - /// Default notification retention days. - pub const DEFAULT_RETENTION_DAYS: i32 = 90; - - /// Number of days within which a notification is considered "recent". - pub const RECENT_DAYS: i64 = 7; -} - -/// Constants related to account security and behavior. -pub mod account { - /// Maximum number of consecutive failed login attempts before account lockout. - pub const MAX_FAILED_LOGIN_ATTEMPTS: i32 = 5; - - /// Number of days after which a password change reminder should be shown. - pub const PASSWORD_CHANGE_REMINDER_DAYS: i64 = 90; - - /// Number of days within which an account is considered "recently active". - pub const RECENT_ACTIVITY_DAYS: i64 = 30; - - /// Number of hours within which an account is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 24; -} - -/// Constants related to API tokens and sessions. -pub mod token { - /// Number of minutes within which a token is considered "recently used". - pub const RECENT_USE_MINUTES: i64 = 30; - - /// Number of minutes before expiry to show expiration warnings. - pub const EXPIRY_WARNING_MINUTES: i64 = 15; - - /// Number of hours after which a token is considered "long-lived". - pub const LONG_LIVED_THRESHOLD_HOURS: i64 = 24; - - /// Number of hours within which a token is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 1; -} - -/// Constants related to comments and discussions. -pub mod comment { - /// Number of seconds of grace period for detecting comment edits. - pub const EDIT_GRACE_PERIOD_SECONDS: i64 = 5; - - /// Number of hours within which a comment is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 24; - - /// Number of hours within which a comment is considered "recently updated". - pub const RECENTLY_UPDATED_HOURS: i64 = 1; -} - -/// Constants related to workspaces and workspace management. -pub mod workspace { - /// Number of days within which workspace access is considered "recent". - pub const RECENT_ACCESS_DAYS: i64 = 7; - - /// Number of hours within which a workspace is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 24; -} - -/// Constants related to documents and document processing. -pub mod document { - /// Number of hours within which a document is considered "recently created". - pub const RECENTLY_CREATED_HOURS: i64 = 24; - - /// Number of hours within which a document is considered "recently updated". 
- pub const RECENTLY_UPDATED_HOURS: i64 = 1; -} - -/// Constants related to file processing and storage. -pub mod file { - /// Number of hours within which a file is considered "recently uploaded". - pub const RECENTLY_UPLOADED_HOURS: i64 = 1; - - /// Number of days within which processing status is considered "stale". - pub const STALE_PROCESSING_DAYS: i64 = 1; -} - -/// Constants related to invitations and membership. -pub mod invite { - /// Number of days an invitation remains valid by default. - pub const DEFAULT_EXPIRY_DAYS: i64 = 7; - - /// Number of hours within which an invite is considered "recently sent". - pub const RECENTLY_SENT_HOURS: i64 = 24; -} +/// Number of minutes before expiry to show expiration warnings. +/// +/// Used in: `account_api_tokens` +pub const EXPIRY_WARNING_MINUTES: i64 = 15; + +/// Number of hours after which a token is considered "long-lived". +/// +/// Used in: `account_api_tokens` +pub const LONG_LIVED_THRESHOLD_HOURS: i64 = 24; + +/// Number of seconds of grace period for detecting comment edits. +/// +/// Used in: `document_comments` +pub const EDIT_GRACE_PERIOD_SECONDS: i64 = 5; + +/// Number of hours within which a file is considered "recently uploaded". +/// +/// Used in: `document_files` +pub const RECENTLY_UPLOADED_HOURS: i64 = 1; + +/// Number of hours within which an invite is considered "recently sent". +/// +/// Used in: `workspace_invites` +pub const RECENTLY_SENT_HOURS: i64 = 24; + +/// Default notification retention days. +/// +/// Used in: `account_notifications` +pub const DEFAULT_RETENTION_DAYS: i32 = 90; + +/// Number of dimensions for vector embeddings. +/// +/// This value must match the `VECTOR(n)` dimension in the database schema. +/// Currently configured for OpenAI text-embedding-3-small (1536 dimensions). +/// +/// Used in: `document_chunks` +pub const EMBEDDING_DIMENSIONS: usize = 1536; diff --git a/crates/nvisy-postgres/src/types/constraint/chat_operations.rs b/crates/nvisy-postgres/src/types/constraint/chat_operations.rs new file mode 100644 index 0000000..99db5ec --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/chat_operations.rs @@ -0,0 +1,61 @@ +//! Chat operations table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Chat operations table constraint violations. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum ChatOperationConstraints { + // Operation validation constraints + #[strum(serialize = "chat_operations_operation_type_length")] + OperationTypeLength, + #[strum(serialize = "chat_operations_operation_diff_size")] + OperationDiffSize, + + // Operation business logic constraints + #[strum(serialize = "chat_operations_revert_requires_applied")] + RevertRequiresApplied, + + // Operation chronological constraints + #[strum(serialize = "chat_operations_applied_after_created")] + AppliedAfterCreated, +} + +impl ChatOperationConstraints { + /// Creates a new [`ChatOperationConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. 
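With the nested constant modules flattened into plain `pub const` items, callers import the values directly from `nvisy_postgres::types`. A small, hypothetical guard showing the kind of check `EMBEDDING_DIMENSIONS` enables before writing to `document_chunks`:

```rust
use nvisy_postgres::types::EMBEDDING_DIMENSIONS;

/// Rejects vectors whose length does not match the VECTOR(n) column.
fn check_embedding_len(embedding: &[f32]) -> Result<(), String> {
    if embedding.len() == EMBEDDING_DIMENSIONS {
        Ok(())
    } else {
        Err(format!(
            "expected {} dimensions, got {}",
            EMBEDDING_DIMENSIONS,
            embedding.len()
        ))
    }
}
```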
+ pub fn categorize(&self) -> ConstraintCategory { + match self { + ChatOperationConstraints::OperationTypeLength + | ChatOperationConstraints::OperationDiffSize => ConstraintCategory::Validation, + + ChatOperationConstraints::RevertRequiresApplied => ConstraintCategory::BusinessLogic, + + ChatOperationConstraints::AppliedAfterCreated => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: ChatOperationConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for ChatOperationConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/chat_sessions.rs b/crates/nvisy-postgres/src/types/constraint/chat_sessions.rs new file mode 100644 index 0000000..05dc8b9 --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/chat_sessions.rs @@ -0,0 +1,61 @@ +//! Chat sessions table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Chat sessions table constraint violations. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum ChatSessionConstraints { + // Session validation constraints + #[strum(serialize = "chat_sessions_display_name_length")] + DisplayNameLength, + #[strum(serialize = "chat_sessions_model_config_size")] + ModelConfigSize, + #[strum(serialize = "chat_sessions_message_count_min")] + MessageCountMin, + #[strum(serialize = "chat_sessions_token_count_min")] + TokenCountMin, + + // Session chronological constraints + #[strum(serialize = "chat_sessions_updated_after_created")] + UpdatedAfterCreated, +} + +impl ChatSessionConstraints { + /// Creates a new [`ChatSessionConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + ChatSessionConstraints::DisplayNameLength + | ChatSessionConstraints::ModelConfigSize + | ChatSessionConstraints::MessageCountMin + | ChatSessionConstraints::TokenCountMin => ConstraintCategory::Validation, + + ChatSessionConstraints::UpdatedAfterCreated => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: ChatSessionConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for ChatSessionConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/chat_tool_calls.rs b/crates/nvisy-postgres/src/types/constraint/chat_tool_calls.rs new file mode 100644 index 0000000..c836dea --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/chat_tool_calls.rs @@ -0,0 +1,58 @@ +//! Chat tool calls table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Chat tool calls table constraint violations. 
+#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum ChatToolCallConstraints { + // Tool call validation constraints + #[strum(serialize = "chat_tool_calls_tool_name_length")] + ToolNameLength, + #[strum(serialize = "chat_tool_calls_tool_input_size")] + ToolInputSize, + #[strum(serialize = "chat_tool_calls_tool_output_size")] + ToolOutputSize, + + // Tool call chronological constraints + #[strum(serialize = "chat_tool_calls_completed_after_started")] + CompletedAfterStarted, +} + +impl ChatToolCallConstraints { + /// Creates a new [`ChatToolCallConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + ChatToolCallConstraints::ToolNameLength + | ChatToolCallConstraints::ToolInputSize + | ChatToolCallConstraints::ToolOutputSize => ConstraintCategory::Validation, + + ChatToolCallConstraints::CompletedAfterStarted => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: ChatToolCallConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for ChatToolCallConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/mod.rs b/crates/nvisy-postgres/src/types/constraint/mod.rs index 06e4824..135a540 100644 --- a/crates/nvisy-postgres/src/types/constraint/mod.rs +++ b/crates/nvisy-postgres/src/types/constraint/mod.rs @@ -26,10 +26,10 @@ mod document_files; mod document_versions; mod documents; -// Studio-related constraint modules -mod studio_operations; -mod studio_sessions; -mod studio_tool_calls; +// Chat-related constraint modules +mod chat_operations; +mod chat_sessions; +mod chat_tool_calls; use std::fmt; @@ -39,15 +39,15 @@ pub use self::account_action_tokens::AccountActionTokenConstraints; pub use self::account_api_tokens::AccountApiTokenConstraints; pub use self::account_notifications::AccountNotificationConstraints; pub use self::accounts::AccountConstraints; +pub use self::chat_operations::ChatOperationConstraints; +pub use self::chat_sessions::ChatSessionConstraints; +pub use self::chat_tool_calls::ChatToolCallConstraints; pub use self::document_annotations::DocumentAnnotationConstraints; pub use self::document_chunks::DocumentChunkConstraints; pub use self::document_comments::DocumentCommentConstraints; pub use self::document_files::DocumentFileConstraints; pub use self::document_versions::DocumentVersionConstraints; pub use self::documents::DocumentConstraints; -pub use self::studio_operations::StudioOperationConstraints; -pub use self::studio_sessions::StudioSessionConstraints; -pub use self::studio_tool_calls::StudioToolCallConstraints; pub use self::workspace_activities::WorkspaceActivitiesConstraints; pub use self::workspace_integration_runs::WorkspaceIntegrationRunConstraints; pub use self::workspace_integrations::WorkspaceIntegrationConstraints; @@ -87,10 +87,10 @@ pub enum ConstraintViolation { DocumentFile(DocumentFileConstraints), DocumentVersion(DocumentVersionConstraints), - // Studio-related constraints - StudioSession(StudioSessionConstraints), - StudioToolCall(StudioToolCallConstraints), - StudioOperation(StudioOperationConstraints), + // Chat-related constraints + ChatSession(ChatSessionConstraints), + 
ChatToolCall(ChatToolCallConstraints), + ChatOperation(ChatOperationConstraints), } /// Categories of database constraint violations. @@ -167,10 +167,10 @@ impl ConstraintViolation { DocumentFileConstraints::new => DocumentFile, DocumentVersionConstraints::new => DocumentVersion, }, - "studio" => try_parse! { - StudioSessionConstraints::new => StudioSession, - StudioToolCallConstraints::new => StudioToolCall, - StudioOperationConstraints::new => StudioOperation, + "chat" => try_parse! { + ChatSessionConstraints::new => ChatSession, + ChatToolCallConstraints::new => ChatToolCall, + ChatOperationConstraints::new => ChatOperation, }, _ => None, } @@ -204,10 +204,10 @@ impl ConstraintViolation { ConstraintViolation::DocumentFile(_) => "document_files", ConstraintViolation::DocumentVersion(_) => "document_versions", - // Studio-related tables - ConstraintViolation::StudioSession(_) => "studio_sessions", - ConstraintViolation::StudioToolCall(_) => "studio_tool_calls", - ConstraintViolation::StudioOperation(_) => "studio_operations", + // Chat-related tables + ConstraintViolation::ChatSession(_) => "chat_sessions", + ConstraintViolation::ChatToolCall(_) => "chat_tool_calls", + ConstraintViolation::ChatOperation(_) => "chat_operations", } } @@ -237,9 +237,9 @@ impl ConstraintViolation { | ConstraintViolation::DocumentFile(_) | ConstraintViolation::DocumentVersion(_) => "documents", - ConstraintViolation::StudioSession(_) - | ConstraintViolation::StudioToolCall(_) - | ConstraintViolation::StudioOperation(_) => "studio", + ConstraintViolation::ChatSession(_) + | ConstraintViolation::ChatToolCall(_) + | ConstraintViolation::ChatOperation(_) => "chat", } } @@ -268,9 +268,9 @@ impl ConstraintViolation { ConstraintViolation::DocumentFile(c) => c.categorize(), ConstraintViolation::DocumentVersion(c) => c.categorize(), - ConstraintViolation::StudioSession(c) => c.categorize(), - ConstraintViolation::StudioToolCall(c) => c.categorize(), - ConstraintViolation::StudioOperation(c) => c.categorize(), + ConstraintViolation::ChatSession(c) => c.categorize(), + ConstraintViolation::ChatToolCall(c) => c.categorize(), + ConstraintViolation::ChatOperation(c) => c.categorize(), } } @@ -304,9 +304,9 @@ impl fmt::Display for ConstraintViolation { ConstraintViolation::DocumentFile(c) => write!(f, "{}", c), ConstraintViolation::DocumentVersion(c) => write!(f, "{}", c), - ConstraintViolation::StudioSession(c) => write!(f, "{}", c), - ConstraintViolation::StudioToolCall(c) => write!(f, "{}", c), - ConstraintViolation::StudioOperation(c) => write!(f, "{}", c), + ConstraintViolation::ChatSession(c) => write!(f, "{}", c), + ConstraintViolation::ChatToolCall(c) => write!(f, "{}", c), + ConstraintViolation::ChatOperation(c) => write!(f, "{}", c), } } } diff --git a/crates/nvisy-postgres/src/types/constraint/studio_operations.rs b/crates/nvisy-postgres/src/types/constraint/studio_operations.rs deleted file mode 100644 index 041fd1a..0000000 --- a/crates/nvisy-postgres/src/types/constraint/studio_operations.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! Studio operations table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Studio operations table constraint violations. 
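The chat constraint enums parse straight from the Postgres constraint name and self-categorize, which is what the server's error mapping relies on. An illustrative helper (the `classify` function is hypothetical; the types and methods come from this patch):

```rust
use nvisy_postgres::types::{ChatOperationConstraints, ConstraintCategory, ConstraintViolation};

/// Resolves a raw constraint name from a chat_operations violation into a
/// coarse category plus the wrapped violation value used by error handlers.
fn classify(constraint_name: &str) -> Option<(ConstraintCategory, ConstraintViolation)> {
    let constraint = ChatOperationConstraints::new(constraint_name)?;
    let category = constraint.categorize();
    Some((category, ConstraintViolation::ChatOperation(constraint)))
}

fn main() {
    let (category, violation) = classify("chat_operations_revert_requires_applied").unwrap();
    assert!(matches!(category, ConstraintCategory::BusinessLogic));
    // Display round-trips back to the original constraint name.
    assert_eq!(violation.to_string(), "chat_operations_revert_requires_applied");
}
```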
-#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum StudioOperationConstraints { - // Operation validation constraints - #[strum(serialize = "studio_operations_operation_type_length")] - OperationTypeLength, - #[strum(serialize = "studio_operations_operation_diff_size")] - OperationDiffSize, - - // Operation business logic constraints - #[strum(serialize = "studio_operations_revert_requires_applied")] - RevertRequiresApplied, - - // Operation chronological constraints - #[strum(serialize = "studio_operations_applied_after_created")] - AppliedAfterCreated, -} - -impl StudioOperationConstraints { - /// Creates a new [`StudioOperationConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - StudioOperationConstraints::OperationTypeLength - | StudioOperationConstraints::OperationDiffSize => ConstraintCategory::Validation, - - StudioOperationConstraints::RevertRequiresApplied => ConstraintCategory::BusinessLogic, - - StudioOperationConstraints::AppliedAfterCreated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: StudioOperationConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for StudioOperationConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/studio_sessions.rs b/crates/nvisy-postgres/src/types/constraint/studio_sessions.rs deleted file mode 100644 index a3f5802..0000000 --- a/crates/nvisy-postgres/src/types/constraint/studio_sessions.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! Studio sessions table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Studio sessions table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum StudioSessionConstraints { - // Session validation constraints - #[strum(serialize = "studio_sessions_display_name_length")] - DisplayNameLength, - #[strum(serialize = "studio_sessions_model_config_size")] - ModelConfigSize, - #[strum(serialize = "studio_sessions_message_count_min")] - MessageCountMin, - #[strum(serialize = "studio_sessions_token_count_min")] - TokenCountMin, - - // Session chronological constraints - #[strum(serialize = "studio_sessions_updated_after_created")] - UpdatedAfterCreated, -} - -impl StudioSessionConstraints { - /// Creates a new [`StudioSessionConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. 
- pub fn categorize(&self) -> ConstraintCategory { - match self { - StudioSessionConstraints::DisplayNameLength - | StudioSessionConstraints::ModelConfigSize - | StudioSessionConstraints::MessageCountMin - | StudioSessionConstraints::TokenCountMin => ConstraintCategory::Validation, - - StudioSessionConstraints::UpdatedAfterCreated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: StudioSessionConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for StudioSessionConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/studio_tool_calls.rs b/crates/nvisy-postgres/src/types/constraint/studio_tool_calls.rs deleted file mode 100644 index 7a4dfe4..0000000 --- a/crates/nvisy-postgres/src/types/constraint/studio_tool_calls.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Studio tool calls table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Studio tool calls table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum StudioToolCallConstraints { - // Tool call validation constraints - #[strum(serialize = "studio_tool_calls_tool_name_length")] - ToolNameLength, - #[strum(serialize = "studio_tool_calls_tool_input_size")] - ToolInputSize, - #[strum(serialize = "studio_tool_calls_tool_output_size")] - ToolOutputSize, - - // Tool call chronological constraints - #[strum(serialize = "studio_tool_calls_completed_after_started")] - CompletedAfterStarted, -} - -impl StudioToolCallConstraints { - /// Creates a new [`StudioToolCallConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - StudioToolCallConstraints::ToolNameLength - | StudioToolCallConstraints::ToolInputSize - | StudioToolCallConstraints::ToolOutputSize => ConstraintCategory::Validation, - - StudioToolCallConstraints::CompletedAfterStarted => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: StudioToolCallConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for StudioToolCallConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/enums/studio_session_status.rs b/crates/nvisy-postgres/src/types/enums/chat_session_status.rs similarity index 66% rename from crates/nvisy-postgres/src/types/enums/studio_session_status.rs rename to crates/nvisy-postgres/src/types/enums/chat_session_status.rs index 1522a0d..31366c5 100644 --- a/crates/nvisy-postgres/src/types/enums/studio_session_status.rs +++ b/crates/nvisy-postgres/src/types/enums/chat_session_status.rs @@ -1,4 +1,4 @@ -//! Studio session status enumeration for LLM-assisted editing sessions. +//! Chat session status enumeration for LLM-assisted editing sessions. use diesel_derive_enum::DbEnum; #[cfg(feature = "schema")] @@ -6,16 +6,16 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use strum::{Display, EnumIter, EnumString}; -/// Defines the lifecycle status of a studio editing session. 
+/// Defines the lifecycle status of a chat editing session. /// -/// This enumeration corresponds to the `STUDIO_SESSION_STATUS` PostgreSQL enum and is used +/// This enumeration corresponds to the `CHAT_SESSION_STATUS` PostgreSQL enum and is used /// to track the state of LLM-assisted document editing sessions as they progress through /// their lifecycle from active use to archival. #[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] #[cfg_attr(feature = "schema", derive(JsonSchema))] #[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::StudioSessionStatus"] -pub enum StudioSessionStatus { +#[ExistingTypePath = "crate::schema::sql_types::ChatSessionStatus"] +pub enum ChatSessionStatus { /// Session is currently active and in use #[db_rename = "active"] #[serde(rename = "active")] @@ -33,60 +33,57 @@ pub enum StudioSessionStatus { Archived, } -impl StudioSessionStatus { +impl ChatSessionStatus { /// Returns whether the session is currently active. #[inline] pub fn is_active(self) -> bool { - matches!(self, StudioSessionStatus::Active) + matches!(self, ChatSessionStatus::Active) } /// Returns whether the session is paused. #[inline] pub fn is_paused(self) -> bool { - matches!(self, StudioSessionStatus::Paused) + matches!(self, ChatSessionStatus::Paused) } /// Returns whether the session is archived. #[inline] pub fn is_archived(self) -> bool { - matches!(self, StudioSessionStatus::Archived) + matches!(self, ChatSessionStatus::Archived) } /// Returns whether the session can accept new messages or tool calls. #[inline] pub fn can_accept_input(self) -> bool { - matches!(self, StudioSessionStatus::Active) + matches!(self, ChatSessionStatus::Active) } /// Returns whether the session can be resumed. #[inline] pub fn can_resume(self) -> bool { - matches!(self, StudioSessionStatus::Paused) + matches!(self, ChatSessionStatus::Paused) } /// Returns whether the session can be paused. #[inline] pub fn can_pause(self) -> bool { - matches!(self, StudioSessionStatus::Active) + matches!(self, ChatSessionStatus::Active) } /// Returns whether the session can be archived. #[inline] pub fn can_archive(self) -> bool { - matches!( - self, - StudioSessionStatus::Active | StudioSessionStatus::Paused - ) + matches!(self, ChatSessionStatus::Active | ChatSessionStatus::Paused) } /// Returns whether the session is in a final state. #[inline] pub fn is_final(self) -> bool { - matches!(self, StudioSessionStatus::Archived) + matches!(self, ChatSessionStatus::Archived) } /// Returns session statuses that are considered active (not archived). - pub fn active_statuses() -> &'static [StudioSessionStatus] { - &[StudioSessionStatus::Active, StudioSessionStatus::Paused] + pub fn active_statuses() -> &'static [ChatSessionStatus] { + &[ChatSessionStatus::Active, ChatSessionStatus::Paused] } } diff --git a/crates/nvisy-postgres/src/types/enums/studio_tool_status.rs b/crates/nvisy-postgres/src/types/enums/chat_tool_status.rs similarity index 62% rename from crates/nvisy-postgres/src/types/enums/studio_tool_status.rs rename to crates/nvisy-postgres/src/types/enums/chat_tool_status.rs index 9ee5ed7..e1f4fe2 100644 --- a/crates/nvisy-postgres/src/types/enums/studio_tool_status.rs +++ b/crates/nvisy-postgres/src/types/enums/chat_tool_status.rs @@ -1,4 +1,4 @@ -//! Studio tool status enumeration for tool execution tracking. +//! Chat tool status enumeration for tool execution tracking. 
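The renamed `ChatSessionStatus` keeps the studio enum's transition helpers, so lifecycle checks read the same after the rename. A short sketch of how a handler might gate writes on them (the `ensure_writable` helper is hypothetical):

```rust
use nvisy_postgres::types::ChatSessionStatus;

/// Returns an error message when a session cannot accept new messages.
fn ensure_writable(status: ChatSessionStatus) -> Result<(), &'static str> {
    if status.can_accept_input() {
        Ok(())
    } else if status.can_resume() {
        Err("session is paused; resume it before sending messages")
    } else {
        Err("session is archived and read-only")
    }
}

fn main() {
    assert!(ensure_writable(ChatSessionStatus::Active).is_ok());
    assert!(ensure_writable(ChatSessionStatus::Paused).is_err());
    assert!(ChatSessionStatus::Archived.is_final());
}
```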
use diesel_derive_enum::DbEnum; #[cfg(feature = "schema")] @@ -6,16 +6,16 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use strum::{Display, EnumIter, EnumString}; -/// Defines the execution status of a studio tool call. +/// Defines the execution status of a chat tool call. /// -/// This enumeration corresponds to the `STUDIO_TOOL_STATUS` PostgreSQL enum and is used -/// to track the state of tool invocations within studio sessions as they progress +/// This enumeration corresponds to the `CHAT_TOOL_STATUS` PostgreSQL enum and is used +/// to track the state of tool invocations within chat sessions as they progress /// from pending through execution to completion or cancellation. #[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] #[cfg_attr(feature = "schema", derive(JsonSchema))] #[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::StudioToolStatus"] -pub enum StudioToolStatus { +#[ExistingTypePath = "crate::schema::sql_types::ChatToolStatus"] +pub enum ChatToolStatus { /// Tool call is queued and waiting to be executed #[db_rename = "pending"] #[serde(rename = "pending")] @@ -38,65 +38,62 @@ pub enum StudioToolStatus { Cancelled, } -impl StudioToolStatus { +impl ChatToolStatus { /// Returns whether the tool call is pending execution. #[inline] pub fn is_pending(self) -> bool { - matches!(self, StudioToolStatus::Pending) + matches!(self, ChatToolStatus::Pending) } /// Returns whether the tool is currently running. #[inline] pub fn is_running(self) -> bool { - matches!(self, StudioToolStatus::Running) + matches!(self, ChatToolStatus::Running) } /// Returns whether the tool execution completed successfully. #[inline] pub fn is_completed(self) -> bool { - matches!(self, StudioToolStatus::Completed) + matches!(self, ChatToolStatus::Completed) } /// Returns whether the tool execution was cancelled. #[inline] pub fn is_cancelled(self) -> bool { - matches!(self, StudioToolStatus::Cancelled) + matches!(self, ChatToolStatus::Cancelled) } /// Returns whether the tool is in a final state. #[inline] pub fn is_final(self) -> bool { - matches!( - self, - StudioToolStatus::Completed | StudioToolStatus::Cancelled - ) + matches!(self, ChatToolStatus::Completed | ChatToolStatus::Cancelled) } /// Returns whether the tool can be started. #[inline] pub fn can_start(self) -> bool { - matches!(self, StudioToolStatus::Pending) + matches!(self, ChatToolStatus::Pending) } /// Returns whether the tool can be cancelled. #[inline] pub fn can_cancel(self) -> bool { - matches!(self, StudioToolStatus::Pending | StudioToolStatus::Running) + matches!(self, ChatToolStatus::Pending | ChatToolStatus::Running) } /// Returns whether the tool execution is active (not final). #[inline] pub fn is_active(self) -> bool { - matches!(self, StudioToolStatus::Pending | StudioToolStatus::Running) + matches!(self, ChatToolStatus::Pending | ChatToolStatus::Running) } /// Returns tool statuses that are considered active (not final). - pub fn active_statuses() -> &'static [StudioToolStatus] { - &[StudioToolStatus::Pending, StudioToolStatus::Running] + pub fn active_statuses() -> &'static [ChatToolStatus] { + &[ChatToolStatus::Pending, ChatToolStatus::Running] } /// Returns tool statuses that represent final states. 
- pub fn final_statuses() -> &'static [StudioToolStatus] { - &[StudioToolStatus::Completed, StudioToolStatus::Cancelled] + pub fn final_statuses() -> &'static [ChatToolStatus] { + &[ChatToolStatus::Completed, ChatToolStatus::Cancelled] } } diff --git a/crates/nvisy-postgres/src/types/enums/file_source.rs b/crates/nvisy-postgres/src/types/enums/file_source.rs new file mode 100644 index 0000000..3c33723 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/file_source.rs @@ -0,0 +1,66 @@ +//! File source enumeration indicating how a file was created. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines how a file was created in the system. +/// +/// This enumeration corresponds to the `FILE_SOURCE` PostgreSQL enum and is used +/// to track the origin of files - whether they were uploaded by users, imported +/// from external sources, or generated by the system. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::FileSource"] +pub enum FileSource { + /// File was manually uploaded by a user + #[db_rename = "uploaded"] + #[serde(rename = "uploaded")] + #[default] + Uploaded, + + /// File was imported from an external source + #[db_rename = "imported"] + #[serde(rename = "imported")] + Imported, + + /// File was generated by the system + #[db_rename = "generated"] + #[serde(rename = "generated")] + Generated, +} + +impl FileSource { + /// Returns whether the file was uploaded by a user. + #[inline] + pub fn is_uploaded(self) -> bool { + matches!(self, FileSource::Uploaded) + } + + /// Returns whether the file was imported from an external source. + #[inline] + pub fn is_imported(self) -> bool { + matches!(self, FileSource::Imported) + } + + /// Returns whether the file was generated by the system. + #[inline] + pub fn is_generated(self) -> bool { + matches!(self, FileSource::Generated) + } + + /// Returns whether the file was created by a user action (uploaded or imported). + #[inline] + pub fn is_user_created(self) -> bool { + matches!(self, FileSource::Uploaded | FileSource::Imported) + } + + /// Returns whether the file was created automatically (generated). 
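`FileSource` separates user uploads and imports from system-generated copies (such as files produced by chat edits). A hypothetical policy built on its predicates:

```rust
use nvisy_postgres::types::FileSource;

/// Example policy: only user-created files count toward a workspace's
/// storage quota; system-generated versions do not.
fn counts_toward_quota(source: FileSource) -> bool {
    source.is_user_created()
}

fn main() {
    assert!(counts_toward_quota(FileSource::Uploaded));
    assert!(counts_toward_quota(FileSource::Imported));
    assert!(!counts_toward_quota(FileSource::Generated));
}
```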
+ #[inline] + pub fn is_system_created(self) -> bool { + matches!(self, FileSource::Generated) + } +} diff --git a/crates/nvisy-postgres/src/types/enums/mod.rs b/crates/nvisy-postgres/src/types/enums/mod.rs index 32c76ba..d74d4ea 100644 --- a/crates/nvisy-postgres/src/types/enums/mod.rs +++ b/crates/nvisy-postgres/src/types/enums/mod.rs @@ -23,18 +23,22 @@ pub mod workspace_role; // Document-related enumerations pub mod annotation_type; pub mod content_segmentation; +pub mod file_source; pub mod processing_status; pub mod require_mode; -// Studio-related enumerations -pub mod studio_session_status; -pub mod studio_tool_status; +// Chat-related enumerations +pub mod chat_session_status; +pub mod chat_tool_status; pub use action_token_type::ActionTokenType; pub use activity_type::{ActivityCategory, ActivityType}; pub use annotation_type::AnnotationType; pub use api_token_type::ApiTokenType; +pub use chat_session_status::ChatSessionStatus; +pub use chat_tool_status::ChatToolStatus; pub use content_segmentation::ContentSegmentation; +pub use file_source::FileSource; pub use integration_status::IntegrationStatus; pub use integration_type::IntegrationType; pub use invite_status::InviteStatus; @@ -42,8 +46,6 @@ pub use notification_event::NotificationEvent; pub use processing_status::ProcessingStatus; pub use require_mode::RequireMode; pub use run_type::RunType; -pub use studio_session_status::StudioSessionStatus; -pub use studio_tool_status::StudioToolStatus; pub use webhook_event::WebhookEvent; pub use webhook_status::WebhookStatus; pub use webhook_type::WebhookType; diff --git a/crates/nvisy-postgres/src/types/enums/require_mode.rs b/crates/nvisy-postgres/src/types/enums/require_mode.rs index e62d09b..da52458 100644 --- a/crates/nvisy-postgres/src/types/enums/require_mode.rs +++ b/crates/nvisy-postgres/src/types/enums/require_mode.rs @@ -1,4 +1,4 @@ -//! Require mode enumeration for file processing requirements. +//! Require mode enumeration for file content type classification. use diesel_derive_enum::DbEnum; #[cfg(feature = "schema")] @@ -6,124 +6,117 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use strum::{Display, EnumIter, EnumString}; -/// Defines the processing requirements for input files. +/// Classifies the content type of uploaded files. /// /// This enumeration corresponds to the `REQUIRE_MODE` PostgreSQL enum and is used -/// to specify what type of processing is needed to extract content from uploaded files. +/// to categorize files based on their content type for appropriate processing. #[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] #[cfg_attr(feature = "schema", derive(JsonSchema))] #[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] #[ExistingTypePath = "crate::schema::sql_types::RequireMode"] pub enum RequireMode { - /// No special processing required. - #[db_rename = "none"] - #[serde(rename = "none")] + /// Unknown or unrecognized file type. + #[db_rename = "unknown"] + #[serde(rename = "unknown")] #[default] - None, - - /// Requires Optical Character Recognition (OCR). - #[db_rename = "optical"] - #[serde(rename = "optical")] - Optical, - - /// Requires Vision Language Model (VLM). - #[db_rename = "language"] - #[serde(rename = "language")] - Language, - - /// Requires both OCR and VLM processing. - #[db_rename = "both"] - #[serde(rename = "both")] - Both, + Unknown, + + /// Text documents (PDF, DOCX, TXT, etc.). 
+ #[db_rename = "document"] + #[serde(rename = "document")] + Document, + + /// Image files (PNG, JPG, SVG, etc.). + #[db_rename = "image"] + #[serde(rename = "image")] + Image, + + /// Spreadsheet files (XLSX, CSV, etc.). + #[db_rename = "spreadsheet"] + #[serde(rename = "spreadsheet")] + Spreadsheet, + + /// Presentation files (PPTX, KEY, etc.). + #[db_rename = "presentation"] + #[serde(rename = "presentation")] + Presentation, + + /// Audio files (MP3, WAV, etc.). + #[db_rename = "audio"] + #[serde(rename = "audio")] + Audio, + + /// Video files (MP4, MOV, etc.). + #[db_rename = "video"] + #[serde(rename = "video")] + Video, + + /// Archive files (ZIP, TAR, etc.). + #[db_rename = "archive"] + #[serde(rename = "archive")] + Archive, + + /// Data files (JSON, XML, CSV, etc.). + #[db_rename = "data"] + #[serde(rename = "data")] + Data, } impl RequireMode { - /// Returns whether this mode requires OCR processing. + /// Returns whether this is a text-based content type. #[inline] - pub fn requires_ocr(self) -> bool { - matches!(self, RequireMode::Optical | RequireMode::Both) - } - - /// Returns whether this mode requires VLM processing. - #[inline] - pub fn requires_vlm(self) -> bool { - matches!(self, RequireMode::Language | RequireMode::Both) - } - - /// Returns whether this mode requires any special processing. - #[inline] - pub fn requires_processing(self) -> bool { - !matches!(self, RequireMode::None) - } - - /// Returns whether this mode involves multiple processing types. - #[inline] - pub fn is_complex(self) -> bool { - matches!(self, RequireMode::Both) + pub fn is_text_based(self) -> bool { + matches!( + self, + RequireMode::Document | RequireMode::Spreadsheet | RequireMode::Data + ) } - /// Returns whether this mode is ready for immediate analysis. + /// Returns whether this is a visual content type. #[inline] - pub fn is_ready_for_analysis(self) -> bool { - matches!(self, RequireMode::None) + pub fn is_visual(self) -> bool { + matches!( + self, + RequireMode::Image | RequireMode::Video | RequireMode::Presentation + ) } - /// Returns whether this mode requires external processing services. + /// Returns whether this is a media content type. #[inline] - pub fn requires_external_services(self) -> bool { + pub fn is_media(self) -> bool { matches!( self, - RequireMode::Optical | RequireMode::Language | RequireMode::Both + RequireMode::Image | RequireMode::Audio | RequireMode::Video ) } - /// Returns whether this mode typically has higher processing costs. + /// Returns whether this content type can be indexed for search. #[inline] - pub fn is_expensive_to_process(self) -> bool { + pub fn is_indexable(self) -> bool { matches!( self, - RequireMode::Optical | RequireMode::Language | RequireMode::Both + RequireMode::Document + | RequireMode::Spreadsheet + | RequireMode::Presentation + | RequireMode::Data ) } - /// Returns the estimated processing complexity (1 = simple, 5 = very complex). + /// Returns whether this content type requires extraction before processing. #[inline] - pub fn processing_complexity(self) -> u8 { - match self { - RequireMode::None => 1, - RequireMode::Optical => 3, - RequireMode::Language => 4, - RequireMode::Both => 5, - } + pub fn requires_extraction(self) -> bool { + matches!(self, RequireMode::Archive) } - /// Returns the estimated processing time factor (multiplier for base time). + /// Returns whether this content type requires transcription. 
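With `RequireMode` repurposed from processing requirements to content-type classification, routing decisions hang off the new predicates instead of OCR/VLM flags. A hypothetical ingestion router using the methods introduced here:

```rust
use nvisy_postgres::types::RequireMode;

/// Picks the first applicable ingestion step for a classified file.
fn ingestion_step(mode: RequireMode) -> &'static str {
    if mode.requires_extraction() {
        "unpack archive"
    } else if mode.requires_transcription() {
        "transcribe audio or video"
    } else if mode.requires_ocr() {
        "run OCR"
    } else if mode.is_indexable() {
        "chunk and embed"
    } else {
        "store only"
    }
}

fn main() {
    assert_eq!(ingestion_step(RequireMode::Archive), "unpack archive");
    assert_eq!(ingestion_step(RequireMode::Image), "run OCR");
    assert_eq!(ingestion_step(RequireMode::Document), "chunk and embed");
    assert_eq!(ingestion_step(RequireMode::Unknown), "store only");
}
```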
#[inline] - pub fn processing_time_factor(self) -> f32 { - match self { - RequireMode::None => 1.0, - RequireMode::Optical => 3.0, - RequireMode::Language => 5.0, - RequireMode::Both => 8.0, - } - } - - /// Returns the types of processing that this mode typically involves. - pub fn processing_types(self) -> &'static [&'static str] { - match self { - RequireMode::None => &[], - RequireMode::Optical => &["optical_character_recognition"], - RequireMode::Language => &["vision_language_model"], - RequireMode::Both => &["optical_character_recognition", "vision_language_model"], - } + pub fn requires_transcription(self) -> bool { + matches!(self, RequireMode::Audio | RequireMode::Video) } - /// Returns require modes that need external processing. - pub fn external_processing_modes() -> &'static [RequireMode] { - &[ - RequireMode::Optical, - RequireMode::Language, - RequireMode::Both, - ] + /// Returns whether this content type requires OCR. + #[inline] + pub fn requires_ocr(self) -> bool { + matches!(self, RequireMode::Image) } } diff --git a/crates/nvisy-postgres/src/types/mod.rs b/crates/nvisy-postgres/src/types/mod.rs index 6841ff5..5d33d3d 100644 --- a/crates/nvisy-postgres/src/types/mod.rs +++ b/crates/nvisy-postgres/src/types/mod.rs @@ -1,6 +1,6 @@ //! Contains constraints, enumerations and other custom types. -pub mod constants; +mod constants; mod constraint; mod enums; mod filtering; @@ -8,21 +8,26 @@ mod pagination; mod sorting; mod utilities; +pub use constants::{ + DEFAULT_RETENTION_DAYS, EDIT_GRACE_PERIOD_SECONDS, EMBEDDING_DIMENSIONS, + EXPIRY_WARNING_MINUTES, LONG_LIVED_THRESHOLD_HOURS, RECENTLY_SENT_HOURS, + RECENTLY_UPLOADED_HOURS, +}; pub use constraint::{ AccountActionTokenConstraints, AccountApiTokenConstraints, AccountConstraints, - AccountNotificationConstraints, ConstraintCategory, ConstraintViolation, + AccountNotificationConstraints, ChatOperationConstraints, ChatSessionConstraints, + ChatToolCallConstraints, ConstraintCategory, ConstraintViolation, DocumentAnnotationConstraints, DocumentChunkConstraints, DocumentCommentConstraints, DocumentConstraints, DocumentFileConstraints, DocumentVersionConstraints, - StudioOperationConstraints, StudioSessionConstraints, StudioToolCallConstraints, WorkspaceActivitiesConstraints, WorkspaceConstraints, WorkspaceIntegrationConstraints, WorkspaceIntegrationRunConstraints, WorkspaceInviteConstraints, WorkspaceMemberConstraints, WorkspaceWebhookConstraints, }; pub use enums::{ ActionTokenType, ActivityCategory, ActivityType, AnnotationType, ApiTokenType, - ContentSegmentation, IntegrationStatus, IntegrationType, InviteStatus, NotificationEvent, - ProcessingStatus, RequireMode, RunType, StudioSessionStatus, StudioToolStatus, WebhookEvent, - WebhookStatus, WebhookType, WorkspaceRole, + ChatSessionStatus, ChatToolStatus, ContentSegmentation, FileSource, IntegrationStatus, + IntegrationType, InviteStatus, NotificationEvent, ProcessingStatus, RequireMode, RunType, + WebhookEvent, WebhookStatus, WebhookType, WorkspaceRole, }; pub use filtering::{FileFilter, FileFormat, InviteFilter, MemberFilter}; pub use pagination::{Cursor, CursorPage, CursorPagination, OffsetPage, OffsetPagination}; diff --git a/crates/nvisy-rig/src/provider/embedding.rs b/crates/nvisy-rig/src/provider/embedding.rs index 418f226..e59fa0d 100644 --- a/crates/nvisy-rig/src/provider/embedding.rs +++ b/crates/nvisy-rig/src/provider/embedding.rs @@ -3,6 +3,8 @@ //! Wraps different embedding model providers into a unified enum, //! 
eliminating the need for generic parameters throughout the codebase.
 
+use nvisy_postgres::types::EMBEDDING_DIMENSIONS;
+use rig::client::Nothing;
 use rig::embeddings::{Embedding, EmbeddingError, EmbeddingModel};
 use rig::providers::ollama;
 
@@ -11,50 +13,68 @@ use rig::providers::ollama;
 /// This enum provides a concrete type for embedding operations,
 /// removing the need for generic `M: EmbeddingModel` parameters.
 ///
-/// Implements [`EmbeddingModel`] so it can be used directly with rig's
-/// APIs like `VectorStoreIndex` and `EmbeddingsBuilder`.
+/// All providers use [`EMBEDDING_DIMENSIONS`] to ensure consistency with the
+/// `document_chunks` table schema.
 #[derive(Clone)]
 pub enum EmbeddingProvider {
     /// Ollama embedding model.
-    Ollama(ollama::EmbeddingModel),
+    Ollama {
+        client: ollama::Client,
+        model: String,
+    },
 }
 
 impl EmbeddingProvider {
     /// Creates a new Ollama embedding provider.
     pub fn ollama(base_url: &str, model: &str) -> Self {
-        let client = ollama::Client::from_url(base_url);
-        Self::Ollama(client.embedding_model(model))
-    }
+        let client = ollama::Client::builder()
+            .api_key(Nothing)
+            .base_url(base_url)
+            .build()
+            .expect("Failed to create Ollama client");
 
-    /// Creates a new Ollama embedding provider with custom dimensions.
-    pub fn ollama_with_ndims(base_url: &str, model: &str, ndims: usize) -> Self {
-        let client = ollama::Client::from_url(base_url);
-        Self::Ollama(client.embedding_model_with_ndims(model, ndims))
+        Self::Ollama {
+            client,
+            model: model.to_string(),
+        }
     }
 
     /// Returns the model name.
     pub fn model_name(&self) -> &str {
         match self {
-            Self::Ollama(model) => &model.model,
+            Self::Ollama { model, .. } => model,
         }
     }
-}
 
-impl EmbeddingModel for EmbeddingProvider {
-    const MAX_DOCUMENTS: usize = 1024;
+    /// Returns the number of dimensions.
+    ///
+    /// This always returns [`EMBEDDING_DIMENSIONS`] to ensure consistency with the database schema.
+    pub fn ndims(&self) -> usize {
+        EMBEDDING_DIMENSIONS
+    }
 
-    fn ndims(&self) -> usize {
+    /// Embed a single text document.
+    pub async fn embed_text(&self, text: &str) -> Result<Embedding, EmbeddingError> {
         match self {
-            Self::Ollama(model) => model.ndims(),
+            Self::Ollama { client, model } => {
+                let embedding_model =
+                    ollama::EmbeddingModel::new(client.clone(), model, EMBEDDING_DIMENSIONS);
+                embedding_model.embed_text(text).await
+            }
         }
     }
 
-    async fn embed_texts(
+    /// Embed multiple text documents.
+    pub async fn embed_texts(
         &self,
         texts: impl IntoIterator<Item = String> + Send,
     ) -> Result<Vec<Embedding>, EmbeddingError> {
         match self {
-            Self::Ollama(model) => model.embed_texts(texts).await,
+            Self::Ollama { client, model } => {
+                let embedding_model =
+                    ollama::EmbeddingModel::new(client.clone(), model, EMBEDDING_DIMENSIONS);
+                embedding_model.embed_texts(texts).await
+            }
         }
     }
 }
 
@@ -62,9 +82,10 @@ impl EmbeddingModel for EmbeddingProvider {
 impl std::fmt::Debug for EmbeddingProvider {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Self::Ollama(model) => f
+            Self::Ollama { model, ..
} => f .debug_struct("EmbeddingProvider::Ollama") - .field("model", &model.model) + .field("model", model) + .field("ndims", &EMBEDDING_DIMENSIONS) .finish(), } } diff --git a/crates/nvisy-rig/src/rag/indexer/mod.rs b/crates/nvisy-rig/src/rag/indexer/mod.rs index 37bf65e..b15f964 100644 --- a/crates/nvisy-rig/src/rag/indexer/mod.rs +++ b/crates/nvisy-rig/src/rag/indexer/mod.rs @@ -7,7 +7,7 @@ mod indexed; use nvisy_postgres::model::NewDocumentChunk; use nvisy_postgres::query::DocumentChunkRepository; use nvisy_postgres::{PgClient, Vector}; -use rig::embeddings::EmbeddingModel; + use sha2::{Digest, Sha256}; use uuid::Uuid; diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs index 1d22b7d..3c0e171 100644 --- a/crates/nvisy-rig/src/rag/searcher/mod.rs +++ b/crates/nvisy-rig/src/rag/searcher/mod.rs @@ -11,7 +11,7 @@ use nvisy_nats::object::{DocumentKey, DocumentStore, Files}; use nvisy_postgres::model::ScoredDocumentChunk; use nvisy_postgres::query::DocumentChunkRepository; use nvisy_postgres::{PgClient, Vector}; -use rig::embeddings::EmbeddingModel; + use tokio::io::AsyncReadExt; use uuid::Uuid; diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index 7be2d42..2ef60e8 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -41,18 +41,19 @@ nvisy-webhook = { workspace = true, features = ["schema"] } # Async runtime tokio = { workspace = true, features = [] } tokio-util = { workspace = true, features = [] } +tokio-stream = { workspace = true, features = [] } futures = { workspace = true, features = [] } async-trait = { workspace = true, features = [] } # HTTP & Web server -axum = { workspace = true, features = ["http2", "macros", "ws", "multipart"] } +axum = { workspace = true, features = ["http2", "macros", "multipart"] } axum-client-ip = { workspace = true, features = [] } axum-extra = { workspace = true, features = ["typed-header", "attachment", "query"] } tower = { workspace = true, features = [] } tower-http = { workspace = true, features = [] } # OpenAPI/Documentation -aide = { workspace = true, features = ["axum", "axum-query", "axum-form", "axum-json", "axum-ws", "axum-multipart", "axum-extra", "bytes", "http"] } +aide = { workspace = true, features = ["axum", "axum-query", "axum-form", "axum-json", "axum-multipart", "axum-extra", "bytes", "http"] } schemars = { workspace = true, features = [] } # Observability diff --git a/crates/nvisy-server/src/handler/chat.rs b/crates/nvisy-server/src/handler/chat.rs new file mode 100644 index 0000000..bf5ebe7 --- /dev/null +++ b/crates/nvisy-server/src/handler/chat.rs @@ -0,0 +1,420 @@ +//! Chat session handlers for LLM-assisted document editing. +//! +//! This module provides comprehensive chat session management functionality within workspaces, +//! including creation, reading, updating, and deletion of sessions. All operations +//! are secured with proper authorization and follow workspace-based access control. +//! +//! ## Streaming +//! +//! The `/chat/sessions/{sessionId}/messages` endpoint uses Server-Sent Events (SSE) to stream +//! LLM responses back to the client. Clients can cancel generation by closing the connection +//! (e.g., using `AbortController` in JavaScript). 
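+//!
+//! As a rough sketch, a streamed reply might look like this on the wire. The event names mirror
+//! the `ChatEvent` variants; the JSON fields inside `data:` depend on how `ChatEvent` serializes
+//! and are illustrative only:
+//!
+//! ```text
+//! event: thinking
+//! data: {"content":"Planning the requested edit..."}
+//!
+//! event: text_delta
+//! data: {"delta":"Here is a tightened version of the clause..."}
+//!
+//! event: done
+//! data: {"summary":"Proposed one edit to the primary file"}
+//! ```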
+ +use std::convert::Infallible; + +use aide::axum::ApiRouter; +use aide::transform::TransformOperation; +use axum::extract::State; +use axum::http::StatusCode; +use axum::response::sse::{Event, KeepAlive, Sse}; +use futures::StreamExt; +use nvisy_postgres::PgClient; +use nvisy_postgres::query::ChatSessionRepository; +use nvisy_rig::RigService; +use tokio_stream::wrappers::ReceiverStream; + +use crate::extract::{AuthProvider, AuthState, Json, Path, Permission, Query, ValidateJson}; +use crate::handler::request::{ + ChatSessionPathParams, CreateChatSession, CursorPagination, SendChatMessage, UpdateChatSession, + WorkspacePathParams, +}; +use crate::handler::response::{ChatSession, ChatSessionsPage, ChatStreamEvent, ErrorResponse}; +use crate::handler::{ErrorKind, Result}; +use crate::service::ServiceState; + +/// Tracing target for chat session operations. +const TRACING_TARGET: &str = "nvisy_server::handler::chat"; + +/// Creates a new chat session. +#[tracing::instrument( + skip_all, + fields( + account_id = %auth_state.account_id, + workspace_id = %path_params.workspace_id, + ) +)] +async fn create_chat_session( + State(pg_client): State, + AuthState(auth_state): AuthState, + Path(path_params): Path, + ValidateJson(request): ValidateJson, +) -> Result<(StatusCode, Json)> { + tracing::debug!(target: TRACING_TARGET, "Creating chat session"); + + let mut conn = pg_client.get_connection().await?; + + auth_state + .authorize_workspace( + &mut conn, + path_params.workspace_id, + Permission::CreateDocuments, + ) + .await?; + + let new_session = request.into_model(path_params.workspace_id, auth_state.account_id); + let session = conn.create_chat_session(new_session).await?; + + tracing::info!( + target: TRACING_TARGET, + session_id = %session.id, + "Chat session created", + ); + + Ok((StatusCode::CREATED, Json(ChatSession::from_model(session)))) +} + +fn create_chat_session_docs(op: TransformOperation) -> TransformOperation { + op.summary("Create chat session") + .description("Creates a new LLM-assisted editing session for a document file.") + .response::<201, Json>() + .response::<400, Json>() + .response::<401, Json>() + .response::<403, Json>() +} + +/// Returns all chat sessions for a workspace. +#[tracing::instrument( + skip_all, + fields( + account_id = %auth_state.account_id, + workspace_id = %path_params.workspace_id, + ) +)] +async fn get_all_chat_sessions( + State(pg_client): State, + AuthState(auth_state): AuthState, + Path(path_params): Path, + Query(pagination): Query, +) -> Result<(StatusCode, Json)> { + tracing::debug!(target: TRACING_TARGET, "Listing chat sessions"); + + let mut conn = pg_client.get_connection().await?; + + auth_state + .authorize_workspace( + &mut conn, + path_params.workspace_id, + Permission::ViewDocuments, + ) + .await?; + + let page = conn + .cursor_list_chat_sessions(path_params.workspace_id, pagination.into()) + .await?; + + let response = ChatSessionsPage::from_cursor_page(page, ChatSession::from_model); + + tracing::debug!( + target: TRACING_TARGET, + session_count = response.items.len(), + "Chat sessions listed", + ); + + Ok((StatusCode::OK, Json(response))) +} + +fn get_all_chat_sessions_docs(op: TransformOperation) -> TransformOperation { + op.summary("List chat sessions") + .description("Lists all chat sessions in a workspace with pagination.") + .response::<200, Json>() + .response::<401, Json>() + .response::<403, Json>() +} + +/// Gets a chat session by its session ID. 
+#[tracing::instrument( + skip_all, + fields( + account_id = %auth_state.account_id, + session_id = %path_params.session_id, + ) +)] +async fn get_chat_session( + State(pg_client): State, + AuthState(auth_state): AuthState, + Path(path_params): Path, +) -> Result<(StatusCode, Json)> { + tracing::debug!(target: TRACING_TARGET, "Reading chat session"); + + let mut conn = pg_client.get_connection().await?; + + let session = find_chat_session(&mut conn, path_params.session_id).await?; + + auth_state + .authorize_workspace(&mut conn, session.workspace_id, Permission::ViewDocuments) + .await?; + + tracing::info!(target: TRACING_TARGET, "Chat session read"); + + Ok((StatusCode::OK, Json(ChatSession::from_model(session)))) +} + +fn get_chat_session_docs(op: TransformOperation) -> TransformOperation { + op.summary("Get chat session") + .description("Returns chat session details by ID.") + .response::<200, Json>() + .response::<401, Json>() + .response::<403, Json>() + .response::<404, Json>() +} + +/// Updates a chat session by its session ID. +#[tracing::instrument( + skip_all, + fields( + account_id = %auth_state.account_id, + session_id = %path_params.session_id, + ) +)] +async fn update_chat_session( + State(pg_client): State, + AuthState(auth_state): AuthState, + Path(path_params): Path, + ValidateJson(request): ValidateJson, +) -> Result<(StatusCode, Json)> { + tracing::debug!(target: TRACING_TARGET, "Updating chat session"); + + let mut conn = pg_client.get_connection().await?; + + let existing = find_chat_session(&mut conn, path_params.session_id).await?; + + auth_state + .authorize_workspace( + &mut conn, + existing.workspace_id, + Permission::UpdateDocuments, + ) + .await?; + + let update_data = request.into_model(); + let session = conn + .update_chat_session(path_params.session_id, update_data) + .await?; + + tracing::info!(target: TRACING_TARGET, "Chat session updated"); + + Ok((StatusCode::OK, Json(ChatSession::from_model(session)))) +} + +fn update_chat_session_docs(op: TransformOperation) -> TransformOperation { + op.summary("Update chat session") + .description("Updates chat session metadata and configuration.") + .response::<200, Json>() + .response::<400, Json>() + .response::<401, Json>() + .response::<403, Json>() + .response::<404, Json>() +} + +/// Deletes (archives) a chat session by its session ID. +#[tracing::instrument( + skip_all, + fields( + account_id = %auth_state.account_id, + session_id = %path_params.session_id, + ) +)] +async fn delete_chat_session( + State(pg_client): State, + AuthState(auth_state): AuthState, + Path(path_params): Path, +) -> Result { + tracing::debug!(target: TRACING_TARGET, "Deleting chat session"); + + let mut conn = pg_client.get_connection().await?; + + let session = find_chat_session(&mut conn, path_params.session_id).await?; + + auth_state + .authorize_workspace(&mut conn, session.workspace_id, Permission::DeleteDocuments) + .await?; + + conn.delete_chat_session(path_params.session_id).await?; + + tracing::info!(target: TRACING_TARGET, "Chat session deleted"); + + Ok(StatusCode::OK) +} + +fn delete_chat_session_docs(op: TransformOperation) -> TransformOperation { + op.summary("Delete chat session") + .description("Archives the chat session (soft delete).") + .response_with::<200, (), _>(|res| res.description("Chat session deleted.")) + .response::<401, Json>() + .response::<403, Json>() + .response::<404, Json>() +} + +/// Sends a message to a chat session and streams the response via SSE. 
+/// +/// The response is streamed as Server-Sent Events with different event types: +/// - `thinking`: Agent is processing/planning +/// - `text_delta`: Incremental text from the LLM +/// - `tool_call`: Agent is calling a tool +/// - `tool_result`: Tool execution completed +/// - `proposed_edit`: Agent proposes a document edit +/// - `edit_applied`: Edit was auto-applied +/// - `done`: Response completed with final summary +/// - `error`: An error occurred +/// +/// Clients can cancel generation by closing the connection (AbortController). +#[tracing::instrument( + skip_all, + fields( + account_id = %auth_state.account_id, + session_id = %path_params.session_id, + ) +)] +async fn send_message( + State(pg_client): State, + State(rig_service): State, + AuthState(auth_state): AuthState, + Path(path_params): Path, + ValidateJson(request): ValidateJson, +) -> Result { + tracing::debug!(target: TRACING_TARGET, "Sending chat message"); + + let mut conn = pg_client.get_connection().await?; + + // Verify session exists and user has access + let session = find_chat_session(&mut conn, path_params.session_id).await?; + + auth_state + .authorize_workspace(&mut conn, session.workspace_id, Permission::UpdateDocuments) + .await?; + + // Create SSE stream + let (tx, rx) = tokio::sync::mpsc::channel::>(32); + + // Get the chat stream from rig service + let chat_stream = rig_service + .chat() + .chat(path_params.session_id, &request.content) + .await + .map_err(|e| { + tracing::error!(target: TRACING_TARGET, error = %e, "Failed to create chat stream"); + ErrorKind::InternalServerError + .with_message("Failed to start chat") + .with_context(e.to_string()) + })?; + + // Spawn task to process the chat stream and send SSE events + let session_id = path_params.session_id; + tokio::spawn(async move { + let mut stream = std::pin::pin!(chat_stream); + + while let Some(result) = stream.next().await { + let event = match result { + Ok(chat_event) => { + let stream_event = ChatStreamEvent::new(chat_event); + let event_type = stream_event.event_type(); + + match serde_json::to_string(&stream_event) { + Ok(json) => Event::default().event(event_type).data(json), + Err(e) => { + tracing::error!( + target: TRACING_TARGET, + session_id = %session_id, + error = %e, + "Failed to serialize chat event" + ); + continue; + } + } + } + Err(e) => { + tracing::error!( + target: TRACING_TARGET, + session_id = %session_id, + error = %e, + "Chat stream error" + ); + // Send error event and break + let error_event = ChatStreamEvent::new(nvisy_rig::chat::ChatEvent::Error { + message: e.to_string(), + }); + if let Ok(json) = serde_json::to_string(&error_event) { + let _ = tx + .send(Ok(Event::default().event("error").data(json))) + .await; + } + break; + } + }; + + // Send the event; if send fails, client disconnected (cancelled) + if tx.send(Ok(event)).await.is_err() { + tracing::info!( + target: TRACING_TARGET, + session_id = %session_id, + "Client disconnected, cancelling chat stream" + ); + break; + } + } + + tracing::debug!( + target: TRACING_TARGET, + session_id = %session_id, + "Chat stream completed" + ); + }); + + tracing::info!( + target: TRACING_TARGET, + session_id = %path_params.session_id, + "Chat message stream started" + ); + + Ok(Sse::new(ReceiverStream::new(rx)).keep_alive(KeepAlive::default())) +} + +/// Finds a chat session by ID or returns NotFound error. +async fn find_chat_session( + conn: &mut nvisy_postgres::PgConn, + session_id: uuid::Uuid, +) -> Result { + conn.find_chat_session_by_id(session_id) + .await? 
+ .ok_or_else(|| { + ErrorKind::NotFound + .with_message("Chat session not found.") + .with_resource("chat_session") + }) +} + +/// Returns a [`Router`] with all related routes. +/// +/// [`Router`]: axum::routing::Router +pub fn routes() -> ApiRouter { + use aide::axum::routing::*; + + ApiRouter::new() + .api_route( + "/workspaces/{workspaceId}/chat/sessions", + post_with(create_chat_session, create_chat_session_docs) + .get_with(get_all_chat_sessions, get_all_chat_sessions_docs), + ) + .api_route( + "/chat/sessions/{sessionId}", + get_with(get_chat_session, get_chat_session_docs) + .patch_with(update_chat_session, update_chat_session_docs) + .delete_with(delete_chat_session, delete_chat_session_docs), + ) + // SSE endpoint - uses regular axum routing as aide doesn't support SSE in OpenAPI + .route( + "/chat/sessions/{sessionId}/messages", + axum::routing::post(send_message), + ) + .with_path_items(|item| item.tag("Chat")) +} diff --git a/crates/nvisy-server/src/handler/error/mod.rs b/crates/nvisy-server/src/handler/error/mod.rs index 591b0f6..2d9d182 100644 --- a/crates/nvisy-server/src/handler/error/mod.rs +++ b/crates/nvisy-server/src/handler/error/mod.rs @@ -3,9 +3,9 @@ mod http_error; mod nats_error; mod pg_account; +mod pg_chat; mod pg_document; mod pg_error; -mod pg_studio; mod pg_workspace; mod service_error; diff --git a/crates/nvisy-server/src/handler/error/pg_chat.rs b/crates/nvisy-server/src/handler/error/pg_chat.rs new file mode 100644 index 0000000..5550e72 --- /dev/null +++ b/crates/nvisy-server/src/handler/error/pg_chat.rs @@ -0,0 +1,65 @@ +//! Chat-related constraint violation error handlers. + +use nvisy_postgres::types::{ + ChatOperationConstraints, ChatSessionConstraints, ChatToolCallConstraints, +}; + +use crate::handler::{Error, ErrorKind}; + +impl From for Error<'static> { + fn from(c: ChatSessionConstraints) -> Self { + let error = match c { + ChatSessionConstraints::DisplayNameLength => ErrorKind::BadRequest + .with_message("Session name must be between 1 and 255 characters long"), + ChatSessionConstraints::ModelConfigSize => { + ErrorKind::BadRequest.with_message("Model configuration size is invalid") + } + ChatSessionConstraints::MessageCountMin => ErrorKind::InternalServerError.into_error(), + ChatSessionConstraints::TokenCountMin => ErrorKind::InternalServerError.into_error(), + ChatSessionConstraints::UpdatedAfterCreated => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("chat_session") + } +} + +impl From for Error<'static> { + fn from(c: ChatToolCallConstraints) -> Self { + let error = match c { + ChatToolCallConstraints::ToolNameLength => ErrorKind::BadRequest + .with_message("Tool name must be between 1 and 128 characters long"), + ChatToolCallConstraints::ToolInputSize => { + ErrorKind::BadRequest.with_message("Tool input size exceeds maximum allowed") + } + ChatToolCallConstraints::ToolOutputSize => { + ErrorKind::BadRequest.with_message("Tool output size exceeds maximum allowed") + } + ChatToolCallConstraints::CompletedAfterStarted => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("chat_tool_call") + } +} + +impl From for Error<'static> { + fn from(c: ChatOperationConstraints) -> Self { + let error = match c { + ChatOperationConstraints::OperationTypeLength => ErrorKind::BadRequest + .with_message("Operation type must be between 1 and 64 characters long"), + ChatOperationConstraints::OperationDiffSize => { + ErrorKind::BadRequest.with_message("Operation diff size exceeds maximum 
allowed") + } + ChatOperationConstraints::RevertRequiresApplied => ErrorKind::BadRequest + .with_message("Cannot revert an operation that has not been applied"), + ChatOperationConstraints::AppliedAfterCreated => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("chat_operation") + } +} diff --git a/crates/nvisy-server/src/handler/error/pg_error.rs b/crates/nvisy-server/src/handler/error/pg_error.rs index a5ee229..6b086c7 100644 --- a/crates/nvisy-server/src/handler/error/pg_error.rs +++ b/crates/nvisy-server/src/handler/error/pg_error.rs @@ -34,9 +34,9 @@ impl From for Error<'static> { ConstraintViolation::DocumentFile(c) => c.into(), ConstraintViolation::DocumentVersion(c) => c.into(), ConstraintViolation::WorkspaceWebhook(c) => c.into(), - ConstraintViolation::StudioSession(c) => c.into(), - ConstraintViolation::StudioToolCall(c) => c.into(), - ConstraintViolation::StudioOperation(c) => c.into(), + ConstraintViolation::ChatSession(c) => c.into(), + ConstraintViolation::ChatToolCall(c) => c.into(), + ConstraintViolation::ChatOperation(c) => c.into(), } } } diff --git a/crates/nvisy-server/src/handler/error/pg_studio.rs b/crates/nvisy-server/src/handler/error/pg_studio.rs deleted file mode 100644 index f43f75b..0000000 --- a/crates/nvisy-server/src/handler/error/pg_studio.rs +++ /dev/null @@ -1,67 +0,0 @@ -//! Studio-related constraint violation error handlers. - -use nvisy_postgres::types::{ - StudioOperationConstraints, StudioSessionConstraints, StudioToolCallConstraints, -}; - -use crate::handler::{Error, ErrorKind}; - -impl From for Error<'static> { - fn from(c: StudioSessionConstraints) -> Self { - let error = match c { - StudioSessionConstraints::DisplayNameLength => ErrorKind::BadRequest - .with_message("Session name must be between 1 and 255 characters long"), - StudioSessionConstraints::ModelConfigSize => { - ErrorKind::BadRequest.with_message("Model configuration size is invalid") - } - StudioSessionConstraints::MessageCountMin => { - ErrorKind::InternalServerError.into_error() - } - StudioSessionConstraints::TokenCountMin => ErrorKind::InternalServerError.into_error(), - StudioSessionConstraints::UpdatedAfterCreated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("studio_session") - } -} - -impl From for Error<'static> { - fn from(c: StudioToolCallConstraints) -> Self { - let error = match c { - StudioToolCallConstraints::ToolNameLength => ErrorKind::BadRequest - .with_message("Tool name must be between 1 and 128 characters long"), - StudioToolCallConstraints::ToolInputSize => { - ErrorKind::BadRequest.with_message("Tool input size exceeds maximum allowed") - } - StudioToolCallConstraints::ToolOutputSize => { - ErrorKind::BadRequest.with_message("Tool output size exceeds maximum allowed") - } - StudioToolCallConstraints::CompletedAfterStarted => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("studio_tool_call") - } -} - -impl From for Error<'static> { - fn from(c: StudioOperationConstraints) -> Self { - let error = match c { - StudioOperationConstraints::OperationTypeLength => ErrorKind::BadRequest - .with_message("Operation type must be between 1 and 64 characters long"), - StudioOperationConstraints::OperationDiffSize => { - ErrorKind::BadRequest.with_message("Operation diff size exceeds maximum allowed") - } - StudioOperationConstraints::RevertRequiresApplied => ErrorKind::BadRequest - .with_message("Cannot revert an operation that has not been applied"), - 
StudioOperationConstraints::AppliedAfterCreated => {
-                ErrorKind::InternalServerError.into_error()
-            }
-        };
-
-        error.with_resource("studio_operation")
-    }
-}
diff --git a/crates/nvisy-server/src/handler/mod.rs b/crates/nvisy-server/src/handler/mod.rs
index 0a3e933..7ac13c1 100644
--- a/crates/nvisy-server/src/handler/mod.rs
+++ b/crates/nvisy-server/src/handler/mod.rs
@@ -6,6 +6,7 @@
 mod accounts;
 mod annotations;
 mod authentication;
+mod chat;
 mod comments;
 mod documents;
 mod error;
@@ -20,7 +21,6 @@ mod runs;
 mod tokens;
 mod utility;
 mod webhooks;
-mod websocket;
 mod workspaces;
 
 use aide::axum::ApiRouter;
@@ -51,11 +51,11 @@ fn private_routes(
         .merge(invites::routes())
         .merge(members::routes())
         .merge(webhooks::routes())
-        .merge(websocket::routes())
         .merge(files::routes())
         .merge(documents::routes())
         .merge(comments::routes())
-        .merge(annotations::routes());
+        .merge(annotations::routes())
+        .merge(chat::routes());
 
     if let Some(additional) = additional_routes {
         router = router.merge(additional);
diff --git a/crates/nvisy-server/src/handler/request/chat.rs b/crates/nvisy-server/src/handler/request/chat.rs
new file mode 100644
index 0000000..2ea181d
--- /dev/null
+++ b/crates/nvisy-server/src/handler/request/chat.rs
@@ -0,0 +1,74 @@
+//! Chat session request types.
+
+use nvisy_postgres::model::{NewChatSession, UpdateChatSession as UpdateChatSessionModel};
+use nvisy_postgres::types::ChatSessionStatus;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+use validator::Validate;
+
+/// Request payload for creating a new chat session.
+#[must_use]
+#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)]
+#[serde(rename_all = "camelCase")]
+pub struct CreateChatSession {
+    /// ID of the primary file being edited in this session.
+    pub primary_file_id: Uuid,
+    /// Display name of the session.
+    #[validate(length(min = 1, max = 255))]
+    pub display_name: Option<String>,
+    /// LLM configuration (model, temperature, max tokens, etc.).
+    pub model_config: Option<serde_json::Value>,
+}
+
+impl CreateChatSession {
+    /// Converts this request into a database model.
+    pub fn into_model(self, workspace_id: Uuid, account_id: Uuid) -> NewChatSession {
+        NewChatSession {
+            workspace_id,
+            account_id,
+            primary_file_id: self.primary_file_id,
+            display_name: self.display_name,
+            model_config: self.model_config,
+            session_status: None,
+        }
+    }
+}
+
+/// Request payload for updating a chat session.
+#[must_use]
+#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)]
+#[serde(rename_all = "camelCase")]
+pub struct UpdateChatSession {
+    /// Updated display name.
+    #[validate(length(min = 1, max = 255))]
+    pub display_name: Option<String>,
+    /// Updated session status.
+    pub session_status: Option<ChatSessionStatus>,
+    /// Updated LLM configuration.
+    pub model_config: Option<serde_json::Value>,
+}
+
+impl UpdateChatSession {
+    /// Converts this request into a database model.
+    pub fn into_model(self) -> UpdateChatSessionModel {
+        UpdateChatSessionModel {
+            display_name: self.display_name,
+            session_status: self.session_status,
+            model_config: self.model_config,
+            ..Default::default()
+        }
+    }
+}
+
+/// Request payload for sending a chat message.
+#[must_use]
+#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)]
+#[serde(rename_all = "camelCase")]
+pub struct SendChatMessage {
+    /// The message content to send.
+    #[validate(length(min = 1, max = 32000))]
+    pub content: String,
+    /// Optional model override for this message.
+    pub model: Option<String>,
+}
diff --git a/crates/nvisy-server/src/handler/request/mod.rs b/crates/nvisy-server/src/handler/request/mod.rs
index d3ce496..d6f24a7 100644
--- a/crates/nvisy-server/src/handler/request/mod.rs
+++ b/crates/nvisy-server/src/handler/request/mod.rs
@@ -3,6 +3,7 @@
 mod accounts;
 mod annotations;
 mod authentications;
+mod chat;
 mod comments;
 mod documents;
 mod files;
@@ -20,6 +21,7 @@
 pub use accounts::*;
 pub use annotations::*;
 pub use authentications::*;
+pub use chat::*;
 pub use comments::*;
 pub use documents::*;
 pub use files::*;
diff --git a/crates/nvisy-server/src/handler/request/paths.rs b/crates/nvisy-server/src/handler/request/paths.rs
index 7a60af2..b59c919 100644
--- a/crates/nvisy-server/src/handler/request/paths.rs
+++ b/crates/nvisy-server/src/handler/request/paths.rs
@@ -169,3 +169,15 @@ pub struct AccountPathParams {
     /// Unique identifier of the account.
     pub account_id: Uuid,
 }
+
+/// Path parameters for chat session operations (session ID only).
+///
+/// Since session IDs are globally unique UUIDs, workspace context can be
+/// derived from the session record itself for authorization purposes.
+#[must_use]
+#[derive(Debug, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct ChatSessionPathParams {
+    /// Unique identifier of the chat session.
+    pub session_id: Uuid,
+}
diff --git a/crates/nvisy-server/src/handler/response/chat.rs b/crates/nvisy-server/src/handler/response/chat.rs
new file mode 100644
index 0000000..31f64c1
--- /dev/null
+++ b/crates/nvisy-server/src/handler/response/chat.rs
@@ -0,0 +1,94 @@
+//! Chat session response types.
+
+use jiff::Timestamp;
+use nvisy_postgres::model;
+use nvisy_postgres::types::ChatSessionStatus;
+use nvisy_rig::chat::ChatEvent;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+use super::Page;
+
+/// Represents a chat session with full details.
+#[must_use]
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct ChatSession {
+    /// Unique session identifier.
+    pub session_id: Uuid,
+    /// ID of the workspace this session belongs to.
+    pub workspace_id: Uuid,
+    /// ID of the account that owns this session.
+    pub account_id: Uuid,
+    /// ID of the primary file being edited.
+    pub primary_file_id: Uuid,
+    /// Display name of the session.
+    pub display_name: String,
+    /// Current session status.
+    pub session_status: ChatSessionStatus,
+    /// LLM configuration.
+    pub model_config: serde_json::Value,
+    /// Total number of messages in this session.
+    pub message_count: i32,
+    /// Total tokens used in this session.
+    pub token_count: i32,
+    /// Timestamp when the session was created.
+    pub created_at: Timestamp,
+    /// Timestamp when the session was last updated.
+    pub updated_at: Timestamp,
+}
+
+impl ChatSession {
+    /// Creates a response from a database model.
+    pub fn from_model(session: model::ChatSession) -> Self {
+        Self {
+            session_id: session.id,
+            workspace_id: session.workspace_id,
+            account_id: session.account_id,
+            primary_file_id: session.primary_file_id,
+            display_name: session.display_name,
+            session_status: session.session_status,
+            model_config: session.model_config,
+            message_count: session.message_count,
+            token_count: session.token_count,
+            created_at: session.created_at.into(),
+            updated_at: session.updated_at.into(),
+        }
+    }
+}
+
+/// Paginated list of chat sessions.
+pub type ChatSessionsPage = Page<ChatSession>;
+
+/// SSE event wrapper for chat streaming.
+/// +/// This wraps `ChatEvent` from nvisy-rig and provides SSE-compatible serialization. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ChatStreamEvent { + /// The underlying chat event. + #[serde(flatten)] + pub event: ChatEvent, +} + +impl ChatStreamEvent { + /// Creates a new stream event from a chat event. + pub fn new(event: ChatEvent) -> Self { + Self { event } + } + + /// Returns the SSE event type name. + pub fn event_type(&self) -> &'static str { + match &self.event { + ChatEvent::Thinking { .. } => "thinking", + ChatEvent::TextDelta { .. } => "text_delta", + ChatEvent::ToolCall { .. } => "tool_call", + ChatEvent::ToolResult { .. } => "tool_result", + ChatEvent::ProposedEdit { .. } => "proposed_edit", + ChatEvent::EditApplied { .. } => "edit_applied", + ChatEvent::Done { .. } => "done", + ChatEvent::Error { .. } => "error", + } + } +} diff --git a/crates/nvisy-server/src/handler/response/files.rs b/crates/nvisy-server/src/handler/response/files.rs index 94f0bcd..bd103a4 100644 --- a/crates/nvisy-server/src/handler/response/files.rs +++ b/crates/nvisy-server/src/handler/response/files.rs @@ -2,7 +2,7 @@ use jiff::Timestamp; use nvisy_postgres::model::DocumentFile; -use nvisy_postgres::types::{ContentSegmentation, ProcessingStatus}; +use nvisy_postgres::types::{ContentSegmentation, FileSource, ProcessingStatus}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -40,6 +40,10 @@ pub struct File { pub processing_priority: i32, /// Classification tags. pub tags: Vec, + /// How the file was created (uploaded, imported, generated). + pub source: FileSource, + /// Account ID of the user who uploaded/created the file. + pub uploaded_by: Uuid, /// Knowledge extraction settings. pub file_knowledge: FileKnowledge, /// Creation timestamp. @@ -57,6 +61,8 @@ impl File { status: file.processing_status, processing_priority: file.processing_priority, tags: file.tags.into_iter().flatten().collect(), + source: file.source, + uploaded_by: file.account_id, file_knowledge: FileKnowledge { is_indexed: file.is_indexed, content_segmentation: file.content_segmentation, diff --git a/crates/nvisy-server/src/handler/response/mod.rs b/crates/nvisy-server/src/handler/response/mod.rs index e4d1d3a..eb2bcd8 100644 --- a/crates/nvisy-server/src/handler/response/mod.rs +++ b/crates/nvisy-server/src/handler/response/mod.rs @@ -8,6 +8,7 @@ mod accounts; mod activities; mod annotations; mod authentications; +mod chat; mod comments; mod documents; mod errors; @@ -26,6 +27,7 @@ pub use accounts::*; pub use activities::*; pub use annotations::*; pub use authentications::*; +pub use chat::*; pub use comments::*; pub use documents::*; pub use errors::*; diff --git a/crates/nvisy-server/src/handler/websocket.rs b/crates/nvisy-server/src/handler/websocket.rs deleted file mode 100644 index 9e3e64c..0000000 --- a/crates/nvisy-server/src/handler/websocket.rs +++ /dev/null @@ -1,820 +0,0 @@ -//! WebSocket handler for real-time workspace communication via NATS. 
- -use std::ops::ControlFlow; -use std::sync::Arc; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::Duration; - -use aide::axum::ApiRouter; -use aide::transform::TransformOperation; -use axum::extract::State; -use axum::extract::ws::{Message, Utf8Bytes, WebSocket, WebSocketUpgrade}; -use axum::response::Response; -use futures::{SinkExt, StreamExt}; -use nvisy_nats::NatsClient; -use nvisy_nats::stream::{WorkspaceEventPublisher, WorkspaceWsMessage}; -use nvisy_postgres::PgClient; -use nvisy_postgres::query::{AccountRepository, WorkspaceRepository}; -use uuid::Uuid; - -use crate::extract::{AuthProvider, AuthState, Json, Path, Permission}; -use crate::handler::request::WorkspacePathParams; -use crate::handler::response::ErrorResponse; -use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; - -/// Tracing target for workspace websocket operations. -const TRACING_TARGET: &str = "nvisy_server::handler::workspace_websocket"; - -/// Maximum size of a WebSocket message in bytes (1 MB). -const MAX_MESSAGE_SIZE: usize = 1_024 * 1_024; - -/// Timeout for fetching messages from NATS stream. -const NATS_FETCH_TIMEOUT: Duration = Duration::from_millis(100); - -/// Maximum time to wait for graceful connection shutdown. -const GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5); - -/// Context for a WebSocket connection. -#[derive(Debug, Clone)] -struct WsContext { - /// Unique connection identifier for logging/debugging. - connection_id: Uuid, - /// The workspace this connection belongs to. - workspace_id: Uuid, - /// The authenticated account ID. - account_id: Uuid, -} - -impl WsContext { - /// Creates a new WebSocket connection context. - fn new(workspace_id: Uuid, account_id: Uuid) -> Self { - Self { - connection_id: Uuid::new_v4(), - workspace_id, - account_id, - } - } -} - -/// Metrics for a WebSocket connection. -#[derive(Debug, Default)] -struct ConnectionMetrics { - messages_sent: AtomicU64, - messages_received: AtomicU64, - messages_published: AtomicU64, - messages_dropped: AtomicU64, - errors: AtomicU64, -} - -impl ConnectionMetrics { - fn new() -> Arc { - Arc::new(Self::default()) - } - - fn increment_sent(&self) { - self.messages_sent.fetch_add(1, Ordering::Relaxed); - } - - fn increment_received(&self) { - self.messages_received.fetch_add(1, Ordering::Relaxed); - } - - fn increment_published(&self) { - self.messages_published.fetch_add(1, Ordering::Relaxed); - } - - fn increment_dropped(&self) { - self.messages_dropped.fetch_add(1, Ordering::Relaxed); - } - - fn increment_errors(&self) { - self.errors.fetch_add(1, Ordering::Relaxed); - } - - fn snapshot(&self) -> MetricsSnapshot { - MetricsSnapshot { - sent: self.messages_sent.load(Ordering::Relaxed), - received: self.messages_received.load(Ordering::Relaxed), - published: self.messages_published.load(Ordering::Relaxed), - dropped: self.messages_dropped.load(Ordering::Relaxed), - errors: self.errors.load(Ordering::Relaxed), - } - } -} - -#[derive(Debug)] -struct MetricsSnapshot { - sent: u64, - received: u64, - published: u64, - dropped: u64, - errors: u64, -} - -/// Validate message size to prevent DoS attacks. 
-fn validate_message_size(ctx: &WsContext, size: usize, metrics: &ConnectionMetrics) -> bool { - if size > MAX_MESSAGE_SIZE { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - message_size = size, - max_size = MAX_MESSAGE_SIZE, - "message exceeds maximum size, dropping" - ); - metrics.increment_dropped(); - false - } else { - true - } -} - -/// Check if the account has permission to perform the action in the message. -async fn check_event_permission( - conn: &mut nvisy_postgres::PgConn, - ctx: &WsContext, - msg: &WorkspaceWsMessage, -) -> Result<()> { - // Determine required permission based on message type - let permission = match msg { - // Read-only events - require ViewDocuments permission - WorkspaceWsMessage::Typing(_) | WorkspaceWsMessage::MemberPresence(_) => { - Permission::ViewDocuments - } - - // Document write events - require UpdateDocuments permission - WorkspaceWsMessage::DocumentUpdate(_) => Permission::UpdateDocuments, - WorkspaceWsMessage::DocumentCreated(_) => Permission::CreateDocuments, - WorkspaceWsMessage::DocumentDeleted(_) => Permission::DeleteDocuments, - - // File events - require appropriate file permissions - WorkspaceWsMessage::FilePreprocessed(_) - | WorkspaceWsMessage::FilePostprocessed(_) - | WorkspaceWsMessage::JobProgress(_) - | WorkspaceWsMessage::JobCompleted(_) - | WorkspaceWsMessage::JobFailed(_) => Permission::ViewFiles, - WorkspaceWsMessage::FileTransformed(_) => Permission::UpdateFiles, - - // Member management - require InviteMembers/RemoveMembers permission - WorkspaceWsMessage::MemberAdded(_) => Permission::InviteMembers, - WorkspaceWsMessage::MemberRemoved(_) => Permission::RemoveMembers, - - // Workspace settings - require UpdateWorkspace permission - WorkspaceWsMessage::WorkspaceUpdated(_) => Permission::UpdateWorkspace, - - // System events - always allowed (sent by server) - WorkspaceWsMessage::Join(_) - | WorkspaceWsMessage::Leave(_) - | WorkspaceWsMessage::Error(_) => { - return Ok(()); - } - }; - - // Fetch workspace membership directly - use nvisy_postgres::query::WorkspaceMemberRepository; - - let member = conn - .find_workspace_member(ctx.workspace_id, ctx.account_id) - .await?; - - // Check if member exists and has the required permission - match member { - Some(m) if permission.is_permitted_by_role(m.member_role) => Ok(()), - Some(m) => { - tracing::debug!( - target: TRACING_TARGET, - account_id = %ctx.account_id, - workspace_id = %ctx.workspace_id, - required_permission = ?permission, - current_role = ?m.member_role, - "insufficient permissions for event" - ); - Err(ErrorKind::Forbidden.with_context(format!( - "Insufficient permissions: requires {:?}", - permission.minimum_required_role() - ))) - } - None => { - tracing::debug!( - target: TRACING_TARGET, - account_id = %ctx.account_id, - workspace_id = %ctx.workspace_id, - "not a member of workspace" - ); - Err(ErrorKind::Forbidden.with_context("Not a workspace member")) - } - } -} - -/// Processes an incoming WebSocket message from the client. 
-async fn process_client_message( - ctx: &WsContext, - msg: Message, - publisher: &WorkspaceEventPublisher, - conn: &mut nvisy_postgres::PgConn, - metrics: &ConnectionMetrics, -) -> ControlFlow<(), ()> { - match msg { - Message::Text(text) => { - metrics.increment_received(); - - if !validate_message_size(ctx, text.len(), metrics) { - return ControlFlow::Continue(()); - } - - tracing::trace!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - message_length = text.len(), - "received text message" - ); - - match serde_json::from_str::(&text) { - Ok(ws_msg) => { - handle_client_message(ctx, ws_msg, publisher, conn, metrics).await; - ControlFlow::Continue(()) - } - Err(e) => { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to parse message, dropping" - ); - metrics.increment_errors(); - metrics.increment_dropped(); - ControlFlow::Continue(()) - } - } - } - Message::Binary(data) => { - metrics.increment_received(); - - if !validate_message_size(ctx, data.len(), metrics) { - return ControlFlow::Continue(()); - } - - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - data_length = data.len(), - "received binary message (not supported), dropping" - ); - metrics.increment_dropped(); - ControlFlow::Continue(()) - } - Message::Close(close_frame) => { - if let Some(cf) = close_frame { - tracing::info!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - close_code = cf.code, - close_reason = %cf.reason, - "client sent close frame" - ); - } else { - tracing::info!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "client sent close frame" - ); - } - ControlFlow::Break(()) - } - Message::Ping(payload) => { - tracing::trace!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - payload_len = payload.len(), - "received ping" - ); - ControlFlow::Continue(()) - } - Message::Pong(payload) => { - tracing::trace!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - payload_len = payload.len(), - "received pong" - ); - ControlFlow::Continue(()) - } - } -} - -/// Handles parsed messages from the client with permission checking. 
-async fn handle_client_message( - ctx: &WsContext, - msg: WorkspaceWsMessage, - publisher: &WorkspaceEventPublisher, - conn: &mut nvisy_postgres::PgConn, - metrics: &ConnectionMetrics, -) { - // Check permissions for this event - if let Err(e) = check_event_permission(conn, ctx, &msg).await { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %ctx.account_id, - message_type = ?std::mem::discriminant(&msg), - error = %e, - "permission denied for event, dropping" - ); - metrics.increment_dropped(); - metrics.increment_errors(); - return; - } - - match &msg { - WorkspaceWsMessage::Typing(_) => { - tracing::trace!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "publishing typing indicator" - ); - - // Publish with fresh timestamp - let msg_with_ts = WorkspaceWsMessage::typing(ctx.account_id, None); - - if let Err(e) = publisher - .publish_message(ctx.workspace_id, msg_with_ts) - .await - { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to publish typing indicator" - ); - metrics.increment_errors(); - } else { - metrics.increment_published(); - } - } - WorkspaceWsMessage::DocumentUpdate(_) - | WorkspaceWsMessage::DocumentCreated(_) - | WorkspaceWsMessage::DocumentDeleted(_) - | WorkspaceWsMessage::FilePreprocessed(_) - | WorkspaceWsMessage::FileTransformed(_) - | WorkspaceWsMessage::FilePostprocessed(_) - | WorkspaceWsMessage::JobProgress(_) - | WorkspaceWsMessage::JobCompleted(_) - | WorkspaceWsMessage::JobFailed(_) - | WorkspaceWsMessage::MemberPresence(_) - | WorkspaceWsMessage::MemberAdded(_) - | WorkspaceWsMessage::MemberRemoved(_) - | WorkspaceWsMessage::WorkspaceUpdated(_) => { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - message_type = ?std::mem::discriminant(&msg), - "publishing event to NATS" - ); - - if let Err(e) = publisher.publish_message(ctx.workspace_id, msg).await { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to publish event to NATS" - ); - metrics.increment_errors(); - } else { - metrics.increment_published(); - } - } - WorkspaceWsMessage::Join(_) - | WorkspaceWsMessage::Leave(_) - | WorkspaceWsMessage::Error(_) => { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - message_type = ?std::mem::discriminant(&msg), - "ignoring system message from client" - ); - metrics.increment_dropped(); - } - } -} - -/// Handles the WebSocket connection lifecycle with NATS pub/sub. -/// -/// This function: -/// 1. Fetches account details and creates context -/// 2. Creates a unique NATS consumer for this WebSocket connection -/// 3. Publishes a join message to all clients -/// 4. Spawns separate tasks for sending and receiving -/// 5. Uses `tokio::select!` to handle whichever task completes first -/// 6. 
Publishes a leave message and cleans up -async fn handle_workspace_websocket( - socket: WebSocket, - workspace_id: Uuid, - account_id: Uuid, - nats_client: NatsClient, - pg_client: PgClient, -) { - let start_time = std::time::Instant::now(); - let ctx = WsContext::new(workspace_id, account_id); - let metrics = ConnectionMetrics::new(); - - tracing::info!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %ctx.account_id, - workspace_id = %ctx.workspace_id, - "websocket connection established" - ); - - // Get a connection for initial queries - let mut conn = match pg_client.get_connection().await { - Ok(conn) => conn, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to acquire database connection" - ); - return; - } - }; - - // Fetch account display name - let display_name = match conn.find_account_by_id(account_id).await { - Ok(Some(account)) => account.display_name, - Ok(None) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %account_id, - "account not found, aborting connection" - ); - return; - } - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %account_id, - error = %e, - "failed to fetch account, aborting connection" - ); - return; - } - }; - - // Create publisher for this connection - let publisher = match nats_client.workspace_event_publisher().await { - Ok(p) => p, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to create event publisher, aborting connection" - ); - return; - } - }; - - // Create subscriber with unique consumer name for this connection - let consumer_name = format!("ws-{}", ctx.connection_id); - let subscriber = match nats_client - .workspace_event_subscriber_for_workspace(&consumer_name, workspace_id) - .await - { - Ok(s) => s, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to create event subscriber, aborting connection" - ); - return; - } - }; - - // Get message stream - let mut message_stream = match subscriber.subscribe().await { - Ok(stream) => stream, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to subscribe to event stream, aborting connection" - ); - return; - } - }; - - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - consumer_name = %consumer_name, - "NATS subscriber created" - ); - - // Split socket into sender and receiver - let (mut sender, mut receiver) = socket.split(); - - // Create and publish join message - let join_msg = WorkspaceWsMessage::join(ctx.account_id, display_name); - - if let Err(e) = publisher - .publish_message(ctx.workspace_id, join_msg.clone()) - .await - { - tracing::error!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to publish join message" - ); - } else { - metrics.increment_published(); - } - - // Clone context and clients for the receive task - let recv_ctx = ctx.clone(); - let recv_publisher = publisher.clone(); - let recv_pg_client = pg_client.clone(); - let recv_metrics = metrics.clone(); - - // Spawn a task to receive messages from the client - let recv_task = tokio::spawn(async move { - // Get a dedicated connection for the receive task - let mut recv_conn = match recv_pg_client.get_connection().await { - 
Ok(conn) => conn, - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %recv_ctx.connection_id, - error = %e, - "failed to acquire database connection for receive task" - ); - return; - } - }; - - while let Some(msg_result) = receiver.next().await { - match msg_result { - Ok(msg) => { - if process_client_message( - &recv_ctx, - msg, - &recv_publisher, - &mut recv_conn, - &recv_metrics, - ) - .await - .is_break() - { - break; - } - } - Err(e) => { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %recv_ctx.connection_id, - error = %e, - "error receiving from websocket" - ); - recv_metrics.increment_errors(); - break; - } - } - } - }); - - // Spawn a task to send messages from NATS to the client - let send_ctx = ctx.clone(); - let send_metrics = metrics.clone(); - let send_task = tokio::spawn(async move { - // Send initial join message to this client - if let Ok(text) = serde_json::to_string(&join_msg) { - if let Err(e) = sender.send(Message::Text(Utf8Bytes::from(text))).await { - tracing::error!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to send join message, aborting connection" - ); - return; - } - send_metrics.increment_sent(); - } - - // Listen for NATS messages and forward to this client - loop { - match message_stream.next_with_timeout(NATS_FETCH_TIMEOUT).await { - Ok(Some(mut nats_msg)) => { - let ws_message = &nats_msg.payload().message; - - // Echo prevention: don't send messages back to the sender - if let Some(sender_id) = ws_message.account_id() - && sender_id == send_ctx.account_id - { - if let Err(e) = nats_msg.ack().await { - tracing::trace!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to ack echoed message" - ); - } - continue; - } - - // Serialize and send the message - match serde_json::to_string(ws_message) { - Ok(text) => { - if let Err(e) = sender.send(Message::Text(Utf8Bytes::from(text))).await - { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to send message, client disconnected" - ); - break; - } - send_metrics.increment_sent(); - - // Acknowledge the message - if let Err(e) = nats_msg.ack().await { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to acknowledge NATS message" - ); - send_metrics.increment_errors(); - } - } - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "failed to serialize message" - ); - send_metrics.increment_errors(); - - // Still ack to prevent redelivery - let _ = nats_msg.ack().await; - } - } - } - Ok(None) => { - // Timeout - continue waiting - continue; - } - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - connection_id = %send_ctx.connection_id, - error = %e, - "error receiving from NATS stream" - ); - send_metrics.increment_errors(); - break; - } - } - } - }); - - // Wait for either task to complete with graceful shutdown - let shutdown_result = tokio::time::timeout(GRACEFUL_SHUTDOWN_TIMEOUT, async { - tokio::select! 
{ - _ = recv_task => { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "receive task completed" - ); - }, - _ = send_task => { - tracing::debug!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "send task completed" - ); - } - } - }) - .await; - - if shutdown_result.is_err() { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - "graceful shutdown timeout exceeded" - ); - } - - // Publish leave message - let leave_msg = WorkspaceWsMessage::leave(ctx.account_id); - if let Err(e) = publisher.publish_message(ctx.workspace_id, leave_msg).await { - tracing::warn!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - error = %e, - "failed to publish leave message" - ); - } - - // Log final metrics - let duration = start_time.elapsed(); - let final_metrics = metrics.snapshot(); - tracing::info!( - target: TRACING_TARGET, - connection_id = %ctx.connection_id, - account_id = %ctx.account_id, - workspace_id = %ctx.workspace_id, - duration_ms = duration.as_millis(), - messages_sent = final_metrics.sent, - messages_received = final_metrics.received, - messages_published = final_metrics.published, - messages_dropped = final_metrics.dropped, - errors = final_metrics.errors, - "websocket connection closed" - ); -} - -/// Establishes a WebSocket connection for a workspace. -#[tracing::instrument(skip_all, fields( - account_id = %auth_claims.account_id, - workspace_id = %path_params.workspace_id -))] -async fn workspace_websocket_handler( - State(pg_client): State, - State(nats_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, - ws: WebSocketUpgrade, -) -> Result { - let workspace_id = path_params.workspace_id; - let account_id = auth_claims.account_id; - - tracing::debug!( - target: TRACING_TARGET, - account_id = %account_id, - workspace_id = %workspace_id, - "websocket connection requested" - ); - - // Verify workspace exists and user has basic access - let mut conn = pg_client.get_connection().await?; - - // Check if user has minimum permission to view documents - auth_claims - .authorize_workspace(&mut conn, workspace_id, Permission::ViewDocuments) - .await?; - - // Verify the workspace exists - if conn.find_workspace_by_id(workspace_id).await?.is_none() { - return Err(ErrorKind::NotFound.with_resource("workspace")); - } - - tracing::info!( - target: TRACING_TARGET, - account_id = %account_id, - workspace_id = %workspace_id, - "websocket upgrade authorized" - ); - - // Upgrade the connection to WebSocket - Ok(ws.on_upgrade(move |socket| { - handle_workspace_websocket(socket, workspace_id, account_id, nats_client, pg_client) - })) -} - -fn workspace_websocket_handler_docs(op: TransformOperation) -> TransformOperation { - op.summary("Connect to workspace WebSocket") - .description( - "Establishes a WebSocket connection for real-time workspace events and collaboration.", - ) - .response::<101, ()>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Returns a [`Router`] with WebSocket routes for workspaces. 
-/// -/// [`Router`]: axum::routing::Router -pub fn routes() -> ApiRouter { - use aide::axum::routing::*; - - ApiRouter::new() - .api_route( - "/workspaces/{workspaceId}/ws/", - get_with( - workspace_websocket_handler, - workspace_websocket_handler_docs, - ), - ) - .with_path_items(|item| item.tag("WebSocket")) -} diff --git a/crates/nvisy-server/src/middleware/specification.rs b/crates/nvisy-server/src/middleware/specification.rs index 8606489..fa49681 100644 --- a/crates/nvisy-server/src/middleware/specification.rs +++ b/crates/nvisy-server/src/middleware/specification.rs @@ -202,8 +202,8 @@ fn api_docs(api: TransformOpenApi) -> TransformOpenApi { ..Default::default() }) .tag(Tag { - name: "WebSocket".into(), - description: Some("Real-time communication".into()), + name: "Chat".into(), + description: Some("AI chat and document interaction".into()), ..Default::default() }) } diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..4f10c27 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,84 @@ +# Architecture + +## Crate Structure + +| Crate | Responsibility | +|-------|----------------| +| `nvisy-server` | HTTP API, handlers, middleware, auth | +| `nvisy-postgres` | Database models, queries, migrations | +| `nvisy-nats` | Messaging, job queues, object storage | +| `nvisy-rig` | LLM orchestration, RAG, chat agents | +| `nvisy-webhook` | External event delivery | +| `nvisy-core` | Shared types and utilities | +| `nvisy-cli` | Command-line interface | + +## Technology Stack + +| Layer | Technology | Purpose | +|-------|------------|---------| +| Language | Rust | Memory safety, performance, concurrency | +| Database | PostgreSQL + pgvector | Relational data + vector embeddings | +| Messaging | NATS | Pub/sub, job queues, object storage | +| AI Framework | rig-core | LLM orchestration | +| HTTP Server | Axum + Tower | API endpoints and middleware | +| Real-time | SSE | Streaming AI responses | +| Auth | JWT | Stateless authentication | + +## Data Model + +### Core Entities + +| Entity | Purpose | +|--------|---------| +| Account | User authentication and profile | +| Workspace | Collaborative space for documents | +| Document | Logical grouping of related files | +| File | Individual uploaded file with metadata | +| Version | Parsed representation at a point in time | +| Section | Hierarchical content structure | +| Chunk | Indexed segment with vector embedding | +| Entity | Extracted person, company, date, amount | +| ChatSession | AI conversation context | + +### Hierarchy + +- **Workspace** contains Documents +- **Document** contains Files and Versions +- **Version** contains Sections +- **Section** contains Chunks +- **Chunk** contains Entities, Claims, and References + +### Content Types + +| Type | Examples | Processing | +|------|----------|------------| +| Document | PDF, DOCX, TXT | Text extraction, structure parsing | +| Image | PNG, JPG, SVG | OCR, visual analysis | +| Spreadsheet | XLSX, CSV | Table normalization, schema inference | +| Presentation | PPTX, KEY | Slide extraction, structure parsing | +| Audio | MP3, WAV | Transcription with timestamps | +| Video | MP4, MOV | Transcription, frame extraction | +| Archive | ZIP, TAR | Recursive extraction and processing | +| Data | JSON, XML | Schema inference, normalization | + +## Canonical Representation + +All source files normalize to a common schema containing: + +- **Sections**: Hierarchical structure +- **Entities**: People, companies, dates, amounts +- **Tables**: Structured data +- 
**Claims**: Assertions that can be verified +- **References**: Links to other documents/sections +- **Provenance**: Source file, extraction method, confidence + +## Chunking Strategy + +Effective cross-file intelligence depends on chunking quality. + +Requirements: +- **Semantic chunks**: Based on meaning, not fixed token sizes +- **Stable chunk IDs**: Enable diffs, history, and references +- **Hierarchical chunks**: Document → Section → Paragraph → Sentence + +Each chunk maintains: stable content-addressable ID, hierarchical location, vector embedding, extracted entities, token count, and byte range in source. diff --git a/docs/INTELLIGENCE.md b/docs/INTELLIGENCE.md new file mode 100644 index 0000000..0c11028 --- /dev/null +++ b/docs/INTELLIGENCE.md @@ -0,0 +1,67 @@ +# Intelligence Layer + +## Cross-Document Linking + +Related content across files must be explicitly linked. This is relationship modeling, not retrieval. + +| Technique | Purpose | +|-----------|---------| +| Entity resolution | Same person/company across files | +| Concept embeddings | Same idea, different wording | +| Citation graphs | What references what | +| Contradiction detection | Conflicting statements across documents | + +## Hybrid Search + +Vector search alone is insufficient for cross-file queries. + +| Layer | Purpose | Example | +|-------|---------|---------| +| Vector search | Semantic similarity | "Find clauses about liability" | +| Symbolic filters | Dates, types, authors | "After 2021", "Type: NDA" | +| Graph traversal | Relationships | "Related to Company X" | + +A query like "Show me all NDA clauses after 2021 that conflict with policy X" requires all three layers. + +## Temporal Intelligence + +| Capability | Description | +|------------|-------------| +| Versioned representations | Track document evolution | +| Semantic diffing | Changes in meaning, not just text | +| Temporal queries | "What changed since last quarter?" | +| Change attribution | Who changed what and when | + +## Grounded Reasoning + +Every assertion links to evidence: file, section, exact text, and relevance score. Without this, enterprise users cannot validate conclusions. + +## Cross-File Reasoning Patterns + +Reusable patterns across any document set: + +| Pattern | Question | Example | +|---------|----------|---------| +| Consistency | Do all docs use the same definition? | "Is 'confidential' defined consistently?" | +| Coverage | Is X addressed somewhere? | "Do all contracts have termination clauses?" | +| Conflict | Do any statements contradict? | "Are there conflicting liability terms?" | +| Redundancy | Are we repeating ourselves? | "Is the same clause duplicated?" | +| Completeness | What's missing? | "Which required sections are absent?" | +| Drift | Has X changed from the standard? | "How does this differ from the template?" | + +## Entity Resolution + +The same entity appears differently across files. + +| Challenge | Example | +|-----------|---------| +| Name variations | "IBM", "International Business Machines", "Big Blue" | +| Role changes | "John Smith (CEO)" vs "John Smith (Board Member)" | +| Temporal | "Acme Corp" acquired by "MegaCorp" in 2022 | +| Abbreviations | "NDA", "Non-Disclosure Agreement" | + +Resolution process: extraction → clustering → disambiguation → linking → propagation. + +## Knowledge Graph + +Entities link to Sections. Sections reference Sections. Documents relate to Documents. This graph grows over time and cannot be replicated by tools that process files in isolation. 
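The hybrid search layering described above (vector similarity, symbolic filters, graph traversal) can be sketched as a single query type. A minimal Rust sketch follows; the names (`SymbolicFilter`, `GraphConstraint`, `HybridQuery`) are illustrative assumptions and not types from this codebase. The point is that a query like "all NDA clauses after 2021 that conflict with policy X" decomposes into one value per layer, with symbolic filters narrowing the candidate set before the vector search runs.

```rust
/// Symbolic filters applied before vector retrieval ("After 2021", "Type: NDA").
#[derive(Debug, Clone)]
pub enum SymbolicFilter {
    AfterDate(String),    // ISO 8601 date, e.g. "2021-12-31"
    DocumentType(String), // e.g. "NDA"
    Author(String),
}

/// Graph constraint layered on the ranked candidates ("related to Company X").
#[derive(Debug, Clone)]
pub struct GraphConstraint {
    pub related_to_entity: String,
    pub max_hops: u8,
}

/// A query that touches all three layers of the hybrid search stack.
#[derive(Debug, Clone)]
pub struct HybridQuery {
    /// Embedding of the natural-language query, used for semantic similarity.
    pub query_embedding: Vec<f32>,
    /// Filters that narrow the candidate set before the vector search runs.
    pub filters: Vec<SymbolicFilter>,
    /// Optional graph traversal applied to the filtered, ranked results.
    pub graph: Option<GraphConstraint>,
    pub limit: usize,
}

impl HybridQuery {
    /// "Show me all NDA clauses after 2021 that conflict with policy X".
    pub fn nda_conflicts_example(query_embedding: Vec<f32>, policy_entity: &str) -> Self {
        Self {
            query_embedding,
            filters: vec![
                SymbolicFilter::DocumentType("NDA".into()),
                SymbolicFilter::AfterDate("2021-12-31".into()),
            ],
            graph: Some(GraphConstraint {
                related_to_entity: policy_entity.to_string(),
                max_hops: 2,
            }),
            limit: 20,
        }
    }
}
```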
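To make the chunking requirements from the architecture notes above more concrete, here is a minimal Rust sketch of a canonical chunk record and its stable, content-addressable ID. The names (`ChunkLocation`, `CanonicalChunk`, `derive_chunk_id`) are illustrative assumptions rather than types from this codebase; the hashing uses the `sha2` and `hex` crates already present in the workspace lock file, and whether the hierarchical location is mixed into the hash is a design choice the notes leave open.

```rust
use sha2::{Digest, Sha256};
use uuid::Uuid;

/// Hierarchical location: Document -> Section -> Paragraph -> Sentence.
#[derive(Debug, Clone)]
pub struct ChunkLocation {
    pub document_id: Uuid,
    pub section_path: Vec<u32>, // e.g. [2, 1] = section 2, subsection 1
    pub paragraph: u32,
    pub sentence: Option<u32>,
}

/// One canonical chunk, mirroring the properties listed above (hypothetical shape).
#[derive(Debug, Clone)]
pub struct CanonicalChunk {
    /// Stable content-addressable identifier; unchanged content keeps its ID.
    pub chunk_id: String,
    pub location: ChunkLocation,
    /// Vector embedding (a pgvector column in practice).
    pub embedding: Vec<f32>,
    /// Entities extracted from this chunk (people, companies, dates, amounts).
    pub entities: Vec<String>,
    pub token_count: u32,
    /// Byte range of the chunk in the source file.
    pub byte_range: (u64, u64),
}

/// Derives a stable ID so that re-ingesting unchanged content yields the same ID,
/// which is what enables diffs, history, and cross-document references.
/// This sketch mixes the hierarchical location into the hash; a purely
/// content-addressed scheme would hash the text alone.
pub fn derive_chunk_id(location: &ChunkLocation, text: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(location.document_id.as_bytes());
    for part in &location.section_path {
        hasher.update(part.to_be_bytes());
    }
    hasher.update(text.as_bytes());
    hex::encode(hasher.finalize())
}
```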
diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..c1e5e3f --- /dev/null +++ b/docs/README.md @@ -0,0 +1,13 @@ +# Nvisy Documentation + +## Overview + +Nvisy transforms uploaded files into structured, normalized representations that enable cross-file intelligence. The system focuses on four core capabilities: reading, search, comparison, and extraction. + +## Documentation + +| Document | Description | +|----------|-------------| +| [Vision](./VISION.md) | Core philosophy and design principles | +| [Architecture](./ARCHITECTURE.md) | System design, data model, and technology stack | +| [Intelligence](./INTELLIGENCE.md) | Cross-file reasoning, search, and extraction | diff --git a/docs/VISION.md b/docs/VISION.md new file mode 100644 index 0000000..d79b0f5 --- /dev/null +++ b/docs/VISION.md @@ -0,0 +1,50 @@ +# Vision & Design Principles + +## Problem Statement + +Document intelligence tools typically treat files as the unit of work. This approach prevents cross-file reasoning, entity resolution across documents, and institutional memory accumulation. + +Nvisy addresses this by transforming uploaded files into structured, normalized representations. The knowledge graph—not the files—is the primary asset. + +## Design Principles + +### 1. Structure Over Blobs + +Every file type is converted into machine-readable structure containing both content and structure (headings, tables, sections, entities). Raw files are archived; structured representations are the working data. + +### 2. Canonical Representation + +A single internal schema normalizes all source formats. This enables comparisons across documents, unified search, and cross-file reasoning regardless of original file type. + +### 3. Grounded Reasoning + +Every conclusion links back to source material: file, section, exact text, and confidence score. Without provenance, enterprise users cannot validate or trust outputs. + +### 4. Isolation & Trust + +Cross-file intelligence requires strict isolation: +- Tenant-aware embeddings (tenant data never mixed) +- Permission-filtered retrieval (filter before search, not after) +- Comprehensive audit logs +- Provenance tracking + +### 5. Time Awareness + +Documents evolve. The system maintains versioned representations and supports semantic diffing (changes in meaning, not just text) and temporal queries across document history. + +## Core Capabilities + +| Capability | Description | +|------------|-------------| +| Reading | Parse and normalize any supported file format | +| Search | Hybrid search combining vector, symbolic, and graph queries | +| Comparison | Identify differences, conflicts, and drift across documents | +| Extraction | Pull entities, tables, claims, and structured data | + +## Differentiation + +The knowledge graph compounds over time. 
Tools that process files in isolation cannot replicate: +- Evolving cross-file graphs +- Entity resolution across time and authors +- Institutional memory accumulation +- Continuous learning from document corpus diff --git a/migrations/2025-05-27-011852_documents/down.sql b/migrations/2025-05-27-011852_documents/down.sql index 821a0f2..1af64ba 100644 --- a/migrations/2025-05-27-011852_documents/down.sql +++ b/migrations/2025-05-27-011852_documents/down.sql @@ -17,6 +17,7 @@ DROP TABLE IF EXISTS documents; -- Drop enum types DROP TYPE IF EXISTS ANNOTATION_TYPE; +DROP TYPE IF EXISTS FILE_SOURCE; DROP TYPE IF EXISTS CONTENT_SEGMENTATION; DROP TYPE IF EXISTS REQUIRE_MODE; DROP TYPE IF EXISTS PROCESSING_STATUS; diff --git a/migrations/2025-05-27-011852_documents/up.sql b/migrations/2025-05-27-011852_documents/up.sql index 9a2474d..9b48ccd 100644 --- a/migrations/2025-05-27-011852_documents/up.sql +++ b/migrations/2025-05-27-011852_documents/up.sql @@ -104,6 +104,16 @@ CREATE TYPE CONTENT_SEGMENTATION AS ENUM ( COMMENT ON TYPE CONTENT_SEGMENTATION IS 'Content segmentation strategy for document processing.'; +-- Create file source enum +CREATE TYPE FILE_SOURCE AS ENUM ( + 'uploaded', -- File was manually uploaded by a user + 'imported', -- File was imported from an external source + 'generated' -- File was generated by the system +); + +COMMENT ON TYPE FILE_SOURCE IS + 'Indicates how a file was created in the system.'; + -- Create document files table - Source files for processing CREATE TABLE document_files ( -- Primary identifiers @@ -115,11 +125,17 @@ CREATE TABLE document_files ( account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, parent_id UUID DEFAULT NULL REFERENCES document_files (id) ON DELETE SET NULL, + -- Version tracking (parent_id links to previous version, version_number tracks sequence) + version_number INTEGER NOT NULL DEFAULT 1, + + CONSTRAINT document_files_version_number_min CHECK (version_number >= 1), + -- File metadata display_name TEXT NOT NULL DEFAULT 'Untitled', original_filename TEXT NOT NULL DEFAULT 'Untitled', file_extension TEXT NOT NULL DEFAULT 'txt', tags TEXT[] NOT NULL DEFAULT '{}', + source FILE_SOURCE NOT NULL DEFAULT 'uploaded', CONSTRAINT document_files_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), CONSTRAINT document_files_original_filename_length CHECK (length(original_filename) BETWEEN 1 AND 255), @@ -192,18 +208,24 @@ CREATE INDEX document_files_display_name_trgm_idx ON document_files USING gin (display_name gin_trgm_ops) WHERE deleted_at IS NULL; +CREATE INDEX document_files_version_chain_idx + ON document_files (parent_id, version_number DESC) + WHERE parent_id IS NOT NULL AND deleted_at IS NULL; + -- Add table and column comments COMMENT ON TABLE document_files IS - 'Source files for document processing with pipeline management.'; + 'Source files for document processing with pipeline management and version tracking.'; COMMENT ON COLUMN document_files.id IS 'Unique file identifier'; COMMENT ON COLUMN document_files.workspace_id IS 'Parent workspace reference (required)'; COMMENT ON COLUMN document_files.document_id IS 'Parent document reference (optional)'; COMMENT ON COLUMN document_files.account_id IS 'Uploading account reference'; +COMMENT ON COLUMN document_files.version_number IS 'Version number (1 for original, increments for new versions via parent_id chain)'; COMMENT ON COLUMN document_files.display_name IS 'Display name (1-255 chars)'; COMMENT ON COLUMN document_files.original_filename IS 'Original 
upload filename (1-255 chars)'; COMMENT ON COLUMN document_files.file_extension IS 'File extension (1-20 alphanumeric)'; COMMENT ON COLUMN document_files.tags IS 'Classification tags (max 32)'; +COMMENT ON COLUMN document_files.source IS 'How the file was created (uploaded, imported, generated)'; COMMENT ON COLUMN document_files.require_mode IS 'Processing mode required'; COMMENT ON COLUMN document_files.processing_priority IS 'Priority 1-10 (10=highest)'; COMMENT ON COLUMN document_files.processing_status IS 'Current processing status'; diff --git a/migrations/2026-01-09-002114_chat/down.sql b/migrations/2026-01-09-002114_chat/down.sql new file mode 100644 index 0000000..26897ab --- /dev/null +++ b/migrations/2026-01-09-002114_chat/down.sql @@ -0,0 +1,10 @@ +-- Revert chat migration + +-- Drop tables in reverse order (respecting foreign key dependencies) +DROP TABLE IF EXISTS chat_operations; +DROP TABLE IF EXISTS chat_tool_calls; +DROP TABLE IF EXISTS chat_sessions; + +-- Drop enums +DROP TYPE IF EXISTS CHAT_TOOL_STATUS; +DROP TYPE IF EXISTS CHAT_SESSION_STATUS; diff --git a/migrations/2026-01-09-002114_chat/up.sql b/migrations/2026-01-09-002114_chat/up.sql new file mode 100644 index 0000000..cf5dae8 --- /dev/null +++ b/migrations/2026-01-09-002114_chat/up.sql @@ -0,0 +1,202 @@ +-- Chat: LLM-powered document editing sessions and operations tracking + +-- Chat session lifecycle status +CREATE TYPE CHAT_SESSION_STATUS AS ENUM ( + 'active', + 'paused', + 'archived' +); + +COMMENT ON TYPE CHAT_SESSION_STATUS IS + 'Lifecycle status for chat editing sessions.'; + +-- Chat sessions table definition +CREATE TABLE chat_sessions ( + -- Primary identifier + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + + -- References + workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, + account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, + primary_file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, + + -- Session attributes + display_name TEXT NOT NULL DEFAULT 'Untitled Session', + session_status CHAT_SESSION_STATUS NOT NULL DEFAULT 'active', + + CONSTRAINT chat_sessions_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), + + -- Model configuration (model name, temperature, max tokens, etc.) 
+ model_config JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT chat_sessions_model_config_size CHECK (length(model_config::TEXT) BETWEEN 2 AND 8192), + + -- Usage statistics + message_count INTEGER NOT NULL DEFAULT 0, + token_count INTEGER NOT NULL DEFAULT 0, + + CONSTRAINT chat_sessions_message_count_min CHECK (message_count >= 0), + CONSTRAINT chat_sessions_token_count_min CHECK (token_count >= 0), + + -- Lifecycle timestamps + created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + + CONSTRAINT chat_sessions_updated_after_created CHECK (updated_at >= created_at) +); + +-- Triggers for chat_sessions table +SELECT setup_updated_at('chat_sessions'); + +-- Indexes for chat_sessions table +CREATE INDEX chat_sessions_workspace_idx + ON chat_sessions (workspace_id, created_at DESC); + +CREATE INDEX chat_sessions_account_idx + ON chat_sessions (account_id, created_at DESC); + +CREATE INDEX chat_sessions_file_idx + ON chat_sessions (primary_file_id); + +CREATE INDEX chat_sessions_status_idx + ON chat_sessions (session_status, workspace_id) + WHERE session_status = 'active'; + +-- Comments for chat_sessions table +COMMENT ON TABLE chat_sessions IS + 'LLM-assisted document editing sessions.'; + +COMMENT ON COLUMN chat_sessions.id IS 'Unique session identifier'; +COMMENT ON COLUMN chat_sessions.workspace_id IS 'Reference to the workspace'; +COMMENT ON COLUMN chat_sessions.account_id IS 'Account that created the session'; +COMMENT ON COLUMN chat_sessions.primary_file_id IS 'Primary file being edited in this session'; +COMMENT ON COLUMN chat_sessions.display_name IS 'User-friendly session name (1-255 chars)'; +COMMENT ON COLUMN chat_sessions.session_status IS 'Session lifecycle status (active, paused, archived)'; +COMMENT ON COLUMN chat_sessions.model_config IS 'LLM configuration (model, temperature, etc.)'; +COMMENT ON COLUMN chat_sessions.message_count IS 'Total number of messages exchanged in this session'; +COMMENT ON COLUMN chat_sessions.token_count IS 'Total tokens used in this session'; +COMMENT ON COLUMN chat_sessions.created_at IS 'Timestamp when session was created'; +COMMENT ON COLUMN chat_sessions.updated_at IS 'Timestamp when session was last modified'; + +-- Tool execution status +CREATE TYPE CHAT_TOOL_STATUS AS ENUM ( + 'pending', + 'running', + 'completed', + 'cancelled' +); + +COMMENT ON TYPE CHAT_TOOL_STATUS IS + 'Execution status for chat tool calls.'; + +-- Chat tool calls table definition +CREATE TABLE chat_tool_calls ( + -- Primary identifier + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + + -- References + session_id UUID NOT NULL REFERENCES chat_sessions (id) ON DELETE CASCADE, + file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, + chunk_id UUID DEFAULT NULL REFERENCES document_chunks (id) ON DELETE SET NULL, + + -- Tool attributes + tool_name TEXT NOT NULL, + tool_input JSONB NOT NULL DEFAULT '{}', + tool_output JSONB NOT NULL DEFAULT '{}', + tool_status CHAT_TOOL_STATUS NOT NULL DEFAULT 'pending', + + CONSTRAINT chat_tool_calls_tool_name_length CHECK (length(trim(tool_name)) BETWEEN 1 AND 128), + CONSTRAINT chat_tool_calls_tool_input_size CHECK (length(tool_input::TEXT) BETWEEN 2 AND 65536), + CONSTRAINT chat_tool_calls_tool_output_size CHECK (length(tool_output::TEXT) BETWEEN 2 AND 65536), + + -- Timing + started_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + completed_at TIMESTAMPTZ DEFAULT NULL, + + CONSTRAINT chat_tool_calls_completed_after_started CHECK (completed_at IS NULL OR 
completed_at >= started_at) +); + +-- Indexes for chat_tool_calls table +CREATE INDEX chat_tool_calls_session_idx + ON chat_tool_calls (session_id, started_at DESC); + +CREATE INDEX chat_tool_calls_file_idx + ON chat_tool_calls (file_id, started_at DESC); + +CREATE INDEX chat_tool_calls_status_idx + ON chat_tool_calls (tool_status, started_at DESC) + WHERE tool_status IN ('pending', 'running'); + +CREATE INDEX chat_tool_calls_tool_name_idx + ON chat_tool_calls (tool_name); + +-- Comments for chat_tool_calls table +COMMENT ON TABLE chat_tool_calls IS + 'Tool invocations for debugging and usage tracking. Input/output contain references, not document content.'; + +COMMENT ON COLUMN chat_tool_calls.id IS 'Unique tool call identifier'; +COMMENT ON COLUMN chat_tool_calls.session_id IS 'Reference to the chat session'; +COMMENT ON COLUMN chat_tool_calls.file_id IS 'Reference to the file being operated on'; +COMMENT ON COLUMN chat_tool_calls.chunk_id IS 'Optional reference to a specific chunk'; +COMMENT ON COLUMN chat_tool_calls.tool_name IS 'Name of the tool (merge, split, redact, translate, etc.)'; +COMMENT ON COLUMN chat_tool_calls.tool_input IS 'Tool parameters as JSON (references, not content)'; +COMMENT ON COLUMN chat_tool_calls.tool_output IS 'Tool result as JSON (references, not content)'; +COMMENT ON COLUMN chat_tool_calls.tool_status IS 'Execution status (pending, running, completed, cancelled)'; +COMMENT ON COLUMN chat_tool_calls.started_at IS 'Timestamp when tool call was created/started'; +COMMENT ON COLUMN chat_tool_calls.completed_at IS 'Timestamp when tool execution completed'; + +-- Chat operations table definition +CREATE TABLE chat_operations ( + -- Primary identifier + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + + -- References + tool_call_id UUID NOT NULL REFERENCES chat_tool_calls (id) ON DELETE CASCADE, + file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, + chunk_id UUID DEFAULT NULL REFERENCES document_chunks (id) ON DELETE SET NULL, + + -- Operation attributes + operation_type TEXT NOT NULL, + operation_diff JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT chat_operations_operation_type_length CHECK (length(trim(operation_type)) BETWEEN 1 AND 64), + CONSTRAINT chat_operations_operation_diff_size CHECK (length(operation_diff::TEXT) BETWEEN 2 AND 131072), + + -- Application state + applied BOOLEAN NOT NULL DEFAULT FALSE, + reverted BOOLEAN NOT NULL DEFAULT FALSE, + + CONSTRAINT chat_operations_revert_requires_applied CHECK (NOT reverted OR applied), + + -- Timing + created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + applied_at TIMESTAMPTZ DEFAULT NULL, + + CONSTRAINT chat_operations_applied_after_created CHECK (applied_at IS NULL OR applied_at >= created_at) +); + +-- Indexes for chat_operations table +CREATE INDEX chat_operations_tool_call_idx + ON chat_operations (tool_call_id); + +CREATE INDEX chat_operations_file_idx + ON chat_operations (file_id, created_at DESC); + +CREATE INDEX chat_operations_pending_idx + ON chat_operations (file_id, applied) + WHERE NOT applied; + +-- Comments for chat_operations table +COMMENT ON TABLE chat_operations IS + 'Document operations (diffs) produced by tool calls. 
Stores positions, not content.'; + +COMMENT ON COLUMN chat_operations.id IS 'Unique operation identifier'; +COMMENT ON COLUMN chat_operations.tool_call_id IS 'Reference to the tool call that produced this operation'; +COMMENT ON COLUMN chat_operations.file_id IS 'Reference to the file being modified'; +COMMENT ON COLUMN chat_operations.chunk_id IS 'Optional reference to a specific chunk'; +COMMENT ON COLUMN chat_operations.operation_type IS 'Type of operation (insert, replace, delete, format, merge, split, etc.)'; +COMMENT ON COLUMN chat_operations.operation_diff IS 'The diff specification as JSON (positions, not content)'; +COMMENT ON COLUMN chat_operations.applied IS 'Whether this operation has been applied to the document'; +COMMENT ON COLUMN chat_operations.reverted IS 'Whether this operation was reverted by the user'; +COMMENT ON COLUMN chat_operations.created_at IS 'Timestamp when operation was created'; +COMMENT ON COLUMN chat_operations.applied_at IS 'Timestamp when operation was applied'; diff --git a/migrations/2026-01-09-002114_studio/down.sql b/migrations/2026-01-09-002114_studio/down.sql deleted file mode 100644 index 18160a6..0000000 --- a/migrations/2026-01-09-002114_studio/down.sql +++ /dev/null @@ -1,10 +0,0 @@ --- Revert studio migration - --- Drop tables in reverse order (respecting foreign key dependencies) -DROP TABLE IF EXISTS studio_operations; -DROP TABLE IF EXISTS studio_tool_calls; -DROP TABLE IF EXISTS studio_sessions; - --- Drop enums -DROP TYPE IF EXISTS STUDIO_TOOL_STATUS; -DROP TYPE IF EXISTS STUDIO_SESSION_STATUS; diff --git a/migrations/2026-01-09-002114_studio/up.sql b/migrations/2026-01-09-002114_studio/up.sql deleted file mode 100644 index 584f089..0000000 --- a/migrations/2026-01-09-002114_studio/up.sql +++ /dev/null @@ -1,202 +0,0 @@ --- Studio: LLM-powered document editing sessions and operations tracking - --- Studio session lifecycle status -CREATE TYPE STUDIO_SESSION_STATUS AS ENUM ( - 'active', - 'paused', - 'archived' -); - -COMMENT ON TYPE STUDIO_SESSION_STATUS IS - 'Lifecycle status for studio editing sessions.'; - --- Studio sessions table definition -CREATE TABLE studio_sessions ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, - account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - primary_file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - - -- Session attributes - display_name TEXT NOT NULL DEFAULT 'Untitled Session', - session_status STUDIO_SESSION_STATUS NOT NULL DEFAULT 'active', - - CONSTRAINT studio_sessions_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), - - -- Model configuration (model name, temperature, max tokens, etc.) 
- model_config JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT studio_sessions_model_config_size CHECK (length(model_config::TEXT) BETWEEN 2 AND 8192), - - -- Usage statistics - message_count INTEGER NOT NULL DEFAULT 0, - token_count INTEGER NOT NULL DEFAULT 0, - - CONSTRAINT studio_sessions_message_count_min CHECK (message_count >= 0), - CONSTRAINT studio_sessions_token_count_min CHECK (token_count >= 0), - - -- Lifecycle timestamps - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - - CONSTRAINT studio_sessions_updated_after_created CHECK (updated_at >= created_at) -); - --- Triggers for studio_sessions table -SELECT setup_updated_at('studio_sessions'); - --- Indexes for studio_sessions table -CREATE INDEX studio_sessions_workspace_idx - ON studio_sessions (workspace_id, created_at DESC); - -CREATE INDEX studio_sessions_account_idx - ON studio_sessions (account_id, created_at DESC); - -CREATE INDEX studio_sessions_file_idx - ON studio_sessions (primary_file_id); - -CREATE INDEX studio_sessions_status_idx - ON studio_sessions (session_status, workspace_id) - WHERE session_status = 'active'; - --- Comments for studio_sessions table -COMMENT ON TABLE studio_sessions IS - 'LLM-assisted document editing sessions.'; - -COMMENT ON COLUMN studio_sessions.id IS 'Unique session identifier'; -COMMENT ON COLUMN studio_sessions.workspace_id IS 'Reference to the workspace'; -COMMENT ON COLUMN studio_sessions.account_id IS 'Account that created the session'; -COMMENT ON COLUMN studio_sessions.primary_file_id IS 'Primary file being edited in this session'; -COMMENT ON COLUMN studio_sessions.display_name IS 'User-friendly session name (1-255 chars)'; -COMMENT ON COLUMN studio_sessions.session_status IS 'Session lifecycle status (active, paused, archived)'; -COMMENT ON COLUMN studio_sessions.model_config IS 'LLM configuration (model, temperature, etc.)'; -COMMENT ON COLUMN studio_sessions.message_count IS 'Total number of messages exchanged in this session'; -COMMENT ON COLUMN studio_sessions.token_count IS 'Total tokens used in this session'; -COMMENT ON COLUMN studio_sessions.created_at IS 'Timestamp when session was created'; -COMMENT ON COLUMN studio_sessions.updated_at IS 'Timestamp when session was last modified'; - --- Tool execution status -CREATE TYPE STUDIO_TOOL_STATUS AS ENUM ( - 'pending', - 'running', - 'completed', - 'cancelled' -); - -COMMENT ON TYPE STUDIO_TOOL_STATUS IS - 'Execution status for studio tool calls.'; - --- Studio tool calls table definition -CREATE TABLE studio_tool_calls ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - session_id UUID NOT NULL REFERENCES studio_sessions (id) ON DELETE CASCADE, - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - chunk_id UUID DEFAULT NULL REFERENCES document_chunks (id) ON DELETE SET NULL, - - -- Tool attributes - tool_name TEXT NOT NULL, - tool_input JSONB NOT NULL DEFAULT '{}', - tool_output JSONB NOT NULL DEFAULT '{}', - tool_status STUDIO_TOOL_STATUS NOT NULL DEFAULT 'pending', - - CONSTRAINT studio_tool_calls_tool_name_length CHECK (length(trim(tool_name)) BETWEEN 1 AND 128), - CONSTRAINT studio_tool_calls_tool_input_size CHECK (length(tool_input::TEXT) BETWEEN 2 AND 65536), - CONSTRAINT studio_tool_calls_tool_output_size CHECK (length(tool_output::TEXT) BETWEEN 2 AND 65536), - - -- Timing - started_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - completed_at TIMESTAMPTZ DEFAULT NULL, - - 
CONSTRAINT studio_tool_calls_completed_after_started CHECK (completed_at IS NULL OR completed_at >= started_at) -); - --- Indexes for studio_tool_calls table -CREATE INDEX studio_tool_calls_session_idx - ON studio_tool_calls (session_id, started_at DESC); - -CREATE INDEX studio_tool_calls_file_idx - ON studio_tool_calls (file_id, started_at DESC); - -CREATE INDEX studio_tool_calls_status_idx - ON studio_tool_calls (tool_status, started_at DESC) - WHERE tool_status IN ('pending', 'running'); - -CREATE INDEX studio_tool_calls_tool_name_idx - ON studio_tool_calls (tool_name); - --- Comments for studio_tool_calls table -COMMENT ON TABLE studio_tool_calls IS - 'Tool invocations for debugging and usage tracking. Input/output contain references, not document content.'; - -COMMENT ON COLUMN studio_tool_calls.id IS 'Unique tool call identifier'; -COMMENT ON COLUMN studio_tool_calls.session_id IS 'Reference to the studio session'; -COMMENT ON COLUMN studio_tool_calls.file_id IS 'Reference to the file being operated on'; -COMMENT ON COLUMN studio_tool_calls.chunk_id IS 'Optional reference to a specific chunk'; -COMMENT ON COLUMN studio_tool_calls.tool_name IS 'Name of the tool (merge, split, redact, translate, etc.)'; -COMMENT ON COLUMN studio_tool_calls.tool_input IS 'Tool parameters as JSON (references, not content)'; -COMMENT ON COLUMN studio_tool_calls.tool_output IS 'Tool result as JSON (references, not content)'; -COMMENT ON COLUMN studio_tool_calls.tool_status IS 'Execution status (pending, running, completed, cancelled)'; -COMMENT ON COLUMN studio_tool_calls.started_at IS 'Timestamp when tool call was created/started'; -COMMENT ON COLUMN studio_tool_calls.completed_at IS 'Timestamp when tool execution completed'; - --- Studio operations table definition -CREATE TABLE studio_operations ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - tool_call_id UUID NOT NULL REFERENCES studio_tool_calls (id) ON DELETE CASCADE, - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - chunk_id UUID DEFAULT NULL REFERENCES document_chunks (id) ON DELETE SET NULL, - - -- Operation attributes - operation_type TEXT NOT NULL, - operation_diff JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT studio_operations_operation_type_length CHECK (length(trim(operation_type)) BETWEEN 1 AND 64), - CONSTRAINT studio_operations_operation_diff_size CHECK (length(operation_diff::TEXT) BETWEEN 2 AND 131072), - - -- Application state - applied BOOLEAN NOT NULL DEFAULT FALSE, - reverted BOOLEAN NOT NULL DEFAULT FALSE, - - CONSTRAINT studio_operations_revert_requires_applied CHECK (NOT reverted OR applied), - - -- Timing - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - applied_at TIMESTAMPTZ DEFAULT NULL, - - CONSTRAINT studio_operations_applied_after_created CHECK (applied_at IS NULL OR applied_at >= created_at) -); - --- Indexes for studio_operations table -CREATE INDEX studio_operations_tool_call_idx - ON studio_operations (tool_call_id); - -CREATE INDEX studio_operations_file_idx - ON studio_operations (file_id, created_at DESC); - -CREATE INDEX studio_operations_pending_idx - ON studio_operations (file_id, applied) - WHERE NOT applied; - --- Comments for studio_operations table -COMMENT ON TABLE studio_operations IS - 'Document operations (diffs) produced by tool calls. 
Stores positions, not content.'; - -COMMENT ON COLUMN studio_operations.id IS 'Unique operation identifier'; -COMMENT ON COLUMN studio_operations.tool_call_id IS 'Reference to the tool call that produced this operation'; -COMMENT ON COLUMN studio_operations.file_id IS 'Reference to the file being modified'; -COMMENT ON COLUMN studio_operations.chunk_id IS 'Optional reference to a specific chunk'; -COMMENT ON COLUMN studio_operations.operation_type IS 'Type of operation (insert, replace, delete, format, merge, split, etc.)'; -COMMENT ON COLUMN studio_operations.operation_diff IS 'The diff specification as JSON (positions, not content)'; -COMMENT ON COLUMN studio_operations.applied IS 'Whether this operation has been applied to the document'; -COMMENT ON COLUMN studio_operations.reverted IS 'Whether this operation was reverted by the user'; -COMMENT ON COLUMN studio_operations.created_at IS 'Timestamp when operation was created'; -COMMENT ON COLUMN studio_operations.applied_at IS 'Timestamp when operation was applied'; From f7a650333fce21874fac99f553142ebee5260f75 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 19 Jan 2026 05:40:47 +0100 Subject: [PATCH 03/28] feat(runtime): add nvisy-runtime crate with archive service - Add new nvisy-runtime crate for runtime services - Implement archive extraction and handling - Add archive error handling to server - Update document file constraints and models - Add SSH access documentation for private dependencies - Update migrations for document handling --- CONTRIBUTING.md | 17 ++ Cargo.lock | 273 ++++++++++++++++-- Cargo.toml | 13 +- README.md | 3 + .../nvisy-postgres/src/model/document_file.rs | 2 - .../src/types/constraint/document_files.rs | 10 +- crates/nvisy-runtime/Cargo.toml | 42 +++ crates/nvisy-runtime/src/archive.rs | 196 +++++++++++++ crates/nvisy-runtime/src/lib.rs | 19 ++ crates/nvisy-runtime/src/service.rs | 157 ++++++++++ crates/nvisy-server/Cargo.toml | 6 +- .../src/handler/error/archive_error.rs | 31 ++ crates/nvisy-server/src/handler/error/mod.rs | 1 + .../src/handler/error/pg_document.rs | 5 + .../src/pipeline/preprocessing.rs | 2 +- crates/nvisy-server/src/service/mod.rs | 19 +- migrations/2025-05-27-011852_documents/up.sql | 52 ++++ 17 files changed, 797 insertions(+), 51 deletions(-) create mode 100644 crates/nvisy-runtime/Cargo.toml create mode 100644 crates/nvisy-runtime/src/archive.rs create mode 100644 crates/nvisy-runtime/src/lib.rs create mode 100644 crates/nvisy-runtime/src/service.rs create mode 100644 crates/nvisy-server/src/handler/error/archive_error.rs diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1de2919..3f45705 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,6 +17,23 @@ make install-all make generate-keys ``` +### SSH Access + +Some dependencies are fetched from private GitHub repositories via SSH. 
Ensure +your SSH key is added to your GitHub account and ssh-agent is running: + +```bash +eval "$(ssh-agent -s)" +ssh-add ~/.ssh/id_ed25519 +ssh -T git@github.com # verify access +``` + +If cargo fails to fetch git dependencies, enable CLI-based git fetching: + +```bash +export CARGO_NET_GIT_FETCH_WITH_CLI=true +``` + ## Development Run all CI checks locally before submitting a pull request: diff --git a/Cargo.lock b/Cargo.lock index 2e82068..a5e28d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,15 +168,6 @@ dependencies = [ "backtrace", ] -[[package]] -name = "arbitrary" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" -dependencies = [ - "derive_arbitrary", -] - [[package]] name = "arc-swap" version = "1.8.0" @@ -905,6 +896,27 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + [[package]] name = "curve25519-dalek" version = "4.1.3" @@ -1055,17 +1067,6 @@ dependencies = [ "serde_core", ] -[[package]] -name = "derive_arbitrary" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "derive_builder" version = "0.20.2" @@ -1587,6 +1588,21 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows-link", + "windows-result", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -1732,6 +1748,16 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hipstr" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07a5072958d04f9147e517881d929d3f4706612712f8f4cfcd247f2b716d5262" +dependencies = [ + "loom", + "serde", +] + [[package]] name = "hmac" version = "0.12.1" @@ -2283,6 +2309,19 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lru-slab" version = "0.1.2" @@ -2291,14 +2330,34 @@ checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" [[package]] name = "lzma-rust2" -version = "0.15.4" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48172246aa7c3ea28e423295dd1ca2589a24617cc4e588bb8cfe177cb2c54d95" 
+checksum = "1670343e58806300d87950e3401e820b519b9384281bbabfb15e3636689ffd69" dependencies = [ "crc", "sha2", ] +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "markdown" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5cab8f2cadc416a82d2e783a1946388b31654d391d1c7d92cc1f03e295b1deb" +dependencies = [ + "unicode-id", +] + [[package]] name = "matchers" version = "0.2.0" @@ -2653,6 +2712,142 @@ dependencies = [ "uuid", ] +[[package]] +name = "nvisy-rt-archive" +version = "0.1.0" +source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "bytes", + "bzip2", + "derive_more", + "flate2", + "nvisy-rt-core", + "strum", + "tar", + "tempfile", + "tokio", + "tracing", + "xz2", + "zip", +] + +[[package]] +name = "nvisy-rt-core" +version = "0.1.0" +source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "bytes", + "derive_more", + "hex", + "hipstr", + "jiff", + "serde", + "sha2", + "strum", + "thiserror 2.0.17", + "tokio", + "uuid", +] + +[[package]] +name = "nvisy-rt-document" +version = "0.1.0" +source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "base64", + "bytes", + "derive_more", + "jiff", + "nvisy-rt-core", + "serde", + "serde_json", + "strum", + "thiserror 2.0.17", + "tokio", + "uuid", +] + +[[package]] +name = "nvisy-rt-docx" +version = "0.1.0" +source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "bytes", + "nvisy-rt-document", + "thiserror 2.0.17", +] + +[[package]] +name = "nvisy-rt-engine" +version = "0.1.0" +source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "bytes", + "jiff", + "nvisy-rt-archive", + "nvisy-rt-document", + "nvisy-rt-docx", + "nvisy-rt-image", + "nvisy-rt-pdf", + "nvisy-rt-text", + "serde", + "tracing", + "uuid", +] + +[[package]] +name = "nvisy-rt-image" +version = "0.1.0" +source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "bytes", + "nvisy-rt-document", + "thiserror 2.0.17", +] + +[[package]] +name = "nvisy-rt-pdf" +version = "0.1.0" +source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "bytes", + "nvisy-rt-document", + "thiserror 2.0.17", +] + +[[package]] +name = "nvisy-rt-text" +version = "0.1.0" +source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +dependencies = [ + "async-trait", + "bytes", + "csv", + "markdown", + "nvisy-rt-document", + "serde_json", + "thiserror 2.0.17", +] + +[[package]] +name = "nvisy-runtime" +version = "0.1.0" +dependencies = [ + "clap", + "derive_more", + "nvisy-rt-core", + "nvisy-rt-document", + "nvisy-rt-engine", + "schemars 0.9.0", + "serde", + "thiserror 2.0.17", + "tokio", +] + 
[[package]] name = "nvisy-server" version = "0.1.0" @@ -2670,7 +2865,6 @@ dependencies = [ "clap", "derive_more", "dotenvy", - "flate2", "futures", "ipnet", "jiff", @@ -2678,6 +2872,7 @@ dependencies = [ "nvisy-nats", "nvisy-postgres", "nvisy-rig", + "nvisy-runtime", "nvisy-webhook", "rand 0.10.0-rc.6", "regex", @@ -2686,7 +2881,6 @@ dependencies = [ "serde_json", "sha2", "strum", - "tar", "tempfile", "thiserror 2.0.17", "tokio", @@ -2700,7 +2894,6 @@ dependencies = [ "uuid", "validator", "woothee", - "zip", "zxcvbn", ] @@ -3570,6 +3763,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" @@ -4469,6 +4668,12 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" +[[package]] +name = "unicode-id" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ba288e709927c043cbe476718d37be306be53fb1fafecd0dbe36d072be2580" + [[package]] name = "unicode-ident" version = "1.0.22" @@ -5013,6 +5218,15 @@ dependencies = [ "rustix", ] +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yansi" version = "1.0.1" @@ -5094,9 +5308,9 @@ dependencies = [ [[package]] name = "zeroize_derive" -version = "1.4.2" +version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", @@ -5139,12 +5353,11 @@ dependencies = [ [[package]] name = "zip" -version = "7.0.0" +version = "7.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdd8a47718a4ee5fe78e07667cd36f3de80e7c2bfe727c7074245ffc7303c037" +checksum = "9013f1222db8a6d680f13a7ccdc60a781199cd09c2fa4eff58e728bb181757fc" dependencies = [ "aes", - "arbitrary", "bzip2", "constant_time_eq", "crc32fast", diff --git a/Cargo.toml b/Cargo.toml index bbd309b..a29c2a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "./crates/nvisy-nats", "./crates/nvisy-postgres", "./crates/nvisy-rig", + "./crates/nvisy-runtime", "./crates/nvisy-server", "./crates/nvisy-webhook", ] @@ -35,9 +36,16 @@ nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } nvisy-nats = { path = "./crates/nvisy-nats", version = "0.1.0" } nvisy-postgres = { path = "./crates/nvisy-postgres", version = "0.1.0" } nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } +nvisy-runtime = { path = "./crates/nvisy-runtime", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } nvisy-webhook = { path = "./crates/nvisy-webhook", version = "0.1.0" } +# Runtime crates (from github.com/nvisycom/runtime) +nvisy-rt-archive = { git = "ssh://git@github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-core = { git = "ssh://git@github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-document = { git = "ssh://git@github.com/nvisycom/runtime.git", branch = 
"feature/prerelease", version = "0.1.0" } +nvisy-rt-engine = { git = "ssh://git@github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } + # CLI clap = { version = "4.5", features = [] } @@ -125,8 +133,3 @@ woothee = { version = "0.13", features = [] } # AI/ML frameworks rig-core = { version = "0.28", default-features = false, features = ["reqwest-rustls"] } - -# Archive/Compression -tar = { version = "0.4", features = [] } -flate2 = { version = "1.1", features = [] } -zip = { version = "7.0", features = [] } diff --git a/README.md b/README.md index d3b48f7..6c39e39 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,9 @@ make generate-all cargo run --features dotenv ``` +> **Note**: Some dependencies require SSH access to private GitHub repositories. +> See [CONTRIBUTING.md](CONTRIBUTING.md#ssh-access) for setup instructions. + ## Configuration See [.env.example](.env.example) for all available environment variables. diff --git a/crates/nvisy-postgres/src/model/document_file.rs b/crates/nvisy-postgres/src/model/document_file.rs index b407d3e..19896b2 100644 --- a/crates/nvisy-postgres/src/model/document_file.rs +++ b/crates/nvisy-postgres/src/model/document_file.rs @@ -81,8 +81,6 @@ pub struct NewDocumentFile { pub account_id: Uuid, /// Parent file ID (for derived files or version chains). pub parent_id: Option, - /// Version number (defaults to 1). - pub version_number: Option, /// Display name. pub display_name: Option, /// Original filename. diff --git a/crates/nvisy-postgres/src/types/constraint/document_files.rs b/crates/nvisy-postgres/src/types/constraint/document_files.rs index f95977a..3fa0533 100644 --- a/crates/nvisy-postgres/src/types/constraint/document_files.rs +++ b/crates/nvisy-postgres/src/types/constraint/document_files.rs @@ -42,6 +42,12 @@ pub enum DocumentFileConstraints { #[strum(serialize = "document_files_retention_period")] RetentionPeriod, + // File version constraints + #[strum(serialize = "document_files_version_number_min")] + VersionNumberMin, + #[strum(serialize = "document_files_parent_same_document")] + ParentSameDocument, + // File chronological constraints #[strum(serialize = "document_files_updated_after_created")] UpdatedAfterCreated, @@ -72,7 +78,9 @@ impl DocumentFileConstraints { | DocumentFileConstraints::StorageBucketNotEmpty | DocumentFileConstraints::FileHashSha256Length | DocumentFileConstraints::MetadataSize - | DocumentFileConstraints::RetentionPeriod => ConstraintCategory::Validation, + | DocumentFileConstraints::RetentionPeriod + | DocumentFileConstraints::VersionNumberMin + | DocumentFileConstraints::ParentSameDocument => ConstraintCategory::Validation, DocumentFileConstraints::UpdatedAfterCreated | DocumentFileConstraints::DeletedAfterCreated diff --git a/crates/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml new file mode 100644 index 0000000..7e14a2f --- /dev/null +++ b/crates/nvisy-runtime/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "nvisy-runtime" +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +description = "Document processing runtime for nvisy" +keywords = ["document", "pdf", "docx", "archive", "processing"] +categories = ["document-processing", "parsing"] + +[features] +default = [] +config = ["clap"] + +[dependencies] +nvisy-rt-core = 
{ workspace = true } +nvisy-rt-document = { workspace = true } +nvisy-rt-engine = { workspace = true } + +# CLI +clap = { workspace = true, features = ["derive", "env"], optional = true } + +# (De)serialization +serde = { workspace = true } +schemars = { workspace = true } + +# Derive macros +derive_more = { workspace = true, features = ["deref", "deref_mut"] } +thiserror = { workspace = true } + +# Async runtime +tokio = { workspace = true, features = ["fs"] } + +[dev-dependencies] +tokio = { workspace = true, features = ["macros", "rt-multi-thread", "fs"] } diff --git a/crates/nvisy-runtime/src/archive.rs b/crates/nvisy-runtime/src/archive.rs new file mode 100644 index 0000000..81056d6 --- /dev/null +++ b/crates/nvisy-runtime/src/archive.rs @@ -0,0 +1,196 @@ +//! Archive service for creating compressed archives. + +use derive_more::{Deref, DerefMut}; +use nvisy_rt_engine::{ArchiveRegistry, ArchiveType}; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Supported archive formats. +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + Hash, + Serialize, + Deserialize, + JsonSchema +)] +#[serde(rename_all = "lowercase")] +pub enum ArchiveFormat { + /// ZIP archive format. + Zip, + /// TAR archive format (gzip compressed). + Tar, +} + +impl ArchiveFormat { + /// Returns the file extension for this format. + #[must_use] + pub fn extension(&self) -> &'static str { + match self { + Self::Zip => "zip", + Self::Tar => "tar.gz", + } + } + + /// Returns the MIME type for this format. + #[must_use] + pub fn mime_type(&self) -> &'static str { + match self { + Self::Zip => "application/zip", + Self::Tar => "application/x-tar", + } + } + + /// Converts to the underlying [`ArchiveType`]. + #[must_use] + pub fn to_archive_type(self) -> ArchiveType { + match self { + Self::Zip => ArchiveType::Zip, + Self::Tar => ArchiveType::TarGz, + } + } +} + +/// Error type for archive operations. +#[derive(Debug, thiserror::Error)] +pub enum ArchiveError { + /// Error from the archive library. + #[error("Archive error: {0}")] + Archive(#[from] nvisy_rt_engine::arc::Error), + + /// IO error during archive creation. + #[error("IO error: {0}")] + Io(#[from] std::io::Error), +} + +/// Result type for archive operations. +pub type ArchiveResult = Result; + +/// Service for creating compressed archives. +/// +/// This service derefs to the underlying [`ArchiveRegistry`]. +#[derive(Debug, Clone, Deref, DerefMut)] +pub struct ArchiveService { + #[deref] + #[deref_mut] + registry: ArchiveRegistry, +} + +impl ArchiveService { + /// Creates a new archive service with default settings. + /// + /// # Panics + /// + /// Panics if the temp directory cannot be created. + #[must_use] + pub fn new() -> Self { + Self { + registry: ArchiveRegistry::new(std::env::temp_dir().join("nvisy-archive")) + .expect("failed to create archive registry"), + } + } + + /// Creates an archive from a list of files. + /// + /// # Arguments + /// + /// * `files` - A list of (filename, content) tuples. + /// * `format` - The archive format to create. + /// + /// # Errors + /// + /// Returns an error if archive creation fails. 
+ pub async fn create_archive( + &self, + files: Vec<(String, Vec)>, + format: ArchiveFormat, + ) -> ArchiveResult> { + let archive_type = format.to_archive_type(); + + // Create a handler for assembling files + let mut handler = self.registry.create_archive_dir(archive_type)?; + + // Write all files to the directory + for (filename, content) in files { + handler.write_file(&filename, &content).await?; + } + + // Pack into an archive and read the bytes + let archive_name = format!("archive.{}", format.extension()); + let archive_file = handler.pack(&archive_name).await?; + let archive_path = archive_file + .path() + .ok_or_else(|| ArchiveError::Io(std::io::Error::other("Archive has no path")))?; + let archive_bytes = tokio::fs::read(archive_path).await?; + + Ok(archive_bytes) + } +} + +impl Default for ArchiveService { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_archive_format_extension() { + assert_eq!(ArchiveFormat::Zip.extension(), "zip"); + assert_eq!(ArchiveFormat::Tar.extension(), "tar.gz"); + } + + #[test] + fn test_archive_format_mime_type() { + assert_eq!(ArchiveFormat::Zip.mime_type(), "application/zip"); + assert_eq!(ArchiveFormat::Tar.mime_type(), "application/x-tar"); + } + + #[test] + fn test_archive_format_to_archive_type() { + assert_eq!(ArchiveFormat::Zip.to_archive_type(), ArchiveType::Zip); + assert_eq!(ArchiveFormat::Tar.to_archive_type(), ArchiveType::TarGz); + } + + #[tokio::test] + async fn test_create_zip_archive() { + let service = ArchiveService::new(); + let files = vec![ + ("test1.txt".to_string(), b"Hello".to_vec()), + ("test2.txt".to_string(), b"World".to_vec()), + ]; + + let archive = service + .create_archive(files, ArchiveFormat::Zip) + .await + .unwrap(); + assert!(!archive.is_empty()); + + // Verify it's a valid ZIP (starts with PK signature) + assert_eq!(&archive[0..2], b"PK"); + } + + #[tokio::test] + async fn test_create_tar_archive() { + let service = ArchiveService::new(); + let files = vec![ + ("test1.txt".to_string(), b"Hello".to_vec()), + ("test2.txt".to_string(), b"World".to_vec()), + ]; + + let archive = service + .create_archive(files, ArchiveFormat::Tar) + .await + .unwrap(); + assert!(!archive.is_empty()); + + // Verify it's a valid gzip (starts with 0x1f 0x8b) + assert_eq!(&archive[0..2], &[0x1f, 0x8b]); + } +} diff --git a/crates/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs new file mode 100644 index 0000000..01e8c9a --- /dev/null +++ b/crates/nvisy-runtime/src/lib.rs @@ -0,0 +1,19 @@ +//! Document processing runtime for nvisy. +//! +//! This crate provides a service wrapper around the nvisy runtime engine, +//! integrating document processing capabilities with the server infrastructure. + +mod archive; +mod service; + +pub use nvisy_rt_core as rt_core; +pub use nvisy_rt_engine as rt_engine; + +pub use archive::{ArchiveError, ArchiveFormat, ArchiveResult, ArchiveService}; +pub use service::{RuntimeConfig, RuntimeService}; + +// Re-export commonly used types from the engine +pub use nvisy_rt_engine::{ + BoundingBox, Capabilities, Document, DocumentFormat, Engine, EngineConfig, FormatRegistry, + LoadedDocument, Point, Region, RegionId, RegionKind, doc, +}; diff --git a/crates/nvisy-runtime/src/service.rs b/crates/nvisy-runtime/src/service.rs new file mode 100644 index 0000000..9862dd0 --- /dev/null +++ b/crates/nvisy-runtime/src/service.rs @@ -0,0 +1,157 @@ +//! Runtime service for document processing. 
+ +use derive_more::{Deref, DerefMut}; +use nvisy_rt_engine::{Engine, EngineConfig}; +use serde::{Deserialize, Serialize}; + +#[cfg(feature = "config")] +use clap::Args; + +/// Configuration for the runtime service with sensible defaults. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[cfg_attr(feature = "config", derive(Args))] +pub struct RuntimeConfig { + /// Maximum file size in bytes (optional). + #[cfg_attr( + feature = "config", + arg(long = "runtime-max-file-size", env = "RUNTIME_MAX_FILE_SIZE") + )] + pub runtime_max_file_size: Option, +} + +// Default values +const DEFAULT_MAX_FILE_SIZE: u64 = 100 * 1024 * 1024; // 100 MB + +impl RuntimeConfig { + /// Creates a new runtime configuration with defaults. + #[must_use] + pub fn new() -> Self { + Self { + runtime_max_file_size: None, + } + } + + /// Returns the maximum file size, using the default if not set. + #[inline] + #[must_use] + pub fn max_file_size(&self) -> u64 { + self.runtime_max_file_size.unwrap_or(DEFAULT_MAX_FILE_SIZE) + } + + /// Set the maximum file size in bytes. + #[must_use] + pub fn with_max_file_size(mut self, size: u64) -> Self { + self.runtime_max_file_size = Some(size); + self + } + + /// Validate the configuration and return any issues. + pub fn validate(&self) -> Result<(), String> { + if self.runtime_max_file_size == Some(0) { + return Err("Maximum file size cannot be zero".to_string()); + } + Ok(()) + } +} + +impl Default for RuntimeConfig { + fn default() -> Self { + Self::new() + } +} + +/// Runtime service for document processing. +/// +/// Wraps the nvisy runtime engine and provides document loading +/// and processing capabilities for the server. +/// +/// This service derefs to the underlying [`Engine`], allowing direct +/// access to all engine methods. +#[derive(Debug, Clone, Deref, DerefMut)] +pub struct RuntimeService { + #[deref] + #[deref_mut] + engine: Engine, +} + +impl RuntimeService { + /// Creates a new runtime service with default configuration. + #[must_use] + pub fn new() -> Self { + Self { + engine: Engine::new(), + } + } + + /// Creates a new runtime service with custom configuration. + #[must_use] + pub fn with_config(config: &RuntimeConfig) -> Self { + let engine_config = EngineConfig { + max_file_size: Some(config.max_file_size()), + ..Default::default() + }; + Self { + engine: Engine::with_config(engine_config), + } + } + + /// Returns a reference to the underlying engine. + #[must_use] + pub fn engine(&self) -> &Engine { + &self.engine + } + + /// Returns a mutable reference to the underlying engine. 
+ #[must_use] + pub fn engine_mut(&mut self) -> &mut Engine { + &mut self.engine + } +} + +impl Default for RuntimeService { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_config() { + let config = RuntimeConfig::new(); + assert_eq!(config.max_file_size(), DEFAULT_MAX_FILE_SIZE); + assert!(config.validate().is_ok()); + } + + #[test] + fn test_config_builder() { + let config = RuntimeConfig::new().with_max_file_size(50 * 1024 * 1024); + assert_eq!(config.max_file_size(), 50 * 1024 * 1024); + assert!(config.validate().is_ok()); + } + + #[test] + fn test_config_validation() { + let valid_config = RuntimeConfig::new(); + assert!(valid_config.validate().is_ok()); + + let zero_size = RuntimeConfig::new().with_max_file_size(0); + assert!(zero_size.validate().is_err()); + } + + #[test] + fn test_service_deref() { + let service = RuntimeService::new(); + // Test that we can call Engine methods directly via Deref + let _extensions = service.supported_extensions(); + let _mimes = service.supported_mime_types(); + } + + #[test] + fn test_service_with_config() { + let config = RuntimeConfig::new().with_max_file_size(10 * 1024 * 1024); + let _service = RuntimeService::with_config(&config); + } +} diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index 2ef60e8..e94e498 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -36,6 +36,7 @@ config = [ nvisy-nats = { workspace = true, features = ["schema"] } nvisy-postgres = { workspace = true, features = ["schema"] } nvisy-rig = { workspace = true, features = [] } +nvisy-runtime = { workspace = true } nvisy-webhook = { workspace = true, features = ["schema"] } # Async runtime @@ -76,11 +77,6 @@ validator = { workspace = true, features = [] } regex = { workspace = true, features = [] } woothee = { workspace = true, features = [] } -# Archive/Compression -tar = { workspace = true, features = [] } -flate2 = { workspace = true, features = [] } -zip = { workspace = true, features = [] } - # Derive macros & utilities thiserror = { workspace = true, features = [] } derive_more = { workspace = true, features = ["full"] } diff --git a/crates/nvisy-server/src/handler/error/archive_error.rs b/crates/nvisy-server/src/handler/error/archive_error.rs new file mode 100644 index 0000000..cf2c346 --- /dev/null +++ b/crates/nvisy-server/src/handler/error/archive_error.rs @@ -0,0 +1,31 @@ +//! Archive error to HTTP error conversion implementation. +//! +//! This module provides conversion from nvisy-runtime archive errors to appropriate +//! HTTP errors with proper status codes and user-friendly messages. + +use nvisy_runtime::ArchiveError; + +use super::http_error::{Error as HttpError, ErrorKind}; + +/// Tracing target for archive error conversions. 
+const TRACING_TARGET: &str = "nvisy_server::handler::archive"; + +impl From<ArchiveError> for HttpError<'static> { + fn from(error: ArchiveError) -> Self { + tracing::error!( + target: TRACING_TARGET, + error = %error, + "Archive operation failed" + ); + + match error { + ArchiveError::Archive(e) => ErrorKind::InternalServerError + .with_message("Failed to create archive") + .with_context(e.to_string()), + + ArchiveError::Io(e) => ErrorKind::InternalServerError + .with_message("Archive I/O error") + .with_context(e.to_string()), + } + } +} diff --git a/crates/nvisy-server/src/handler/error/mod.rs b/crates/nvisy-server/src/handler/error/mod.rs index 2d9d182..7512732 100644 --- a/crates/nvisy-server/src/handler/error/mod.rs +++ b/crates/nvisy-server/src/handler/error/mod.rs @@ -1,5 +1,6 @@ //! [`Error`], [`ErrorKind`] and [`Result`]. +mod archive_error; mod http_error; mod nats_error; mod pg_account; diff --git a/crates/nvisy-server/src/handler/error/pg_document.rs b/crates/nvisy-server/src/handler/error/pg_document.rs index 0862561..9b7a925 100644 --- a/crates/nvisy-server/src/handler/error/pg_document.rs +++ b/crates/nvisy-server/src/handler/error/pg_document.rs @@ -63,6 +63,11 @@ impl From<DocumentFileConstraints> for Error<'static> { DocumentFileConstraints::TagsCountMax => { ErrorKind::BadRequest.with_message("Maximum number of tags exceeded") } + DocumentFileConstraints::VersionNumberMin => { + ErrorKind::BadRequest.with_message("Version number must be at least 1") + } + DocumentFileConstraints::ParentSameDocument => ErrorKind::BadRequest + .with_message("Parent file must belong to the same document"), DocumentFileConstraints::UpdatedAfterCreated | DocumentFileConstraints::DeletedAfterCreated | DocumentFileConstraints::DeletedAfterUpdated diff --git a/crates/nvisy-server/src/pipeline/preprocessing.rs b/crates/nvisy-server/src/pipeline/preprocessing.rs index bce4f01..65f8fc7 100644 --- a/crates/nvisy-server/src/pipeline/preprocessing.rs +++ b/crates/nvisy-server/src/pipeline/preprocessing.rs @@ -37,7 +37,7 @@ impl JobHandler for PreprocessingHandler { // TODO: Implement metadata validation // - Format detection // - File integrity checks - // - Metadata extraction and fixes + // - Metadata extraction } // Step 2: Run OCR diff --git a/crates/nvisy-server/src/service/mod.rs b/crates/nvisy-server/src/service/mod.rs index 95251f7..29af2b7 100644 --- a/crates/nvisy-server/src/service/mod.rs +++ b/crates/nvisy-server/src/service/mod.rs @@ -1,7 +1,6 @@ //! Application state and dependency injection.
mod cache; -mod compression; mod config; mod integration; mod security; @@ -9,11 +8,14 @@ mod security; use nvisy_nats::NatsClient; use nvisy_postgres::PgClient; use nvisy_rig::RigService; +use nvisy_runtime::RuntimeService; use nvisy_webhook::WebhookService; +// Re-export archive types for handler use +pub use nvisy_runtime::{ArchiveFormat, ArchiveResult, ArchiveService}; + use crate::Result; pub use crate::service::cache::HealthCache; -pub use crate::service::compression::{ArchiveFormat, ArchiveService}; pub use crate::service::config::ServiceConfig; pub use crate::service::integration::IntegrationProvider; pub use crate::service::security::{ @@ -33,12 +35,13 @@ pub struct ServiceState { pub nats: NatsClient, pub webhook: WebhookService, - // AI services: + // AI & document services: pub rig: RigService, + pub runtime: RuntimeService, + pub archive: ArchiveService, // Internal services: pub health_cache: HealthCache, - pub archive_service: ArchiveService, pub integration_provider: IntegrationProvider, pub password_hasher: PasswordHasher, pub password_strength: PasswordStrength, @@ -74,9 +77,10 @@ impl ServiceState { webhook: webhook_service, rig, + runtime: RuntimeService::new(), + archive: ArchiveService::new(), health_cache: HealthCache::new(), - archive_service: ArchiveService::new(), integration_provider: IntegrationProvider::new(), password_hasher: PasswordHasher::new(), password_strength: PasswordStrength::new(), @@ -103,12 +107,13 @@ impl_di!(postgres: PgClient); impl_di!(nats: NatsClient); impl_di!(webhook: WebhookService); -// AI services: +// AI and document services: impl_di!(rig: RigService); +impl_di!(runtime: RuntimeService); +impl_di!(archive: ArchiveService); // Internal services: impl_di!(health_cache: HealthCache); -impl_di!(archive_service: ArchiveService); impl_di!(integration_provider: IntegrationProvider); impl_di!(password_hasher: PasswordHasher); impl_di!(password_strength: PasswordStrength); diff --git a/migrations/2025-05-27-011852_documents/up.sql b/migrations/2025-05-27-011852_documents/up.sql index 9b48ccd..4b64708 100644 --- a/migrations/2025-05-27-011852_documents/up.sql +++ b/migrations/2025-05-27-011852_documents/up.sql @@ -212,6 +212,58 @@ CREATE INDEX document_files_version_chain_idx ON document_files (parent_id, version_number DESC) WHERE parent_id IS NOT NULL AND deleted_at IS NULL; +-- Trigger function to ensure parent file is from the same document +CREATE OR REPLACE FUNCTION check_parent_same_document() +RETURNS TRIGGER AS $$ +BEGIN + IF NEW.parent_id IS NOT NULL THEN + IF NOT EXISTS ( + SELECT 1 FROM document_files + WHERE id = NEW.parent_id + AND (document_id IS NOT DISTINCT FROM NEW.document_id) + ) THEN + RAISE EXCEPTION 'Parent file must belong to the same document' + USING ERRCODE = 'check_violation', + CONSTRAINT = 'document_files_parent_same_document'; + END IF; + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER document_files_parent_same_document_trigger + BEFORE INSERT OR UPDATE OF parent_id, document_id ON document_files + FOR EACH ROW + EXECUTE FUNCTION check_parent_same_document(); + +COMMENT ON FUNCTION check_parent_same_document() IS + 'Ensures parent_id references a file in the same document.'; + +-- Trigger function to auto-set version_number based on parent +CREATE OR REPLACE FUNCTION set_file_version_number() +RETURNS TRIGGER AS $$ +BEGIN + -- If parent_id is set, calculate version as parent's version + 1 + IF NEW.parent_id IS NOT NULL THEN + SELECT version_number + 1 INTO NEW.version_number + FROM 
document_files + WHERE id = NEW.parent_id; + ELSE + -- No parent means version 1 + NEW.version_number := 1; + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER document_files_set_version_trigger + BEFORE INSERT ON document_files + FOR EACH ROW + EXECUTE FUNCTION set_file_version_number(); + +COMMENT ON FUNCTION set_file_version_number() IS + 'Automatically sets version_number based on parent file version.'; + -- Add table and column comments COMMENT ON TABLE document_files IS 'Source files for document processing with pipeline management and version tracking.'; From 1edc1e99fb763e885cd50f306466006872450495 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 19 Jan 2026 08:54:43 +0100 Subject: [PATCH 04/28] feat(runtime, opendal): rename workflow to runtime, add cloud storage backends - Rename nvisy-workflow crate to nvisy-runtime - Move RuntimeConfig and RuntimeService into nvisy-runtime - Delete nvisy-runtime archive functionality (was separate crate) - Update nvisy-opendal: remove fs/memory backends - Add cloud storage backends: azblob, gdrive, dropbox, onedrive - Apply derive improvements (thiserror, derive_more) - Add prelude module with doc(hidden) - Update sources/sinks to match opendal providers: - S3, Gcs, AzureBlob, GoogleDrive, Dropbox, OneDrive - Remove FileStorage and DirectoryWatch - Delete delete_multiple_files endpoint from files handler - Delete archive endpoints and ArchiveService from server - Add pipeline handler with CRUD operations - Add pipeline-specific permissions - Update file/annotation models and handlers --- Cargo.lock | 437 +++++++++++++- Cargo.toml | 6 + crates/nvisy-opendal/Cargo.toml | 72 +++ crates/nvisy-opendal/README.md | 44 ++ crates/nvisy-opendal/src/backend.rs | 293 ++++++++++ crates/nvisy-opendal/src/config.rs | 244 ++++++++ crates/nvisy-opendal/src/error.rs | 98 ++++ crates/nvisy-opendal/src/lib.rs | 17 + crates/nvisy-opendal/src/prelude.rs | 5 + .../src/model/chat_operation.rs | 186 ------ .../nvisy-postgres/src/model/chat_session.rs | 168 ------ .../src/model/chat_tool_call.rs | 155 ----- crates/nvisy-postgres/src/model/document.rs | 143 ----- .../src/model/document_comment.rs | 152 ----- .../src/model/{document_file.rs => file.rs} | 152 ++--- ...ument_annotation.rs => file_annotation.rs} | 42 +- .../{document_chunk.rs => file_chunk.rs} | 50 +- crates/nvisy-postgres/src/model/mod.rs | 38 +- crates/nvisy-postgres/src/model/pipeline.rs | 147 +++++ .../nvisy-postgres/src/model/pipeline_run.rs | 177 ++++++ .../src/query/chat_operation.rs | 371 ------------ .../nvisy-postgres/src/query/chat_session.rs | 291 ---------- .../src/query/chat_tool_call.rs | 310 ---------- crates/nvisy-postgres/src/query/document.rs | 340 ----------- .../src/query/document_chunk.rs | 380 ------------- .../src/query/document_comment.rs | 316 ----------- .../src/query/{document_file.rs => file.rs} | 238 +++----- ...ument_annotation.rs => file_annotation.rs} | 169 +++--- crates/nvisy-postgres/src/query/file_chunk.rs | 338 +++++++++++ crates/nvisy-postgres/src/query/mod.rs | 31 +- crates/nvisy-postgres/src/query/pipeline.rs | 361 ++++++++++++ .../nvisy-postgres/src/query/pipeline_run.rs | 536 +++++++++++++++++ crates/nvisy-postgres/src/schema.rs | 175 ++---- .../src/types/constraint/chat_operations.rs | 61 -- .../src/types/constraint/chat_sessions.rs | 61 -- .../src/types/constraint/chat_tool_calls.rs | 58 -- .../types/constraint/document_annotations.rs | 68 --- .../src/types/constraint/document_chunks.rs | 79 --- 
.../src/types/constraint/document_comments.rs | 68 --- .../src/types/constraint/document_files.rs | 107 ---- .../src/types/constraint/document_versions.rs | 107 ---- .../src/types/constraint/documents.rs | 69 --- .../src/types/constraint/file_annotations.rs | 64 +++ .../src/types/constraint/file_chunks.rs | 79 +++ .../src/types/constraint/files.rs | 94 +++ .../src/types/constraint/mod.rs | 150 ++--- .../src/types/constraint/pipeline_runs.rs | 73 +++ .../src/types/constraint/pipelines.rs | 71 +++ .../src/types/enums/chat_session_status.rs | 89 --- .../src/types/enums/chat_tool_status.rs | 99 ---- .../src/types/enums/content_segmentation.rs | 59 -- crates/nvisy-postgres/src/types/enums/mod.rs | 21 +- .../src/types/enums/pipeline_run_status.rs | 105 ++++ .../src/types/enums/pipeline_status.rs | 65 +++ .../src/types/enums/pipeline_trigger_type.rs | 68 +++ .../src/types/enums/processing_status.rs | 111 ---- .../src/types/enums/require_mode.rs | 122 ---- crates/nvisy-postgres/src/types/mod.rs | 20 +- crates/nvisy-rig/src/rag/indexer/indexed.rs | 6 +- crates/nvisy-rig/src/rag/indexer/mod.rs | 14 +- crates/nvisy-rig/src/rag/searcher/mod.rs | 12 +- crates/nvisy-rig/src/rag/searcher/scope.rs | 17 +- crates/nvisy-runtime/Cargo.toml | 42 +- crates/nvisy-runtime/README.md | 69 +++ crates/nvisy-runtime/src/archive.rs | 196 ------- crates/nvisy-runtime/src/engine/config.rs | 48 ++ crates/nvisy-runtime/src/engine/executor.rs | 112 ++++ crates/nvisy-runtime/src/engine/mod.rs | 11 + crates/nvisy-runtime/src/error.rs | 54 ++ crates/nvisy-runtime/src/graph/edge.rs | 59 ++ crates/nvisy-runtime/src/graph/mod.rs | 11 + crates/nvisy-runtime/src/graph/workflow.rs | 286 ++++++++++ crates/nvisy-runtime/src/lib.rs | 28 +- crates/nvisy-runtime/src/node/data.rs | 229 ++++++++ crates/nvisy-runtime/src/node/id.rs | 61 ++ crates/nvisy-runtime/src/node/mod.rs | 13 + crates/nvisy-runtime/src/prelude.rs | 14 + crates/nvisy-runtime/src/runtime/config.rs | 79 +++ crates/nvisy-runtime/src/runtime/mod.rs | 16 + crates/nvisy-runtime/src/runtime/service.rs | 79 +++ crates/nvisy-runtime/src/service.rs | 157 ----- crates/nvisy-server/Cargo.toml | 1 - .../src/extract/auth/auth_provider.rs | 54 +- .../src/extract/auth/permission.rs | 62 +- .../nvisy-server/src/handler/annotations.rs | 44 +- crates/nvisy-server/src/handler/chat.rs | 420 -------------- crates/nvisy-server/src/handler/comments.rs | 258 --------- crates/nvisy-server/src/handler/documents.rs | 288 ---------- .../src/handler/error/archive_error.rs | 31 - crates/nvisy-server/src/handler/error/mod.rs | 3 +- .../nvisy-server/src/handler/error/pg_chat.rs | 65 --- .../src/handler/error/pg_document.rs | 202 ++----- .../src/handler/error/pg_error.rs | 14 +- .../src/handler/error/pg_pipeline.rs | 50 ++ crates/nvisy-server/src/handler/files.rs | 242 +------- crates/nvisy-server/src/handler/mod.rs | 8 +- crates/nvisy-server/src/handler/pipelines.rs | 316 +++++++++++ .../src/handler/request/annotations.rs | 13 +- .../nvisy-server/src/handler/request/chat.rs | 74 --- .../src/handler/request/comments.rs | 57 -- .../src/handler/request/documents.rs | 67 --- .../nvisy-server/src/handler/request/files.rs | 65 +-- .../nvisy-server/src/handler/request/mod.rs | 8 +- .../nvisy-server/src/handler/request/paths.rs | 33 +- .../src/handler/request/pipelines.rs | 100 ++++ .../src/handler/response/annotations.rs | 6 +- .../nvisy-server/src/handler/response/chat.rs | 94 --- .../src/handler/response/comments.rs | 50 -- .../src/handler/response/documents.rs | 50 -- .../src/handler/response/files.rs | 
61 +- .../nvisy-server/src/handler/response/mod.rs | 8 +- .../src/handler/response/pipelines.rs | 111 ++++ .../src/middleware/specification.rs | 14 +- crates/nvisy-server/src/service/mod.rs | 14 +- .../2025-05-27-011852_documents/down.sql | 26 +- migrations/2025-05-27-011852_documents/up.sql | 537 +++++------------- migrations/2026-01-09-002114_chat/down.sql | 10 - migrations/2026-01-09-002114_chat/up.sql | 202 ------- .../2026-01-19-045012_pipelines/down.sql | 11 + migrations/2026-01-19-045012_pipelines/up.sql | 231 ++++++++ 120 files changed, 6173 insertions(+), 7790 deletions(-) create mode 100644 crates/nvisy-opendal/Cargo.toml create mode 100644 crates/nvisy-opendal/README.md create mode 100644 crates/nvisy-opendal/src/backend.rs create mode 100644 crates/nvisy-opendal/src/config.rs create mode 100644 crates/nvisy-opendal/src/error.rs create mode 100644 crates/nvisy-opendal/src/lib.rs create mode 100644 crates/nvisy-opendal/src/prelude.rs delete mode 100644 crates/nvisy-postgres/src/model/chat_operation.rs delete mode 100644 crates/nvisy-postgres/src/model/chat_session.rs delete mode 100644 crates/nvisy-postgres/src/model/chat_tool_call.rs delete mode 100644 crates/nvisy-postgres/src/model/document.rs delete mode 100644 crates/nvisy-postgres/src/model/document_comment.rs rename crates/nvisy-postgres/src/model/{document_file.rs => file.rs} (56%) rename crates/nvisy-postgres/src/model/{document_annotation.rs => file_annotation.rs} (76%) rename crates/nvisy-postgres/src/model/{document_chunk.rs => file_chunk.rs} (74%) create mode 100644 crates/nvisy-postgres/src/model/pipeline.rs create mode 100644 crates/nvisy-postgres/src/model/pipeline_run.rs delete mode 100644 crates/nvisy-postgres/src/query/chat_operation.rs delete mode 100644 crates/nvisy-postgres/src/query/chat_session.rs delete mode 100644 crates/nvisy-postgres/src/query/chat_tool_call.rs delete mode 100644 crates/nvisy-postgres/src/query/document.rs delete mode 100644 crates/nvisy-postgres/src/query/document_chunk.rs delete mode 100644 crates/nvisy-postgres/src/query/document_comment.rs rename crates/nvisy-postgres/src/query/{document_file.rs => file.rs} (64%) rename crates/nvisy-postgres/src/query/{document_annotation.rs => file_annotation.rs} (54%) create mode 100644 crates/nvisy-postgres/src/query/file_chunk.rs create mode 100644 crates/nvisy-postgres/src/query/pipeline.rs create mode 100644 crates/nvisy-postgres/src/query/pipeline_run.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/chat_operations.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/chat_sessions.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/chat_tool_calls.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/document_annotations.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/document_chunks.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/document_comments.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/document_files.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/document_versions.rs delete mode 100644 crates/nvisy-postgres/src/types/constraint/documents.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/file_annotations.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/file_chunks.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/files.rs create mode 100644 crates/nvisy-postgres/src/types/constraint/pipeline_runs.rs create mode 100644 
crates/nvisy-postgres/src/types/constraint/pipelines.rs delete mode 100644 crates/nvisy-postgres/src/types/enums/chat_session_status.rs delete mode 100644 crates/nvisy-postgres/src/types/enums/chat_tool_status.rs delete mode 100644 crates/nvisy-postgres/src/types/enums/content_segmentation.rs create mode 100644 crates/nvisy-postgres/src/types/enums/pipeline_run_status.rs create mode 100644 crates/nvisy-postgres/src/types/enums/pipeline_status.rs create mode 100644 crates/nvisy-postgres/src/types/enums/pipeline_trigger_type.rs delete mode 100644 crates/nvisy-postgres/src/types/enums/processing_status.rs delete mode 100644 crates/nvisy-postgres/src/types/enums/require_mode.rs create mode 100644 crates/nvisy-runtime/README.md delete mode 100644 crates/nvisy-runtime/src/archive.rs create mode 100644 crates/nvisy-runtime/src/engine/config.rs create mode 100644 crates/nvisy-runtime/src/engine/executor.rs create mode 100644 crates/nvisy-runtime/src/engine/mod.rs create mode 100644 crates/nvisy-runtime/src/error.rs create mode 100644 crates/nvisy-runtime/src/graph/edge.rs create mode 100644 crates/nvisy-runtime/src/graph/mod.rs create mode 100644 crates/nvisy-runtime/src/graph/workflow.rs create mode 100644 crates/nvisy-runtime/src/node/data.rs create mode 100644 crates/nvisy-runtime/src/node/id.rs create mode 100644 crates/nvisy-runtime/src/node/mod.rs create mode 100644 crates/nvisy-runtime/src/prelude.rs create mode 100644 crates/nvisy-runtime/src/runtime/config.rs create mode 100644 crates/nvisy-runtime/src/runtime/mod.rs create mode 100644 crates/nvisy-runtime/src/runtime/service.rs delete mode 100644 crates/nvisy-runtime/src/service.rs delete mode 100644 crates/nvisy-server/src/handler/chat.rs delete mode 100644 crates/nvisy-server/src/handler/comments.rs delete mode 100644 crates/nvisy-server/src/handler/documents.rs delete mode 100644 crates/nvisy-server/src/handler/error/archive_error.rs delete mode 100644 crates/nvisy-server/src/handler/error/pg_chat.rs create mode 100644 crates/nvisy-server/src/handler/error/pg_pipeline.rs create mode 100644 crates/nvisy-server/src/handler/pipelines.rs delete mode 100644 crates/nvisy-server/src/handler/request/chat.rs delete mode 100644 crates/nvisy-server/src/handler/request/comments.rs delete mode 100644 crates/nvisy-server/src/handler/request/documents.rs create mode 100644 crates/nvisy-server/src/handler/request/pipelines.rs delete mode 100644 crates/nvisy-server/src/handler/response/chat.rs delete mode 100644 crates/nvisy-server/src/handler/response/comments.rs delete mode 100644 crates/nvisy-server/src/handler/response/documents.rs create mode 100644 crates/nvisy-server/src/handler/response/pipelines.rs delete mode 100644 migrations/2026-01-09-002114_chat/down.sql delete mode 100644 migrations/2026-01-09-002114_chat/up.sql create mode 100644 migrations/2026-01-19-045012_pipelines/down.sql create mode 100644 migrations/2026-01-19-045012_pipelines/up.sql diff --git a/Cargo.lock b/Cargo.lock index a5e28d6..8ae0e9b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -208,6 +208,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "async-lock" +version = "3.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" +dependencies = [ + "event-listener", + "event-listener-strategy", + "pin-project-lite", +] + [[package]] name = "async-nats" version = "0.45.0" @@ -500,6 +511,17 @@ dependencies = [ "url", ] +[[package]] +name = "backon" +version = "1.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" +dependencies = [ + "fastrand", + "gloo-timers", + "tokio", +] + [[package]] name = "backtrace" version = "0.3.76" @@ -589,6 +611,15 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "block-padding" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] + [[package]] name = "brotli" version = "8.0.2" @@ -646,6 +677,15 @@ dependencies = [ "libbz2-rs-sys", ] +[[package]] +name = "cbc" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" +dependencies = [ + "cipher", +] + [[package]] name = "cc" version = "1.2.50" @@ -788,12 +828,41 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "const-oid" version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + [[package]] name = "constant_time_eq" version = "0.3.1" @@ -868,6 +937,15 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -877,6 +955,36 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + 
[[package]] name = "crypto-common" version = "0.1.6" @@ -1227,6 +1335,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.4", + "const-oid", "crypto-common 0.1.6", "subtle", ] @@ -1253,6 +1362,15 @@ dependencies = [ "syn", ] +[[package]] +name = "dlv-list" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" +dependencies = [ + "const-random", +] + [[package]] name = "dotenvy" version = "0.15.7" @@ -1364,6 +1482,27 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "event-listener" +version = "5.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +dependencies = [ + "event-listener", + "pin-project-lite", +] + [[package]] name = "eventsource-stream" version = "0.2.3" @@ -1665,6 +1804,18 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "h2" version = "0.4.12" @@ -1684,6 +1835,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "hashbrown" version = "0.16.1" @@ -1767,6 +1924,15 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "http" version = "1.4.0" @@ -2078,7 +2244,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", "serde", "serde_core", ] @@ -2089,6 +2255,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ + "block-padding", "generic-array", ] @@ -2221,6 +2388,21 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonwebtoken" +version = "9.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +dependencies = [ + "base64", + "js-sys", + "pem", + "ring", + "serde", + "serde_json", + "simple_asn1", +] + [[package]] name = "jsonwebtoken" version = "10.2.0" @@ -2243,6 +2425,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] [[package]] name = "libbz2-rs-sys" @@ -2453,6 +2638,28 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "moka" +version = "0.12.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926" +dependencies = [ + "async-lock", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "event-listener", + "futures-util", + "loom", + "parking_lot", + "portable-atomic", + "rustc_version", + "smallvec", + "tagptr", + "thiserror 1.0.69", + "uuid", +] + [[package]] name = "multer" version = "3.1.0" @@ -2537,6 +2744,22 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -2590,6 +2813,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -2659,6 +2883,22 @@ dependencies = [ "uuid", ] +[[package]] +name = "nvisy-opendal" +version = "0.1.0" +dependencies = [ + "derive_more", + "futures", + "jiff", + "nvisy-core", + "opendal", + "serde", + "serde_json", + "thiserror 2.0.17", + "tokio", + "tracing", +] + [[package]] name = "nvisy-postgres" version = "0.1.0" @@ -2837,15 +3077,19 @@ dependencies = [ name = "nvisy-runtime" version = "0.1.0" dependencies = [ - "clap", + "derive_builder", "derive_more", + "futures", + "nvisy-core", + "nvisy-opendal", "nvisy-rt-core", - "nvisy-rt-document", "nvisy-rt-engine", - "schemars 0.9.0", "serde", + "serde_json", "thiserror 2.0.17", "tokio", + "tracing", + "uuid", ] [[package]] @@ -2868,11 +3112,10 @@ dependencies = [ "futures", "ipnet", "jiff", - "jsonwebtoken", + "jsonwebtoken 10.2.0", "nvisy-nats", "nvisy-postgres", "nvisy-rig", - "nvisy-runtime", "nvisy-webhook", "rand 0.10.0-rc.6", "regex", @@ -2940,6 +3183,37 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "opendal" +version = "0.53.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f947c4efbca344c1a125753366033c8107f552b2e3f8251815ed1908f116ca3e" +dependencies = [ + "anyhow", + "async-trait", + "backon", + "base64", + "bytes", + "chrono", + "crc32c", + "futures", + "getrandom 0.2.16", + "http", + "http-body", + "log", + "md-5", + "moka", + "percent-encoding", + "quick-xml", + "reqsign", + "reqwest", + "serde", + "serde_json", + "sha2", + "tokio", + "uuid", +] + [[package]] name = "openssl-probe" version = "0.1.6" @@ -2955,6 +3229,22 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-multimap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" +dependencies = [ + "dlv-list", + "hashbrown 0.14.5", +] + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + 
[[package]] name = "parking_lot" version = "0.12.5" @@ -3105,6 +3395,32 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes", + "cbc", + "der", + "pbkdf2", + "scrypt", + "sha2", + "spki", +] + [[package]] name = "pkcs8" version = "0.10.2" @@ -3112,6 +3428,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ "der", + "pkcs5", + "rand_core 0.6.4", "spki", ] @@ -3249,6 +3567,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quinn" version = "0.11.9" @@ -3463,6 +3791,37 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "reqsign" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" +dependencies = [ + "anyhow", + "async-trait", + "base64", + "chrono", + "form_urlencoded", + "getrandom 0.2.16", + "hex", + "hmac", + "home", + "http", + "jsonwebtoken 9.3.1", + "log", + "percent-encoding", + "quick-xml", + "rand 0.8.5", + "reqwest", + "rsa", + "rust-ini", + "serde", + "serde_json", + "sha1", + "sha2", + "tokio", +] + [[package]] name = "reqwest" version = "0.12.28" @@ -3562,6 +3921,37 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rsa" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid", + "digest 0.10.7", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "sha2", + "signature", + "spki", + "subtle", + "zeroize", +] + +[[package]] +name = "rust-ini" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" +dependencies = [ + "cfg-if", + "ordered-multimap", +] + [[package]] name = "rust-multipart-rfc7578_2" version = "0.8.0" @@ -3692,6 +4082,15 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "schannel" version = "0.1.28" @@ -3775,6 +4174,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2", + "salsa20", + "sha2", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -4168,6 +4578,12 @@ dependencies = [ "libc", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "tar" version = "0.4.44" @@ -4289,6 +4705,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinystr" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index a29c2a5..e34bb3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "./crates/nvisy-cli", "./crates/nvisy-core", "./crates/nvisy-nats", + "./crates/nvisy-opendal", "./crates/nvisy-postgres", "./crates/nvisy-rig", "./crates/nvisy-runtime", @@ -34,6 +35,7 @@ documentation = "https://docs.rs/nvisy-server" # Internal crates nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } nvisy-nats = { path = "./crates/nvisy-nats", version = "0.1.0" } +nvisy-opendal = { path = "./crates/nvisy-opendal", version = "0.1.0" } nvisy-postgres = { path = "./crates/nvisy-postgres", version = "0.1.0" } nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } nvisy-runtime = { path = "./crates/nvisy-runtime", version = "0.1.0" } @@ -133,3 +135,7 @@ woothee = { version = "0.13", features = [] } # AI/ML frameworks rig-core = { version = "0.28", default-features = false, features = ["reqwest-rustls"] } + +# Storage abstraction +opendal = { version = "0.53", features = [] } +chrono = { version = "0.4", features = ["serde"] } diff --git a/crates/nvisy-opendal/Cargo.toml b/crates/nvisy-opendal/Cargo.toml new file mode 100644 index 0000000..55ed6a2 --- /dev/null +++ b/crates/nvisy-opendal/Cargo.toml @@ -0,0 +1,72 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-opendal" +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } +readme = "./README.md" + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[features] +# Default feature set (none for minimal dependencies) +default = [] + +# Amazon S3 storage backend +s3 = ["opendal/services-s3"] + +# Google Cloud Storage backend +gcs = ["opendal/services-gcs"] + +# Azure Blob Storage backend +azblob = ["opendal/services-azblob"] + +# Google Drive backend +gdrive = ["opendal/services-gdrive"] + +# Dropbox backend +dropbox = ["opendal/services-dropbox"] + +# OneDrive backend +onedrive = ["opendal/services-onedrive"] + +# All storage backends +all-backends = ["s3", "gcs", "azblob", "gdrive", "dropbox", "onedrive"] + +[dependencies] +# Internal crates +nvisy-core = { workspace = true } + +# Async runtime +tokio = { workspace = true, features = ["rt", "sync", "io-util"] } +futures = { workspace = 
true, features = [] } + +# Storage +opendal = { workspace = true, features = [] } + +# Observability +tracing = { workspace = true, features = [] } + +# (De)serialization +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true, features = [] } + +# Derive macros & utilities +thiserror = { workspace = true, features = [] } +derive_more = { workspace = true, features = ["debug", "display", "from", "into"] } + +# Data types +jiff = { workspace = true, features = ["serde"] } + +[dev-dependencies] +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/nvisy-opendal/README.md b/crates/nvisy-opendal/README.md new file mode 100644 index 0000000..bf01e5b --- /dev/null +++ b/crates/nvisy-opendal/README.md @@ -0,0 +1,44 @@ +# nvisy-opendal + +Storage abstraction layer for the Nvisy platform using OpenDAL for unified access +to multiple cloud storage backends. + +## Features + +- **Unified API** - Single interface for multiple storage backends +- **Cloud-Native** - Support for major cloud storage providers +- **Async Operations** - Non-blocking I/O with Tokio runtime +- **Feature Flags** - Enable only the backends you need + +## Supported Backends + +| Backend | Feature Flag | Description | +|---------|--------------|-------------| +| Amazon S3 | `s3` | S3-compatible object storage | +| Google Cloud Storage | `gcs` | GCS bucket storage | +| Azure Blob Storage | `azblob` | Azure container storage | +| Google Drive | `gdrive` | Google Drive file storage | +| Dropbox | `dropbox` | Dropbox cloud storage | +| OneDrive | `onedrive` | Microsoft OneDrive storage | + +## Usage + +Enable the backends you need in `Cargo.toml`: + +```toml +[dependencies] +nvisy-opendal = { path = "../nvisy-opendal", features = ["s3", "gcs"] } +``` + +Or enable all backends: + +```toml +[dependencies] +nvisy-opendal = { path = "../nvisy-opendal", features = ["all-backends"] } +``` + +## Key Dependencies + +- `opendal` - Unified data access layer for multiple storage services +- `tokio` - Async runtime for non-blocking I/O operations +- `jiff` - Modern date/time handling for file metadata diff --git a/crates/nvisy-opendal/src/backend.rs b/crates/nvisy-opendal/src/backend.rs new file mode 100644 index 0000000..b4294c8 --- /dev/null +++ b/crates/nvisy-opendal/src/backend.rs @@ -0,0 +1,293 @@ +//! Storage backend implementation. + +use opendal::{Operator, services}; + +use crate::TRACING_TARGET; +use crate::config::{BackendType, StorageConfig}; +use crate::error::{StorageError, StorageResult}; + +/// Unified storage backend that wraps OpenDAL operators. +#[derive(Clone)] +pub struct StorageBackend { + operator: Operator, + config: StorageConfig, +} + +impl StorageBackend { + /// Creates a new storage backend from configuration. + pub async fn new(config: StorageConfig) -> StorageResult<Self> { + let operator = Self::create_operator(&config)?; + + tracing::info!( + target: TRACING_TARGET, + backend = ?config.backend_type, + root = %config.root, + "Storage backend initialized" + ); + + Ok(Self { operator, config }) + } + + /// Returns the configuration for this backend. + pub fn config(&self) -> &StorageConfig { + &self.config + } + + /// Returns the backend type. + pub fn backend_type(&self) -> &BackendType { + &self.config.backend_type + } + + /// Reads a file from storage.
+ pub async fn read(&self, path: &str) -> StorageResult<Vec<u8>> { + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "Reading file" + ); + + let data = self.operator.read(path).await?.to_vec(); + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + size = data.len(), + "File read complete" + ); + + Ok(data) + } + + /// Writes data to a file in storage. + pub async fn write(&self, path: &str, data: &[u8]) -> StorageResult<()> { + tracing::debug!( + target: TRACING_TARGET, + path = %path, + size = data.len(), + "Writing file" + ); + + self.operator.write(path, data.to_vec()).await?; + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "File write complete" + ); + + Ok(()) + } + + /// Deletes a file from storage. + pub async fn delete(&self, path: &str) -> StorageResult<()> { + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "Deleting file" + ); + + self.operator.delete(path).await?; + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "File deleted" + ); + + Ok(()) + } + + /// Checks if a file exists. + pub async fn exists(&self, path: &str) -> StorageResult<bool> { + Ok(self.operator.exists(path).await?) + } + + /// Gets metadata for a file. + pub async fn stat(&self, path: &str) -> StorageResult<FileMetadata> { + let meta = self.operator.stat(path).await?; + + // Convert chrono DateTime to jiff Timestamp + let last_modified = meta + .last_modified() + .and_then(|dt| jiff::Timestamp::from_second(dt.timestamp()).ok()); + + Ok(FileMetadata { + size: meta.content_length(), + last_modified, + content_type: meta.content_type().map(|s| s.to_string()), + }) + } + + /// Lists files in a directory. + pub async fn list(&self, path: &str) -> StorageResult<Vec<String>> { + use futures::TryStreamExt; + + let entries: Vec<_> = self.operator.lister(path).await?.try_collect().await?; + + Ok(entries.into_iter().map(|e| e.path().to_string()).collect()) + } + + /// Copies a file from one path to another. + pub async fn copy(&self, from: &str, to: &str) -> StorageResult<()> { + tracing::debug!( + target: TRACING_TARGET, + from = %from, + to = %to, + "Copying file" + ); + + self.operator.copy(from, to).await?; + + Ok(()) + } + + /// Moves a file from one path to another. + pub async fn rename(&self, from: &str, to: &str) -> StorageResult<()> { + tracing::debug!( + target: TRACING_TARGET, + from = %from, + to = %to, + "Moving file" + ); + + self.operator.rename(from, to).await?; + + Ok(()) + } + + /// Creates an OpenDAL operator based on configuration.
+ #[allow(unreachable_patterns)] + fn create_operator(config: &StorageConfig) -> StorageResult<Operator> { + match config.backend_type { + #[cfg(feature = "s3")] + BackendType::S3 => { + let mut builder = services::S3::default().bucket(&config.root); + + if let Some(ref region) = config.region { + builder = builder.region(region); + } + + if let Some(ref endpoint) = config.endpoint { + builder = builder.endpoint(endpoint); + } + + if let Some(ref access_key_id) = config.access_key_id { + builder = builder.access_key_id(access_key_id); + } + + if let Some(ref secret_access_key) = config.secret_access_key { + builder = builder.secret_access_key(secret_access_key); + } + + Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| StorageError::init(e.to_string())) + } + + #[cfg(feature = "gcs")] + BackendType::Gcs => { + let builder = services::Gcs::default().bucket(&config.root); + + Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| StorageError::init(e.to_string())) + } + + #[cfg(feature = "azblob")] + BackendType::AzureBlob => { + let mut builder = services::Azblob::default().container(&config.root); + + if let Some(ref account_name) = config.account_name { + builder = builder.account_name(account_name); + } + + if let Some(ref account_key) = config.account_key { + builder = builder.account_key(account_key); + } + + Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| StorageError::init(e.to_string())) + } + + #[cfg(feature = "gdrive")] + BackendType::GoogleDrive => { + let mut builder = services::Gdrive::default().root(&config.root); + + if let Some(ref access_token) = config.access_token { + builder = builder.access_token(access_token); + } + + Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| StorageError::init(e.to_string())) + } + + #[cfg(feature = "dropbox")] + BackendType::Dropbox => { + let mut builder = services::Dropbox::default().root(&config.root); + + if let Some(ref access_token) = config.access_token { + builder = builder.access_token(access_token); + } + + if let Some(ref refresh_token) = config.refresh_token { + builder = builder.refresh_token(refresh_token); + } + + if let Some(ref client_id) = config.access_key_id { + builder = builder.client_id(client_id); + } + + if let Some(ref client_secret) = config.secret_access_key { + builder = builder.client_secret(client_secret); + } + + Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| StorageError::init(e.to_string())) + } + + #[cfg(feature = "onedrive")] + BackendType::OneDrive => { + let mut builder = services::Onedrive::default().root(&config.root); + + if let Some(ref access_token) = config.access_token { + builder = builder.access_token(access_token); + } + + Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| StorageError::init(e.to_string())) + } + + // This should never be reached if the config was properly created + // with the same features enabled + #[allow(unreachable_patterns)] + _ => Err(StorageError::init(format!( + "Backend type {:?} is not supported with current features", + config.backend_type + ))), + } + } +} + +/// File metadata. +#[derive(Debug, Clone)] +pub struct FileMetadata { + /// File size in bytes. + pub size: u64, + /// Last modification time. + pub last_modified: Option<jiff::Timestamp>, + /// Content type / MIME type.
+ pub content_type: Option<String>, +} + +impl std::fmt::Debug for StorageBackend { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("StorageBackend") + .field("backend_type", &self.config.backend_type) + .field("root", &self.config.root) + .finish() + } +} diff --git a/crates/nvisy-opendal/src/config.rs b/crates/nvisy-opendal/src/config.rs new file mode 100644 index 0000000..6b4dc6f --- /dev/null +++ b/crates/nvisy-opendal/src/config.rs @@ -0,0 +1,244 @@ +//! Storage configuration types. + +use serde::{Deserialize, Serialize}; + +/// Storage backend type. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BackendType { + /// Amazon S3 compatible storage. + #[cfg(feature = "s3")] + S3, + + /// Google Cloud Storage. + #[cfg(feature = "gcs")] + Gcs, + + /// Azure Blob Storage. + #[cfg(feature = "azblob")] + AzureBlob, + + /// Google Drive. + #[cfg(feature = "gdrive")] + GoogleDrive, + + /// Dropbox. + #[cfg(feature = "dropbox")] + Dropbox, + + /// OneDrive. + #[cfg(feature = "onedrive")] + OneDrive, +} + +/// Configuration for a storage backend. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StorageConfig { + /// Type of storage backend. + pub backend_type: BackendType, + + /// Root path or bucket/container name. + pub root: String, + + /// Region (for cloud storage). + #[serde(skip_serializing_if = "Option::is_none")] + pub region: Option<String>, + + /// Endpoint URL (for S3-compatible storage). + #[serde(skip_serializing_if = "Option::is_none")] + pub endpoint: Option<String>, + + /// Access key ID / Client ID (for cloud storage). + #[serde(skip_serializing_if = "Option::is_none")] + pub access_key_id: Option<String>, + + /// Secret access key / Client secret (for cloud storage). + #[serde(skip_serializing_if = "Option::is_none")] + pub secret_access_key: Option<String>, + + /// Account name (for Azure Blob Storage). + #[serde(skip_serializing_if = "Option::is_none")] + pub account_name: Option<String>, + + /// Account key (for Azure Blob Storage). + #[serde(skip_serializing_if = "Option::is_none")] + pub account_key: Option<String>, + + /// OAuth access token (for Google Drive, Dropbox, OneDrive). + #[serde(skip_serializing_if = "Option::is_none")] + pub access_token: Option<String>, + + /// OAuth refresh token (for Google Drive, Dropbox, OneDrive). + #[serde(skip_serializing_if = "Option::is_none")] + pub refresh_token: Option<String>, +} + +impl StorageConfig { + /// Creates an S3 storage configuration. + #[cfg(feature = "s3")] + pub fn s3(bucket: impl Into<String>, region: impl Into<String>) -> Self { + Self { + backend_type: BackendType::S3, + root: bucket.into(), + region: Some(region.into()), + endpoint: None, + access_key_id: None, + secret_access_key: None, + account_name: None, + account_key: None, + access_token: None, + refresh_token: None, + } + } + + /// Creates an S3-compatible storage configuration with custom endpoint. + #[cfg(feature = "s3")] + pub fn s3_compatible( + bucket: impl Into<String>, + endpoint: impl Into<String>, + region: impl Into<String>, + ) -> Self { + Self { + backend_type: BackendType::S3, + root: bucket.into(), + region: Some(region.into()), + endpoint: Some(endpoint.into()), + access_key_id: None, + secret_access_key: None, + account_name: None, + account_key: None, + access_token: None, + refresh_token: None, + } + } + + /// Creates a GCS storage configuration.
+ #[cfg(feature = "gcs")] + pub fn gcs(bucket: impl Into<String>) -> Self { + Self { + backend_type: BackendType::Gcs, + root: bucket.into(), + region: None, + endpoint: None, + access_key_id: None, + secret_access_key: None, + account_name: None, + account_key: None, + access_token: None, + refresh_token: None, + } + } + + /// Creates an Azure Blob Storage configuration. + #[cfg(feature = "azblob")] + pub fn azure_blob(container: impl Into<String>, account_name: impl Into<String>) -> Self { + Self { + backend_type: BackendType::AzureBlob, + root: container.into(), + region: None, + endpoint: None, + access_key_id: None, + secret_access_key: None, + account_name: Some(account_name.into()), + account_key: None, + access_token: None, + refresh_token: None, + } + } + + /// Creates a Google Drive storage configuration. + #[cfg(feature = "gdrive")] + pub fn google_drive(root: impl Into<String>) -> Self { + Self { + backend_type: BackendType::GoogleDrive, + root: root.into(), + region: None, + endpoint: None, + access_key_id: None, + secret_access_key: None, + account_name: None, + account_key: None, + access_token: None, + refresh_token: None, + } + } + + /// Creates a Dropbox storage configuration. + #[cfg(feature = "dropbox")] + pub fn dropbox(root: impl Into<String>) -> Self { + Self { + backend_type: BackendType::Dropbox, + root: root.into(), + region: None, + endpoint: None, + access_key_id: None, + secret_access_key: None, + account_name: None, + account_key: None, + access_token: None, + refresh_token: None, + } + } + + /// Creates a OneDrive storage configuration. + #[cfg(feature = "onedrive")] + pub fn onedrive(root: impl Into<String>) -> Self { + Self { + backend_type: BackendType::OneDrive, + root: root.into(), + region: None, + endpoint: None, + access_key_id: None, + secret_access_key: None, + account_name: None, + account_key: None, + access_token: None, + refresh_token: None, + } + } + + /// Sets the access credentials for S3/GCS. + #[cfg(any(feature = "s3", feature = "gcs"))] + pub fn with_credentials( + mut self, + access_key_id: impl Into<String>, + secret_access_key: impl Into<String>, + ) -> Self { + self.access_key_id = Some(access_key_id.into()); + self.secret_access_key = Some(secret_access_key.into()); + self + } + + /// Sets the Azure account key. + #[cfg(feature = "azblob")] + pub fn with_account_key(mut self, account_key: impl Into<String>) -> Self { + self.account_key = Some(account_key.into()); + self + } + + /// Sets the OAuth access token for OAuth-based backends. + #[cfg(any(feature = "gdrive", feature = "dropbox", feature = "onedrive"))] + pub fn with_access_token(mut self, access_token: impl Into<String>) -> Self { + self.access_token = Some(access_token.into()); + self + } + + /// Sets the OAuth refresh token for OAuth-based backends. + #[cfg(any(feature = "gdrive", feature = "dropbox", feature = "onedrive"))] + pub fn with_refresh_token(mut self, refresh_token: impl Into<String>) -> Self { + self.refresh_token = Some(refresh_token.into()); + self + } + + /// Sets the client credentials for OAuth-based backends. + #[cfg(any(feature = "gdrive", feature = "dropbox", feature = "onedrive"))] + pub fn with_client_credentials( + mut self, + client_id: impl Into<String>, + client_secret: impl Into<String>, + ) -> Self { + self.access_key_id = Some(client_id.into()); + self.secret_access_key = Some(client_secret.into()); + self + } +} diff --git a/crates/nvisy-opendal/src/error.rs b/crates/nvisy-opendal/src/error.rs new file mode 100644 index 0000000..b9ff0b4 --- /dev/null +++ b/crates/nvisy-opendal/src/error.rs @@ -0,0 +1,98 @@ +//! Storage error types.
+ +/// Result type for storage operations. +pub type StorageResult<T> = Result<T, StorageError>; + +/// Errors that can occur during storage operations. +#[derive(Debug, thiserror::Error)] +pub enum StorageError { + /// Failed to initialize the storage backend. + #[error("storage initialization failed: {0}")] + Init(String), + + /// File or object not found. + #[error("not found: {0}")] + NotFound(String), + + /// Permission denied. + #[error("permission denied: {0}")] + PermissionDenied(String), + + /// Read operation failed. + #[error("read failed: {0}")] + Read(String), + + /// Write operation failed. + #[error("write failed: {0}")] + Write(String), + + /// Delete operation failed. + #[error("delete failed: {0}")] + Delete(String), + + /// List operation failed. + #[error("list failed: {0}")] + List(String), + + /// Invalid path or URI. + #[error("invalid path: {0}")] + InvalidPath(String), + + /// Backend-specific error. + #[error("backend error: {0}")] + Backend(opendal::Error), +} + +impl StorageError { + /// Creates a new initialization error. + pub fn init(msg: impl Into<String>) -> Self { + Self::Init(msg.into()) + } + + /// Creates a new not found error. + pub fn not_found(path: impl Into<String>) -> Self { + Self::NotFound(path.into()) + } + + /// Creates a new permission denied error. + pub fn permission_denied(msg: impl Into<String>) -> Self { + Self::PermissionDenied(msg.into()) + } + + /// Creates a new read error. + pub fn read(msg: impl Into<String>) -> Self { + Self::Read(msg.into()) + } + + /// Creates a new write error. + pub fn write(msg: impl Into<String>) -> Self { + Self::Write(msg.into()) + } + + /// Creates a new delete error. + pub fn delete(msg: impl Into<String>) -> Self { + Self::Delete(msg.into()) + } + + /// Creates a new list error. + pub fn list(msg: impl Into<String>) -> Self { + Self::List(msg.into()) + } + + /// Creates a new invalid path error. + pub fn invalid_path(msg: impl Into<String>) -> Self { + Self::InvalidPath(msg.into()) + } +} + +impl From<opendal::Error> for StorageError { + fn from(err: opendal::Error) -> Self { + use opendal::ErrorKind; + + match err.kind() { + ErrorKind::NotFound => Self::NotFound(err.to_string()), + ErrorKind::PermissionDenied => Self::PermissionDenied(err.to_string()), + _ => Self::Backend(err), + } + } +} diff --git a/crates/nvisy-opendal/src/lib.rs b/crates/nvisy-opendal/src/lib.rs new file mode 100644 index 0000000..f937a09 --- /dev/null +++ b/crates/nvisy-opendal/src/lib.rs @@ -0,0 +1,17 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +mod backend; +mod config; +mod error; + +#[doc(hidden)] +pub mod prelude; + +pub use backend::{FileMetadata, StorageBackend}; +pub use config::{BackendType, StorageConfig}; +pub use error::{StorageError, StorageResult}; + +/// Tracing target for storage operations. +pub const TRACING_TARGET: &str = "nvisy_opendal"; diff --git a/crates/nvisy-opendal/src/prelude.rs b/crates/nvisy-opendal/src/prelude.rs new file mode 100644 index 0000000..b7068c7 --- /dev/null +++ b/crates/nvisy-opendal/src/prelude.rs @@ -0,0 +1,5 @@ +//! Prelude module for convenient imports. + +pub use crate::backend::{FileMetadata, StorageBackend}; +pub use crate::config::{BackendType, StorageConfig}; +pub use crate::error::{StorageError, StorageResult}; diff --git a/crates/nvisy-postgres/src/model/chat_operation.rs b/crates/nvisy-postgres/src/model/chat_operation.rs deleted file mode 100644 index 4a1584a..0000000 --- a/crates/nvisy-postgres/src/model/chat_operation.rs +++ /dev/null @@ -1,186 +0,0 @@ -//! 
Chat operation model for PostgreSQL database operations. -//! -//! This module provides models for tracking document operations (diffs) produced -//! by tool calls. Operations represent the actual changes to be applied to documents, -//! supporting apply/revert functionality for undo capabilities. -//! -//! ## Models -//! -//! - [`ChatOperation`] - Main operation model with diff details -//! - [`NewChatOperation`] - Data structure for creating new operations -//! - [`UpdateChatOperation`] - Data structure for updating existing operations - -use diesel::prelude::*; -use jiff_diesel::Timestamp; -use uuid::Uuid; - -use crate::schema::chat_operations; -use crate::types::HasCreatedAt; - -/// Chat operation model representing a document operation (diff). -/// -/// This model tracks individual operations produced by tool calls that can be -/// applied to or reverted from documents. Operations store position-based diffs -/// rather than content, enabling efficient undo/redo functionality. -#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = chat_operations)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct ChatOperation { - /// Unique operation identifier. - pub id: Uuid, - /// Reference to the tool call that produced this operation. - pub tool_call_id: Uuid, - /// Reference to the file being modified. - pub file_id: Uuid, - /// Optional reference to a specific chunk within the file. - pub chunk_id: Option, - /// Type of operation (insert, replace, delete, format, merge, split, etc.). - pub operation_type: String, - /// The diff specification as JSON (positions, not content). - pub operation_diff: serde_json::Value, - /// Whether this operation has been applied to the document. - pub applied: bool, - /// Whether this operation was reverted by the user. - pub reverted: bool, - /// Timestamp when the operation was created. - pub created_at: Timestamp, - /// Timestamp when the operation was applied. - pub applied_at: Option, -} - -/// Data structure for creating a new chat operation. -/// -/// Contains all the information necessary to record a new document operation. -/// Operations are created as unapplied by default and can be applied later. -#[derive(Debug, Clone, Insertable)] -#[diesel(table_name = chat_operations)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewChatOperation { - /// Reference to the tool call that produced this operation. - pub tool_call_id: Uuid, - /// Reference to the file being modified. - pub file_id: Uuid, - /// Optional reference to a specific chunk. - pub chunk_id: Option, - /// Type of operation. - pub operation_type: String, - /// The diff specification as JSON. - pub operation_diff: Option, - /// Optional initial applied state. - pub applied: Option, - /// Optional initial reverted state. - pub reverted: Option, -} - -/// Data structure for updating an existing chat operation. -/// -/// Contains optional fields for modifying operation properties. Primarily -/// used to mark operations as applied or reverted. -#[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = chat_operations)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateChatOperation { - /// Updated applied state. - pub applied: Option, - /// Updated reverted state. - pub reverted: Option, - /// Updated applied timestamp. - pub applied_at: Option>, -} - -impl ChatOperation { - /// Returns whether the operation has been applied. 
- #[inline] - pub fn is_applied(&self) -> bool { - self.applied - } - - /// Returns whether the operation has been reverted. - #[inline] - pub fn is_reverted(&self) -> bool { - self.reverted - } - - /// Returns whether the operation is pending (not yet applied). - #[inline] - pub fn is_pending(&self) -> bool { - !self.applied - } - - /// Returns whether the operation can be applied. - #[inline] - pub fn can_apply(&self) -> bool { - !self.applied - } - - /// Returns whether the operation can be reverted. - #[inline] - pub fn can_revert(&self) -> bool { - self.applied && !self.reverted - } - - /// Returns whether the operation targets a specific chunk. - #[inline] - pub fn has_chunk(&self) -> bool { - self.chunk_id.is_some() - } - - /// Returns whether the operation has diff data. - pub fn has_diff(&self) -> bool { - !self - .operation_diff - .as_object() - .is_none_or(|obj| obj.is_empty()) - } - - /// Returns the time between creation and application, if applied. - pub fn time_to_apply(&self) -> Option { - self.applied_at.map(|applied| { - let created: jiff::Timestamp = self.created_at.into(); - let applied: jiff::Timestamp = applied.into(); - applied.since(created).unwrap_or_default() - }) - } - - /// Returns whether this is an insert operation. - #[inline] - pub fn is_insert(&self) -> bool { - self.operation_type == "insert" - } - - /// Returns whether this is a replace operation. - #[inline] - pub fn is_replace(&self) -> bool { - self.operation_type == "replace" - } - - /// Returns whether this is a delete operation. - #[inline] - pub fn is_delete(&self) -> bool { - self.operation_type == "delete" - } - - /// Returns whether this is a format operation. - #[inline] - pub fn is_format(&self) -> bool { - self.operation_type == "format" - } - - /// Returns whether this is a merge operation. - #[inline] - pub fn is_merge(&self) -> bool { - self.operation_type == "merge" - } - - /// Returns whether this is a split operation. - #[inline] - pub fn is_split(&self) -> bool { - self.operation_type == "split" - } -} - -impl HasCreatedAt for ChatOperation { - fn created_at(&self) -> jiff::Timestamp { - self.created_at.into() - } -} diff --git a/crates/nvisy-postgres/src/model/chat_session.rs b/crates/nvisy-postgres/src/model/chat_session.rs deleted file mode 100644 index e7e9591..0000000 --- a/crates/nvisy-postgres/src/model/chat_session.rs +++ /dev/null @@ -1,168 +0,0 @@ -//! Chat session model for PostgreSQL database operations. -//! -//! This module provides models for managing LLM-assisted document editing sessions. -//! Sessions track the interaction between users and AI models during document editing, -//! including message counts, token usage, and model configuration. -//! -//! ## Models -//! -//! - [`ChatSession`] - Main session model with full configuration and status -//! - [`NewChatSession`] - Data structure for creating new sessions -//! - [`UpdateChatSession`] - Data structure for updating existing sessions - -use diesel::prelude::*; -use jiff_diesel::Timestamp; -use uuid::Uuid; - -use crate::schema::chat_sessions; -use crate::types::{ChatSessionStatus, HasCreatedAt, HasOwnership, HasUpdatedAt}; - -/// Chat session model representing an LLM-assisted document editing session. -/// -/// This model manages the lifecycle of editing sessions where users interact with -/// AI models to edit documents. Each session tracks the primary file being edited, -/// model configuration, and usage statistics like message and token counts. 
-#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = chat_sessions)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct ChatSession { - /// Unique session identifier. - pub id: Uuid, - /// Reference to the workspace this session belongs to. - pub workspace_id: Uuid, - /// Account that created and owns this session. - pub account_id: Uuid, - /// Primary file being edited in this session. - pub primary_file_id: Uuid, - /// User-friendly session name. - pub display_name: String, - /// Current lifecycle status of the session. - pub session_status: ChatSessionStatus, - /// LLM configuration (model, temperature, max tokens, etc.). - pub model_config: serde_json::Value, - /// Total number of messages exchanged in this session. - pub message_count: i32, - /// Total tokens used in this session. - pub token_count: i32, - /// Timestamp when this session was created. - pub created_at: Timestamp, - /// Timestamp when this session was last modified. - pub updated_at: Timestamp, -} - -/// Data structure for creating a new chat session. -/// -/// Contains all the information necessary to create a new editing session. -/// Most fields have sensible defaults, allowing sessions to be created with -/// minimal required information. -#[derive(Debug, Clone, Insertable)] -#[diesel(table_name = chat_sessions)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewChatSession { - /// Reference to the workspace this session will belong to. - pub workspace_id: Uuid, - /// Account creating this session. - pub account_id: Uuid, - /// Primary file to be edited in this session. - pub primary_file_id: Uuid, - /// Optional user-friendly session name. - pub display_name: Option, - /// Optional initial session status. - pub session_status: Option, - /// Optional LLM configuration. - pub model_config: Option, -} - -/// Data structure for updating an existing chat session. -/// -/// Contains optional fields for modifying session properties. Only the -/// fields that need to be changed should be set to Some(value), while -/// unchanged fields remain None to preserve their current values. -#[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = chat_sessions)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateChatSession { - /// Updated session display name. - pub display_name: Option, - /// Updated session status. - pub session_status: Option, - /// Updated LLM configuration. - pub model_config: Option, - /// Updated message count. - pub message_count: Option, - /// Updated token count. - pub token_count: Option, -} - -impl ChatSession { - /// Returns whether the session is currently active. - #[inline] - pub fn is_active(&self) -> bool { - self.session_status.is_active() - } - - /// Returns whether the session is paused. - #[inline] - pub fn is_paused(&self) -> bool { - self.session_status.is_paused() - } - - /// Returns whether the session is archived. - #[inline] - pub fn is_archived(&self) -> bool { - self.session_status.is_archived() - } - - /// Returns whether the session can accept new input. - #[inline] - pub fn can_accept_input(&self) -> bool { - self.session_status.can_accept_input() - } - - /// Returns whether the session has any messages. - #[inline] - pub fn has_messages(&self) -> bool { - self.message_count > 0 - } - - /// Returns whether the session has used any tokens. - #[inline] - pub fn has_token_usage(&self) -> bool { - self.token_count > 0 - } - - /// Returns whether the session has model configuration. 
- pub fn has_model_config(&self) -> bool { - !self - .model_config - .as_object() - .is_none_or(|obj| obj.is_empty()) - } - - /// Returns the average tokens per message, if any messages exist. - pub fn avg_tokens_per_message(&self) -> Option { - if self.message_count > 0 { - Some(self.token_count as f64 / self.message_count as f64) - } else { - None - } - } -} - -impl HasCreatedAt for ChatSession { - fn created_at(&self) -> jiff::Timestamp { - self.created_at.into() - } -} - -impl HasUpdatedAt for ChatSession { - fn updated_at(&self) -> jiff::Timestamp { - self.updated_at.into() - } -} - -impl HasOwnership for ChatSession { - fn created_by(&self) -> Uuid { - self.account_id - } -} diff --git a/crates/nvisy-postgres/src/model/chat_tool_call.rs b/crates/nvisy-postgres/src/model/chat_tool_call.rs deleted file mode 100644 index aaaf164..0000000 --- a/crates/nvisy-postgres/src/model/chat_tool_call.rs +++ /dev/null @@ -1,155 +0,0 @@ -//! Chat tool call model for PostgreSQL database operations. -//! -//! This module provides models for tracking tool invocations within chat sessions. -//! Tool calls represent individual operations performed by the LLM, such as -//! merging, splitting, redacting, or translating document content. -//! -//! ## Models -//! -//! - [`ChatToolCall`] - Main tool call model with execution details -//! - [`NewChatToolCall`] - Data structure for creating new tool calls -//! - [`UpdateChatToolCall`] - Data structure for updating existing tool calls - -use diesel::prelude::*; -use jiff_diesel::Timestamp; -use uuid::Uuid; - -use crate::schema::chat_tool_calls; -use crate::types::{ChatToolStatus, HasCreatedAt}; - -/// Chat tool call model representing a tool invocation within a session. -/// -/// This model tracks individual tool calls made during editing sessions, -/// including the tool name, input parameters, output results, and execution -/// status. Tool calls are linked to specific files and optionally to chunks. -#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = chat_tool_calls)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct ChatToolCall { - /// Unique tool call identifier. - pub id: Uuid, - /// Reference to the chat session this tool call belongs to. - pub session_id: Uuid, - /// Reference to the file being operated on. - pub file_id: Uuid, - /// Optional reference to a specific chunk within the file. - pub chunk_id: Option, - /// Name of the tool being invoked. - pub tool_name: String, - /// Tool input parameters as JSON. - pub tool_input: serde_json::Value, - /// Tool output results as JSON. - pub tool_output: serde_json::Value, - /// Current execution status of the tool call. - pub tool_status: ChatToolStatus, - /// Timestamp when the tool call was created/started. - pub started_at: Timestamp, - /// Timestamp when the tool execution completed. - pub completed_at: Option, -} - -/// Data structure for creating a new chat tool call. -/// -/// Contains all the information necessary to record a new tool invocation. -/// The tool status defaults to pending, and output is populated upon completion. -#[derive(Debug, Clone, Insertable)] -#[diesel(table_name = chat_tool_calls)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewChatToolCall { - /// Reference to the chat session. - pub session_id: Uuid, - /// Reference to the file being operated on. - pub file_id: Uuid, - /// Optional reference to a specific chunk. - pub chunk_id: Option, - /// Name of the tool being invoked. 
- pub tool_name: String, - /// Tool input parameters as JSON. - pub tool_input: Option, - /// Optional initial tool output. - pub tool_output: Option, - /// Optional initial tool status. - pub tool_status: Option, -} - -/// Data structure for updating an existing chat tool call. -/// -/// Contains optional fields for modifying tool call properties. Primarily -/// used to update the status and output upon completion or cancellation. -#[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = chat_tool_calls)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateChatToolCall { - /// Updated tool output results. - pub tool_output: Option, - /// Updated execution status. - pub tool_status: Option, - /// Updated completion timestamp. - pub completed_at: Option>, -} - -impl ChatToolCall { - /// Returns whether the tool call is pending execution. - #[inline] - pub fn is_pending(&self) -> bool { - self.tool_status.is_pending() - } - - /// Returns whether the tool is currently running. - #[inline] - pub fn is_running(&self) -> bool { - self.tool_status.is_running() - } - - /// Returns whether the tool execution completed successfully. - #[inline] - pub fn is_completed(&self) -> bool { - self.tool_status.is_completed() - } - - /// Returns whether the tool execution was cancelled. - #[inline] - pub fn is_cancelled(&self) -> bool { - self.tool_status.is_cancelled() - } - - /// Returns whether the tool is in a final state. - #[inline] - pub fn is_final(&self) -> bool { - self.tool_status.is_final() - } - - /// Returns whether the tool call targets a specific chunk. - #[inline] - pub fn has_chunk(&self) -> bool { - self.chunk_id.is_some() - } - - /// Returns whether the tool has input parameters. - pub fn has_input(&self) -> bool { - !self.tool_input.as_object().is_none_or(|obj| obj.is_empty()) - } - - /// Returns whether the tool has output results. - pub fn has_output(&self) -> bool { - !self - .tool_output - .as_object() - .is_none_or(|obj| obj.is_empty()) - } - - /// Returns the execution duration if the tool has completed. - pub fn execution_duration(&self) -> Option { - self.completed_at.map(|completed| { - let started: jiff::Timestamp = self.started_at.into(); - let completed: jiff::Timestamp = completed.into(); - completed.since(started).unwrap_or_default() - }) - } -} - -impl HasCreatedAt for ChatToolCall { - fn created_at(&self) -> jiff::Timestamp { - self.started_at.into() - } -} diff --git a/crates/nvisy-postgres/src/model/document.rs b/crates/nvisy-postgres/src/model/document.rs deleted file mode 100644 index ff92e47..0000000 --- a/crates/nvisy-postgres/src/model/document.rs +++ /dev/null @@ -1,143 +0,0 @@ -//! Main document model for PostgreSQL database operations. - -use diesel::prelude::*; -use jiff_diesel::Timestamp; -use uuid::Uuid; - -use crate::schema::documents; -use crate::types::{HasCreatedAt, HasDeletedAt, HasUpdatedAt, Tags}; - -/// Main document model representing a document within a workspace. -#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = documents)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct Document { - /// Unique document identifier. - pub id: Uuid, - /// Reference to the workspace this document belongs to. - pub workspace_id: Uuid, - /// Reference to the account that owns this document. - pub account_id: Uuid, - /// Human-readable document name. - pub display_name: String, - /// Detailed description of the document. - pub description: Option, - /// Tags for document classification and search. 
- pub tags: Vec>, - /// Additional document metadata. - pub metadata: serde_json::Value, - /// Timestamp when the document was created. - pub created_at: Timestamp, - /// Timestamp when the document was last updated. - pub updated_at: Timestamp, - /// Timestamp when the document was soft-deleted. - pub deleted_at: Option, -} - -/// Data for creating a new document. -#[derive(Debug, Default, Clone, Insertable)] -#[diesel(table_name = documents)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocument { - /// Workspace ID. - pub workspace_id: Uuid, - /// Account ID. - pub account_id: Uuid, - /// Document name. - pub display_name: Option, - /// Document description. - pub description: Option, - /// Document tags. - pub tags: Option>>, - /// Metadata. - pub metadata: Option, -} - -/// Data for updating a document. -#[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = documents)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocument { - /// Document name. - pub display_name: Option, - /// Document description. - pub description: Option>, - /// Document tags. - pub tags: Option>>, - /// Metadata. - pub metadata: Option, -} - -impl Document { - /// Returns the flattened tags (removing None values). - pub fn tags(&self) -> Vec { - let tags = self.tags.clone(); - tags.into_iter().flatten().collect() - } - - /// Returns whether the document is deleted. - pub fn is_deleted(&self) -> bool { - self.deleted_at.is_some() - } - - /// Returns whether the document has tags. - pub fn has_tags(&self) -> bool { - !self.tags.is_empty() - } - - /// Returns whether the document contains a specific tag. - pub fn has_tag(&self, tag: &str) -> bool { - self.tags - .iter() - .any(|t| t.as_ref() == Some(&tag.to_string())) - } - - /// Returns the flattened tags (removing None values). - pub fn get_tags(&self) -> Vec { - self.tags.iter().filter_map(|tag| tag.clone()).collect() - } - - /// Returns the tags as a Tags helper. - pub fn tags_helper(&self) -> Tags { - Tags::from_optional_strings(self.tags.clone()) - } - - /// Returns whether the document has a description. - pub fn has_description(&self) -> bool { - self.description - .as_deref() - .is_some_and(|desc| !desc.is_empty()) - } - - /// Returns whether the document has custom metadata. - pub fn has_metadata(&self) -> bool { - !self.metadata.as_object().is_none_or(|obj| obj.is_empty()) - } - - /// Returns the document's display name or a default. - pub fn display_name_or_default(&self) -> &str { - if self.display_name.is_empty() { - "Untitled Document" - } else { - &self.display_name - } - } -} - -impl HasCreatedAt for Document { - fn created_at(&self) -> jiff::Timestamp { - self.created_at.into() - } -} - -impl HasUpdatedAt for Document { - fn updated_at(&self) -> jiff::Timestamp { - self.updated_at.into() - } -} - -impl HasDeletedAt for Document { - fn deleted_at(&self) -> Option { - self.deleted_at.map(Into::into) - } -} diff --git a/crates/nvisy-postgres/src/model/document_comment.rs b/crates/nvisy-postgres/src/model/document_comment.rs deleted file mode 100644 index 91d2c20..0000000 --- a/crates/nvisy-postgres/src/model/document_comment.rs +++ /dev/null @@ -1,152 +0,0 @@ -//! Document comment model for PostgreSQL database operations. 
- -use diesel::prelude::*; -use jiff_diesel::Timestamp; -use uuid::Uuid; - -use crate::schema::document_comments; -use crate::types::EDIT_GRACE_PERIOD_SECONDS; -use crate::types::{HasCreatedAt, HasDeletedAt, HasUpdatedAt}; - -/// Document comment model representing user discussions on files. -#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = document_comments)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct DocumentComment { - /// Unique comment identifier. - pub id: Uuid, - /// Reference to the parent file. - pub file_id: Uuid, - /// Reference to the account that authored this comment. - pub account_id: Uuid, - /// Parent comment for threaded replies (NULL for top-level comments). - pub parent_comment_id: Option, - /// Account being replied to (@mention). - pub reply_to_account_id: Option, - /// Comment text content. - pub content: String, - /// Additional comment metadata. - pub metadata: serde_json::Value, - /// Timestamp when the comment was created. - pub created_at: Timestamp, - /// Timestamp when the comment was last updated. - pub updated_at: Timestamp, - /// Timestamp when the comment was soft-deleted. - pub deleted_at: Option, -} - -/// Data for creating a new document comment. -#[derive(Debug, Default, Clone, Insertable)] -#[diesel(table_name = document_comments)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocumentComment { - /// File ID. - pub file_id: Uuid, - /// Account ID. - pub account_id: Uuid, - /// Parent comment ID for replies. - pub parent_comment_id: Option, - /// Reply to account ID (@mention). - pub reply_to_account_id: Option, - /// Comment content. - pub content: String, - /// Metadata. - pub metadata: Option, -} - -/// Data for updating a document comment. -#[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = document_comments)] -#[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocumentComment { - /// Comment content. - pub content: Option, - /// Metadata. - pub metadata: Option, -} - -impl DocumentComment { - /// Returns the comment content, or `None` if the comment is deleted. - pub fn get_content(&self) -> Option { - if self.is_deleted() { - None - } else { - Some(self.content.clone()) - } - } - - /// Returns whether this is a top-level comment (not a reply). - pub fn is_top_level(&self) -> bool { - self.parent_comment_id.is_none() - } - - /// Returns whether this is a reply to another comment. - pub fn is_reply(&self) -> bool { - self.parent_comment_id.is_some() - } - - /// Returns whether this comment mentions another account. - pub fn has_mention(&self) -> bool { - self.reply_to_account_id.is_some() - } - - /// Returns whether this comment is deleted. - pub fn is_deleted(&self) -> bool { - self.deleted_at.is_some() - } - - /// Returns whether this comment has been edited. - pub fn is_edited(&self) -> bool { - let duration = - jiff::Timestamp::from(self.updated_at) - jiff::Timestamp::from(self.created_at); - duration.get_seconds() > EDIT_GRACE_PERIOD_SECONDS - } -} - -impl NewDocumentComment { - /// Creates a new comment on a file. - pub fn for_file(file_id: Uuid, account_id: Uuid, content: String) -> Self { - Self { - file_id, - account_id, - content, - ..Default::default() - } - } - - /// Sets the parent comment ID for threaded replies. - pub fn with_parent(mut self, parent_comment_id: Uuid) -> Self { - self.parent_comment_id = Some(parent_comment_id); - self - } - - /// Sets the reply-to account ID for @mentions. 
- pub fn with_reply_to(mut self, reply_to_account_id: Uuid) -> Self { - self.reply_to_account_id = Some(reply_to_account_id); - self - } - - /// Sets custom metadata for the comment. - pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self { - self.metadata = Some(metadata); - self - } -} - -impl HasCreatedAt for DocumentComment { - fn created_at(&self) -> jiff::Timestamp { - self.created_at.into() - } -} - -impl HasUpdatedAt for DocumentComment { - fn updated_at(&self) -> jiff::Timestamp { - self.updated_at.into() - } -} - -impl HasDeletedAt for DocumentComment { - fn deleted_at(&self) -> Option { - self.deleted_at.map(Into::into) - } -} diff --git a/crates/nvisy-postgres/src/model/document_file.rs b/crates/nvisy-postgres/src/model/file.rs similarity index 56% rename from crates/nvisy-postgres/src/model/document_file.rs rename to crates/nvisy-postgres/src/model/file.rs index 19896b2..dc22d7f 100644 --- a/crates/nvisy-postgres/src/model/document_file.rs +++ b/crates/nvisy-postgres/src/model/file.rs @@ -1,30 +1,24 @@ -//! Document file model for PostgreSQL database operations. +//! File model for PostgreSQL database operations. use diesel::prelude::*; use jiff_diesel::Timestamp; use uuid::Uuid; -use crate::schema::document_files; -use crate::types::RECENTLY_UPLOADED_HOURS; -use crate::types::{ - ContentSegmentation, FileSource, HasCreatedAt, HasDeletedAt, HasUpdatedAt, ProcessingStatus, - RequireMode, -}; +use crate::schema::files; +use crate::types::{FileSource, HasCreatedAt, HasDeletedAt, HasUpdatedAt, RECENTLY_UPLOADED_HOURS}; -/// Document file model representing a file attached to a document. +/// File model representing a file stored in the system. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = document_files)] +#[diesel(table_name = files)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct DocumentFile { +pub struct File { /// Unique file identifier. pub id: Uuid, - /// Reference to the workspace this file belongs to (required). + /// Reference to the workspace this file belongs to. pub workspace_id: Uuid, - /// Reference to the document this file belongs to (optional). - pub document_id: Option, /// Reference to the account that owns this file. pub account_id: Uuid, - /// Parent file reference for hierarchical relationships or version chains. + /// Parent file reference for version chains. pub parent_id: Option, /// Version number (1 for original, increments for new versions). pub version_number: i32, @@ -34,22 +28,12 @@ pub struct DocumentFile { pub original_filename: String, /// File extension (without the dot). pub file_extension: String, + /// MIME type of the file. + pub mime_type: Option, /// Classification tags. pub tags: Vec>, /// How the file was created (uploaded, imported, generated). pub source: FileSource, - /// Processing mode requirements. - pub require_mode: RequireMode, - /// Processing priority (higher numbers = higher priority). - pub processing_priority: i32, - /// Current processing status. - pub processing_status: ProcessingStatus, - /// Whether file content has been indexed for search. - pub is_indexed: bool, - /// Content segmentation strategy. - pub content_segmentation: ContentSegmentation, - /// Whether to enable visual content processing. - pub visual_support: bool, /// File size in bytes. pub file_size_bytes: i64, /// SHA-256 hash of the file. @@ -68,18 +52,16 @@ pub struct DocumentFile { pub deleted_at: Option, } -/// Data for creating a new document file. +/// Data for creating a new file. 
#[derive(Debug, Default, Clone, Insertable)] -#[diesel(table_name = document_files)] +#[diesel(table_name = files)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocumentFile { +pub struct NewFile { /// Workspace ID (required). pub workspace_id: Uuid, - /// Document ID (optional). - pub document_id: Option, /// Account ID. pub account_id: Uuid, - /// Parent file ID (for derived files or version chains). + /// Parent file ID (for version chains). pub parent_id: Option, /// Display name. pub display_name: Option, @@ -87,75 +69,46 @@ pub struct NewDocumentFile { pub original_filename: Option, /// File extension. pub file_extension: Option, - /// Tags + /// MIME type. + pub mime_type: Option, + /// Tags. pub tags: Option>>, /// How the file was created. pub source: Option, - /// Require mode - pub require_mode: Option, - /// Processing priority - pub processing_priority: Option, - /// Processing status - pub processing_status: Option, - /// Is indexed flag. - pub is_indexed: Option, - /// Content segmentation - pub content_segmentation: Option, - /// Visual support - pub visual_support: Option, - /// File size in bytes + /// File size in bytes. pub file_size_bytes: i64, - /// SHA-256 hash + /// SHA-256 hash. pub file_hash_sha256: Vec, - /// Storage path + /// Storage path. pub storage_path: String, - /// Storage bucket + /// Storage bucket. pub storage_bucket: String, - /// Metadata + /// Metadata. pub metadata: Option, } -/// Data for updating a document file. +/// Data for updating a file. #[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = document_files)] +#[diesel(table_name = files)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocumentFile { - // Note: workspace_id is required and should not be updated after creation - /// Document ID - pub document_id: Option>, - /// Storage path (for moving files between buckets) - pub storage_path: Option, - /// Display name +pub struct UpdateFile { + /// Display name. pub display_name: Option, - /// Parent file ID + /// Parent file ID. pub parent_id: Option>, - /// Tags + /// Tags. pub tags: Option>>, /// How the file was created. pub source: Option, - /// Require mode - pub require_mode: Option, - /// Processing priority - pub processing_priority: Option, - /// Processing status - pub processing_status: Option, - /// Is indexed flag - pub is_indexed: Option, - /// Content segmentation - pub content_segmentation: Option, - /// Visual support - pub visual_support: Option, - /// File size in bytes - pub file_size_bytes: Option, - /// SHA-256 hash - pub file_hash_sha256: Option>, - /// Metadata + /// MIME type. + pub mime_type: Option>, + /// Metadata. pub metadata: Option, - /// Soft delete timestamp + /// Soft delete timestamp. pub deleted_at: Option>, } -impl DocumentFile { +impl File { /// Returns whether the file was uploaded recently. pub fn is_recently_uploaded(&self) -> bool { self.was_created_within(jiff::Span::new().hours(RECENTLY_UPLOADED_HOURS)) @@ -166,26 +119,6 @@ impl DocumentFile { self.deleted_at.is_some() } - /// Returns whether the file is ready for use. - pub fn is_ready(&self) -> bool { - self.processing_status.is_ready() - } - - /// Returns whether the file is currently being processed. - pub fn is_processing(&self) -> bool { - self.processing_status.is_processing() - } - - /// Returns whether the file has completed processing. - pub fn is_processed(&self) -> bool { - self.processing_status.is_final() - } - - /// Returns whether the file processing was canceled. 
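A minimal construction sketch for the renamed `NewFile` insert model. Field types are inferred where the patch rendering strips generic parameters (for example `Option<String>` and `Vec<u8>`), the filename and bucket values are placeholders, and the hash and size are passed in by the caller rather than computed here.

```rust
use nvisy_postgres::model::NewFile;
use uuid::Uuid;

// Sketch only: sets the required storage/identity fields and lets everything
// else (tags, source, parent_id, metadata, ...) fall back to `Default`.
fn new_upload(
    workspace_id: Uuid,
    account_id: Uuid,
    size_bytes: i64,
    hash_sha256: Vec<u8>,
) -> NewFile {
    NewFile {
        workspace_id,
        account_id,
        display_name: Some("quarterly-report.pdf".to_owned()),
        original_filename: Some("quarterly-report.pdf".to_owned()),
        file_extension: Some("pdf".to_owned()),
        mime_type: Some("application/pdf".to_owned()),
        file_size_bytes: size_bytes,
        file_hash_sha256: hash_sha256,
        storage_path: format!("{workspace_id}/quarterly-report.pdf"),
        storage_bucket: "files".to_owned(),
        ..Default::default()
    }
}
```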
- pub fn is_canceled(&self) -> bool { - self.processing_status.is_canceled() - } - /// Returns the file size in a human-readable format. pub fn file_size_human(&self) -> String { let bytes = self.file_size_bytes as f64; @@ -249,41 +182,30 @@ impl DocumentFile { .collect() } - /// Returns the processing priority level description. - pub fn priority_description(&self) -> &'static str { - match self.processing_priority { - p if p >= 9 => "Critical", - p if p >= 7 => "High", - p if p >= 5 => "Medium", - p if p >= 3 => "Low", - _ => "Minimal", - } - } - /// Returns whether this is the original version (version 1). pub fn is_original_version(&self) -> bool { self.version_number == 1 } /// Returns whether this file is a newer version of another file. - pub fn is_version_of(&self, other: &DocumentFile) -> bool { + pub fn is_version_of(&self, other: &File) -> bool { self.parent_id == Some(other.id) && self.version_number > other.version_number } } -impl HasCreatedAt for DocumentFile { +impl HasCreatedAt for File { fn created_at(&self) -> jiff::Timestamp { self.created_at.into() } } -impl HasUpdatedAt for DocumentFile { +impl HasUpdatedAt for File { fn updated_at(&self) -> jiff::Timestamp { self.updated_at.into() } } -impl HasDeletedAt for DocumentFile { +impl HasDeletedAt for File { fn deleted_at(&self) -> Option { self.deleted_at.map(Into::into) } diff --git a/crates/nvisy-postgres/src/model/document_annotation.rs b/crates/nvisy-postgres/src/model/file_annotation.rs similarity index 76% rename from crates/nvisy-postgres/src/model/document_annotation.rs rename to crates/nvisy-postgres/src/model/file_annotation.rs index 3017bd0..ab7bae2 100644 --- a/crates/nvisy-postgres/src/model/document_annotation.rs +++ b/crates/nvisy-postgres/src/model/file_annotation.rs @@ -1,26 +1,26 @@ -//! Document annotation model for PostgreSQL database operations. +//! File annotation model for PostgreSQL database operations. use diesel::prelude::*; use jiff_diesel::Timestamp; use uuid::Uuid; -use crate::schema::document_annotations; +use crate::schema::file_annotations; use crate::types::{AnnotationType, HasCreatedAt, HasDeletedAt, HasUpdatedAt}; -/// Document annotation model representing user annotations on document content. +/// File annotation model representing user annotations on file content. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] -#[diesel(table_name = document_annotations)] +#[diesel(table_name = file_annotations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct DocumentAnnotation { +pub struct FileAnnotation { /// Unique annotation identifier. pub id: Uuid, - /// Reference to the document file this annotation belongs to. - pub document_file_id: Uuid, + /// Reference to the file this annotation belongs to. + pub file_id: Uuid, /// Reference to the account that created this annotation. pub account_id: Uuid, /// Annotation text content. pub content: String, - /// Type of annotation (note, highlight). + /// Type of annotation (annotation, highlight). pub annotation_type: AnnotationType, /// Extended metadata including position/location. pub metadata: serde_json::Value, @@ -32,13 +32,13 @@ pub struct DocumentAnnotation { pub deleted_at: Option, } -/// Data for creating a new document annotation. +/// Data for creating a new file annotation. #[derive(Debug, Clone, Insertable)] -#[diesel(table_name = document_annotations)] +#[diesel(table_name = file_annotations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocumentAnnotation { - /// Document file ID. 
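A hedged usage sketch for the version-chain helpers kept on `File` after the rename; the `describe_version` function is hypothetical, but every method it calls appears in the diff above.

```rust
use nvisy_postgres::model::File;

// Sketch only: reason about a candidate file relative to its parent without
// touching the database, using `is_version_of`, `is_original_version`, and
// `file_size_human` from the impl above.
fn describe_version(parent: &File, candidate: &File) -> String {
    if candidate.is_version_of(parent) {
        format!(
            "{} v{} ({})",
            candidate.display_name,
            candidate.version_number,
            candidate.file_size_human()
        )
    } else if candidate.is_original_version() {
        format!("{} (original)", candidate.display_name)
    } else {
        "unrelated file".to_owned()
    }
}
```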
- pub document_file_id: Uuid, +pub struct NewFileAnnotation { + /// File ID. + pub file_id: Uuid, /// Account ID. pub account_id: Uuid, /// Annotation content. @@ -49,20 +49,22 @@ pub struct NewDocumentAnnotation { pub metadata: Option, } -/// Data for updating a document annotation. +/// Data for updating a file annotation. #[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = document_annotations)] +#[diesel(table_name = file_annotations)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocumentAnnotation { +pub struct UpdateFileAnnotation { /// Annotation content. pub content: Option, /// Annotation type. pub annotation_type: Option, /// Metadata. pub metadata: Option, + /// Soft delete timestamp. + pub deleted_at: Option>, } -impl DocumentAnnotation { +impl FileAnnotation { /// Returns whether the annotation was created recently. pub fn is_recent(&self) -> bool { self.was_created_within(jiff::Span::new().hours(24)) @@ -104,19 +106,19 @@ impl DocumentAnnotation { } } -impl HasCreatedAt for DocumentAnnotation { +impl HasCreatedAt for FileAnnotation { fn created_at(&self) -> jiff::Timestamp { self.created_at.into() } } -impl HasUpdatedAt for DocumentAnnotation { +impl HasUpdatedAt for FileAnnotation { fn updated_at(&self) -> jiff::Timestamp { self.updated_at.into() } } -impl HasDeletedAt for DocumentAnnotation { +impl HasDeletedAt for FileAnnotation { fn deleted_at(&self) -> Option { self.deleted_at.map(Into::into) } diff --git a/crates/nvisy-postgres/src/model/document_chunk.rs b/crates/nvisy-postgres/src/model/file_chunk.rs similarity index 74% rename from crates/nvisy-postgres/src/model/document_chunk.rs rename to crates/nvisy-postgres/src/model/file_chunk.rs index 86b53eb..c26ce84 100644 --- a/crates/nvisy-postgres/src/model/document_chunk.rs +++ b/crates/nvisy-postgres/src/model/file_chunk.rs @@ -1,22 +1,22 @@ -//! Document chunk model for PostgreSQL database operations. +//! File chunk model for PostgreSQL database operations. use diesel::prelude::*; use jiff_diesel::Timestamp; use pgvector::Vector; use uuid::Uuid; -use crate::schema::document_chunks; +use crate::schema::file_chunks; use crate::types::{HasCreatedAt, HasUpdatedAt}; -/// Document chunk model representing a text segment from a document file. +/// File chunk model representing a text segment from a file. /// /// Chunks are used for semantic search via vector embeddings. Each chunk -/// represents a portion of a document with its embedding vector for +/// represents a portion of a file with its embedding vector for /// similarity search. #[derive(Debug, Clone, Queryable, Selectable)] -#[diesel(table_name = document_chunks)] +#[diesel(table_name = file_chunks)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct DocumentChunk { +pub struct FileChunk { /// Unique chunk identifier. pub id: Uuid, /// Reference to the file this chunk belongs to. @@ -41,11 +41,11 @@ pub struct DocumentChunk { pub updated_at: Timestamp, } -/// Data for creating a new document chunk. +/// Data for creating a new file chunk. #[derive(Debug, Clone, Insertable)] -#[diesel(table_name = document_chunks)] +#[diesel(table_name = file_chunks)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct NewDocumentChunk { +pub struct NewFileChunk { /// File ID (required). pub file_id: Uuid, /// Chunk index within the file. @@ -56,19 +56,19 @@ pub struct NewDocumentChunk { pub content_size: Option, /// Token count. pub token_count: Option, - /// Vector embedding. + /// Vector embedding (required). 
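The annotation changeset gains a `deleted_at` field in this patch, enabling soft deletes through the normal update path. A hedged sketch follows; the stripped generics are assumed to be `Option<Option<jiff_diesel::Timestamp>>`, mirroring the timestamp conversion pattern used elsewhere in this patch.

```rust
use nvisy_postgres::model::UpdateFileAnnotation;

// Sketch only: build a changeset that marks an annotation as soft-deleted now,
// leaving content, annotation_type, and metadata untouched via `Default`.
fn soft_delete_annotation() -> UpdateFileAnnotation {
    UpdateFileAnnotation {
        deleted_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))),
        ..Default::default()
    }
}
```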
pub embedding: Vector, - /// Embedding model name. - pub embedding_model: Option, + /// Embedding model name (required). + pub embedding_model: String, /// Metadata. pub metadata: Option, } -/// Data for updating a document chunk. +/// Data for updating a file chunk. #[derive(Debug, Clone, Default, AsChangeset)] -#[diesel(table_name = document_chunks)] +#[diesel(table_name = file_chunks)] #[diesel(check_for_backend(diesel::pg::Pg))] -pub struct UpdateDocumentChunk { +pub struct UpdateFileChunk { /// Token count. pub token_count: Option, /// Vector embedding. @@ -79,7 +79,7 @@ pub struct UpdateDocumentChunk { pub metadata: Option, } -impl DocumentChunk { +impl FileChunk { /// Returns whether the chunk has custom metadata. pub fn has_metadata(&self) -> bool { !self.metadata.as_object().is_none_or(|obj| obj.is_empty()) @@ -91,32 +91,32 @@ impl DocumentChunk { } } -impl HasCreatedAt for DocumentChunk { +impl HasCreatedAt for FileChunk { fn created_at(&self) -> jiff::Timestamp { self.created_at.into() } } -impl HasUpdatedAt for DocumentChunk { +impl HasUpdatedAt for FileChunk { fn updated_at(&self) -> jiff::Timestamp { self.updated_at.into() } } -/// A document chunk with its similarity score. +/// A file chunk with its similarity score. /// /// Returned from similarity search queries. #[derive(Debug, Clone)] -pub struct ScoredDocumentChunk { - /// The document chunk. - pub chunk: DocumentChunk, +pub struct ScoredFileChunk { + /// The file chunk. + pub chunk: FileChunk, /// Similarity score (0.0 to 1.0, higher is more similar). pub score: f64, } -impl ScoredDocumentChunk { +impl ScoredFileChunk { /// Returns a reference to the chunk. - pub fn chunk(&self) -> &DocumentChunk { + pub fn chunk(&self) -> &FileChunk { &self.chunk } @@ -126,7 +126,7 @@ impl ScoredDocumentChunk { } /// Consumes self and returns the inner chunk. 
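A small hedged sketch showing how similarity-search results wrapped in `ScoredFileChunk` might be filtered and unwrapped; the threshold logic is hypothetical, while the public `score` field and `into_chunk` come from the diff above.

```rust
use nvisy_postgres::model::{FileChunk, ScoredFileChunk};

// Sketch only: keep chunks at or above a caller-chosen similarity threshold
// and return the bare chunks for downstream use (e.g. prompt assembly).
fn relevant_chunks(results: Vec<ScoredFileChunk>, threshold: f64) -> Vec<FileChunk> {
    results
        .into_iter()
        .filter(|scored| scored.score >= threshold)
        .map(ScoredFileChunk::into_chunk)
        .collect()
}
```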
- pub fn into_chunk(self) -> DocumentChunk { + pub fn into_chunk(self) -> FileChunk { self.chunk } } diff --git a/crates/nvisy-postgres/src/model/mod.rs b/crates/nvisy-postgres/src/model/mod.rs index 2f5b02e..ce70186 100644 --- a/crates/nvisy-postgres/src/model/mod.rs +++ b/crates/nvisy-postgres/src/model/mod.rs @@ -7,11 +7,11 @@ mod account; mod account_action_token; mod account_api_token; mod account_notification; -mod document; -mod document_annotation; -mod document_chunk; -mod document_comment; -mod document_file; +mod file; +mod file_annotation; +mod file_chunk; +mod pipeline; +mod pipeline_run; mod workspace; mod workspace_activity; @@ -21,10 +21,6 @@ mod workspace_invite; mod workspace_member; mod workspace_webhook; -mod chat_operation; -mod chat_session; -mod chat_tool_call; - // Account models pub use account::{Account, NewAccount, UpdateAccount}; pub use account_action_token::{ @@ -34,16 +30,16 @@ pub use account_api_token::{AccountApiToken, NewAccountApiToken, UpdateAccountAp pub use account_notification::{ AccountNotification, NewAccountNotification, UpdateAccountNotification, }; -// Document models -pub use document::{Document, NewDocument, UpdateDocument}; -pub use document_annotation::{ - DocumentAnnotation, NewDocumentAnnotation, UpdateDocumentAnnotation, -}; -pub use document_chunk::{ - DocumentChunk, NewDocumentChunk, ScoredDocumentChunk, UpdateDocumentChunk, -}; -pub use document_comment::{DocumentComment, NewDocumentComment, UpdateDocumentComment}; -pub use document_file::{DocumentFile, NewDocumentFile, UpdateDocumentFile}; + +// File models +pub use file::{File, NewFile, UpdateFile}; +pub use file_annotation::{FileAnnotation, NewFileAnnotation, UpdateFileAnnotation}; +pub use file_chunk::{FileChunk, NewFileChunk, ScoredFileChunk, UpdateFileChunk}; + +// Pipeline models +pub use pipeline::{NewPipeline, Pipeline, UpdatePipeline}; +pub use pipeline_run::{NewPipelineRun, PipelineRun, UpdatePipelineRun}; + // Workspace models pub use workspace::{NewWorkspace, UpdateWorkspace, Workspace}; pub use workspace_activity::{NewWorkspaceActivity, WorkspaceActivity}; @@ -56,7 +52,3 @@ pub use workspace_integration_run::{ pub use workspace_invite::{NewWorkspaceInvite, UpdateWorkspaceInvite, WorkspaceInvite}; pub use workspace_member::{NewWorkspaceMember, UpdateWorkspaceMember, WorkspaceMember}; pub use workspace_webhook::{NewWorkspaceWebhook, UpdateWorkspaceWebhook, WorkspaceWebhook}; -// Chat models -pub use chat_operation::{ChatOperation, NewChatOperation, UpdateChatOperation}; -pub use chat_session::{ChatSession, NewChatSession, UpdateChatSession}; -pub use chat_tool_call::{ChatToolCall, NewChatToolCall, UpdateChatToolCall}; diff --git a/crates/nvisy-postgres/src/model/pipeline.rs b/crates/nvisy-postgres/src/model/pipeline.rs new file mode 100644 index 0000000..399e067 --- /dev/null +++ b/crates/nvisy-postgres/src/model/pipeline.rs @@ -0,0 +1,147 @@ +//! Pipeline model for PostgreSQL database operations. + +use diesel::prelude::*; +use jiff_diesel::Timestamp; +use uuid::Uuid; + +use crate::schema::pipelines; +use crate::types::{HasCreatedAt, HasDeletedAt, HasUpdatedAt, PipelineStatus}; + +/// Pipeline model representing a workflow definition in the system. +#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] +#[diesel(table_name = pipelines)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct Pipeline { + /// Unique pipeline identifier. + pub id: Uuid, + /// Reference to the workspace this pipeline belongs to. 
+ pub workspace_id: Uuid, + /// Reference to the account that created this pipeline. + pub account_id: Uuid, + /// Pipeline name. + pub name: String, + /// Pipeline description. + pub description: Option, + /// Pipeline lifecycle status. + pub status: PipelineStatus, + /// Pipeline definition (steps, input/output schemas, etc.). + pub definition: serde_json::Value, + /// Extended metadata. + pub metadata: serde_json::Value, + /// Timestamp when the pipeline was created. + pub created_at: Timestamp, + /// Timestamp when the pipeline was last updated. + pub updated_at: Timestamp, + /// Timestamp when the pipeline was soft-deleted. + pub deleted_at: Option, +} + +/// Data for creating a new pipeline. +#[derive(Debug, Default, Clone, Insertable)] +#[diesel(table_name = pipelines)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct NewPipeline { + /// Workspace ID (required). + pub workspace_id: Uuid, + /// Account ID (required). + pub account_id: Uuid, + /// Pipeline name. + pub name: String, + /// Pipeline description. + pub description: Option, + /// Pipeline status. + pub status: Option, + /// Pipeline definition. + pub definition: Option, + /// Metadata. + pub metadata: Option, +} + +/// Data for updating a pipeline. +#[derive(Debug, Clone, Default, AsChangeset)] +#[diesel(table_name = pipelines)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct UpdatePipeline { + /// Pipeline name. + pub name: Option, + /// Pipeline description. + pub description: Option>, + /// Pipeline status. + pub status: Option, + /// Pipeline definition. + pub definition: Option, + /// Metadata. + pub metadata: Option, + /// Soft delete timestamp. + pub deleted_at: Option>, +} + +impl Pipeline { + /// Returns whether the pipeline is deleted. + pub fn is_deleted(&self) -> bool { + self.deleted_at.is_some() + } + + /// Returns whether the pipeline is in draft status. + pub fn is_draft(&self) -> bool { + self.status.is_draft() + } + + /// Returns whether the pipeline is active. + pub fn is_active(&self) -> bool { + self.status.is_active() + } + + /// Returns whether the pipeline is disabled. + pub fn is_disabled(&self) -> bool { + self.status.is_disabled() + } + + /// Returns whether the pipeline can be executed. + pub fn is_runnable(&self) -> bool { + self.status.is_runnable() && !self.is_deleted() + } + + /// Returns whether the pipeline can be edited. + pub fn is_editable(&self) -> bool { + self.status.is_editable() && !self.is_deleted() + } + + /// Returns whether the pipeline has a description. + pub fn has_description(&self) -> bool { + self.description.as_ref().is_some_and(|d| !d.is_empty()) + } + + /// Returns whether the pipeline has custom metadata. + pub fn has_metadata(&self) -> bool { + !self.metadata.as_object().is_none_or(|obj| obj.is_empty()) + } + + /// Returns the steps from the definition, if any. + pub fn steps(&self) -> Option<&Vec> { + self.definition.get("steps")?.as_array() + } + + /// Returns the number of steps in the pipeline. 
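A hedged construction sketch for `NewPipeline`. The step names and the shape of the `definition` JSON are illustrative only (the engine's actual schema is not part of this patch), and option types are inferred where the rendering strips generics.

```rust
use nvisy_postgres::model::NewPipeline;
use serde_json::json;
use uuid::Uuid;

// Sketch only: create a draft pipeline with a two-step illustrative definition;
// status and metadata fall back to `Default`.
fn draft_pipeline(workspace_id: Uuid, account_id: Uuid) -> NewPipeline {
    NewPipeline {
        workspace_id,
        account_id,
        name: "redact-and-export".to_owned(),
        description: Some("Redact PII, then export to PDF".to_owned()),
        definition: Some(json!({
            "steps": [
                { "kind": "redact" },
                { "kind": "export", "format": "pdf" }
            ]
        })),
        ..Default::default()
    }
}
```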
+ pub fn step_count(&self) -> usize { + self.steps().map_or(0, |s| s.len()) + } +} + +impl HasCreatedAt for Pipeline { + fn created_at(&self) -> jiff::Timestamp { + self.created_at.into() + } +} + +impl HasUpdatedAt for Pipeline { + fn updated_at(&self) -> jiff::Timestamp { + self.updated_at.into() + } +} + +impl HasDeletedAt for Pipeline { + fn deleted_at(&self) -> Option { + self.deleted_at.map(Into::into) + } +} diff --git a/crates/nvisy-postgres/src/model/pipeline_run.rs b/crates/nvisy-postgres/src/model/pipeline_run.rs new file mode 100644 index 0000000..f6509af --- /dev/null +++ b/crates/nvisy-postgres/src/model/pipeline_run.rs @@ -0,0 +1,177 @@ +//! Pipeline run model for PostgreSQL database operations. + +use diesel::prelude::*; +use jiff_diesel::Timestamp; +use uuid::Uuid; + +use crate::schema::pipeline_runs; +use crate::types::{HasCreatedAt, PipelineRunStatus, PipelineTriggerType}; + +/// Pipeline run model representing an execution instance of a pipeline. +#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] +#[diesel(table_name = pipeline_runs)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct PipelineRun { + /// Unique run identifier. + pub id: Uuid, + /// Reference to the pipeline definition. + pub pipeline_id: Uuid, + /// Reference to the workspace. + pub workspace_id: Uuid, + /// Account that triggered the run. + pub account_id: Uuid, + /// How the run was initiated. + pub trigger_type: PipelineTriggerType, + /// Current execution status. + pub status: PipelineRunStatus, + /// Runtime input configuration. + pub input_config: serde_json::Value, + /// Runtime output configuration. + pub output_config: serde_json::Value, + /// Pipeline definition snapshot at run time. + pub definition_snapshot: serde_json::Value, + /// Error details if run failed. + pub error: Option, + /// Run metrics (duration, resources, etc.). + pub metrics: serde_json::Value, + /// When execution started. + pub started_at: Option, + /// When execution completed. + pub completed_at: Option, + /// When run was created/queued. + pub created_at: Timestamp, +} + +/// Data for creating a new pipeline run. +#[derive(Debug, Default, Clone, Insertable)] +#[diesel(table_name = pipeline_runs)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct NewPipelineRun { + /// Pipeline ID (required). + pub pipeline_id: Uuid, + /// Workspace ID (required). + pub workspace_id: Uuid, + /// Account ID (required). + pub account_id: Uuid, + /// Trigger type. + pub trigger_type: Option, + /// Initial status. + pub status: Option, + /// Input configuration. + pub input_config: Option, + /// Output configuration. + pub output_config: Option, + /// Definition snapshot. + pub definition_snapshot: serde_json::Value, + /// Metrics. + pub metrics: Option, +} + +/// Data for updating a pipeline run. +#[derive(Debug, Clone, Default, AsChangeset)] +#[diesel(table_name = pipeline_runs)] +#[diesel(check_for_backend(diesel::pg::Pg))] +pub struct UpdatePipelineRun { + /// Execution status. + pub status: Option, + /// Output configuration. + pub output_config: Option, + /// Error details. + pub error: Option>, + /// Metrics. + pub metrics: Option, + /// When execution started. + pub started_at: Option>, + /// When execution completed. + pub completed_at: Option>, +} + +impl PipelineRun { + /// Returns whether the run is queued. + pub fn is_queued(&self) -> bool { + self.status.is_queued() + } + + /// Returns whether the run is currently running. 
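A hedged sketch of queuing a run for an existing pipeline: the helper is hypothetical, but it follows the model above by snapshotting `pipeline.definition` into `definition_snapshot` so later edits to the pipeline do not affect an in-flight run.

```rust
use nvisy_postgres::model::{NewPipelineRun, Pipeline};
use uuid::Uuid;

// Sketch only: trigger_type, status, input/output config, and metrics fall back
// to `Default`; the definition snapshot is copied from the pipeline.
fn queue_run(pipeline: &Pipeline, account_id: Uuid) -> NewPipelineRun {
    NewPipelineRun {
        pipeline_id: pipeline.id,
        workspace_id: pipeline.workspace_id,
        account_id,
        definition_snapshot: pipeline.definition.clone(),
        ..Default::default()
    }
}
```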
+ pub fn is_running(&self) -> bool { + self.status.is_running() + } + + /// Returns whether the run completed successfully. + pub fn is_completed(&self) -> bool { + self.status.is_completed() + } + + /// Returns whether the run failed. + pub fn is_failed(&self) -> bool { + self.status.is_failed() + } + + /// Returns whether the run was cancelled. + pub fn is_cancelled(&self) -> bool { + self.status.is_cancelled() + } + + /// Returns whether the run is still active (queued or running). + pub fn is_active(&self) -> bool { + self.status.is_active() + } + + /// Returns whether the run has finished (completed, failed, or cancelled). + pub fn is_finished(&self) -> bool { + self.status.is_finished() + } + + /// Returns whether the run has an error. + pub fn has_error(&self) -> bool { + self.error.is_some() + } + + /// Returns the error message if present. + pub fn error_message(&self) -> Option<&str> { + self.error + .as_ref() + .and_then(|e| e.get("message")) + .and_then(|m| m.as_str()) + } + + /// Returns the duration of the run in seconds, if available. + pub fn duration_seconds(&self) -> Option { + let started = self.started_at?; + let completed = self.completed_at?; + let started_ts: jiff::Timestamp = started.into(); + let completed_ts: jiff::Timestamp = completed.into(); + Some(completed_ts.duration_since(started_ts).as_secs_f64()) + } + + /// Returns whether the run was manually triggered. + pub fn is_manual(&self) -> bool { + self.trigger_type.is_manual() + } + + /// Returns whether the run was triggered automatically. + pub fn is_automatic(&self) -> bool { + self.trigger_type.is_automatic() + } + + /// Returns whether the run can be retried. + pub fn is_retriable(&self) -> bool { + self.status.is_retriable() + } + + /// Returns the steps from the definition snapshot. + pub fn steps(&self) -> Option<&Vec> { + self.definition_snapshot.get("steps")?.as_array() + } + + /// Returns the number of steps in the run. + pub fn step_count(&self) -> usize { + self.steps().map_or(0, |s| s.len()) + } +} + +impl HasCreatedAt for PipelineRun { + fn created_at(&self) -> jiff::Timestamp { + self.created_at.into() + } +} diff --git a/crates/nvisy-postgres/src/query/chat_operation.rs b/crates/nvisy-postgres/src/query/chat_operation.rs deleted file mode 100644 index af2107f..0000000 --- a/crates/nvisy-postgres/src/query/chat_operation.rs +++ /dev/null @@ -1,371 +0,0 @@ -//! Chat operation repository for managing document operations (diffs). - -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use uuid::Uuid; - -use crate::model::{ChatOperation, NewChatOperation, UpdateChatOperation}; -use crate::types::{CursorPage, CursorPagination, OffsetPagination}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for chat operation database operations. -/// -/// Handles document operation tracking including CRUD operations, apply/revert -/// state management, and querying by tool call or file. -pub trait ChatOperationRepository { - /// Creates a new chat operation. - fn create_chat_operation( - &mut self, - operation: NewChatOperation, - ) -> impl Future> + Send; - - /// Creates multiple chat operations in a batch. - fn create_chat_operations( - &mut self, - operations: Vec, - ) -> impl Future>> + Send; - - /// Finds a chat operation by its unique identifier. - fn find_chat_operation_by_id( - &mut self, - operation_id: Uuid, - ) -> impl Future>> + Send; - - /// Updates an existing chat operation. 
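A hedged sketch that summarizes a run using the inspection helpers above; the `summarize_run` function and its output strings are hypothetical, but every method it calls appears in the `PipelineRun` impl in this patch.

```rust
use nvisy_postgres::model::PipelineRun;

// Sketch only: turn a run's status, timing, and error payload into a short
// human-readable summary for logs or API responses.
fn summarize_run(run: &PipelineRun) -> String {
    if run.is_completed() {
        match run.duration_seconds() {
            Some(secs) => format!("completed in {secs:.1}s across {} steps", run.step_count()),
            None => "completed".to_owned(),
        }
    } else if run.is_failed() {
        format!(
            "failed: {}{}",
            run.error_message().unwrap_or("unknown error"),
            if run.is_retriable() { " (retriable)" } else { "" }
        )
    } else {
        "still active".to_owned()
    }
}
```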
-    fn update_chat_operation(
-        &mut self,
-        operation_id: Uuid,
-        changes: UpdateChatOperation,
-    ) -> impl Future<Output = PgResult<ChatOperation>> + Send;
-
-    /// Deletes a chat operation.
-    fn delete_chat_operation(
-        &mut self,
-        operation_id: Uuid,
-    ) -> impl Future<Output = PgResult<()>> + Send;
-
-    /// Lists operations for a tool call.
-    fn list_tool_call_operations(
-        &mut self,
-        tool_call_id: Uuid,
-    ) -> impl Future<Output = PgResult<Vec<ChatOperation>>> + Send;
-
-    /// Lists operations for a file with offset pagination.
-    fn offset_list_file_operations(
-        &mut self,
-        file_id: Uuid,
-        pagination: OffsetPagination,
-    ) -> impl Future<Output = PgResult<Vec<ChatOperation>>> + Send;
-
-    /// Lists operations for a file with cursor pagination.
-    fn cursor_list_file_operations(
-        &mut self,
-        file_id: Uuid,
-        pagination: CursorPagination,
-    ) -> impl Future<Output = PgResult<CursorPage<ChatOperation>>> + Send;
-
-    /// Lists pending (unapplied) operations for a file.
-    fn list_pending_file_operations(
-        &mut self,
-        file_id: Uuid,
-    ) -> impl Future<Output = PgResult<Vec<ChatOperation>>> + Send;
-
-    /// Marks an operation as applied.
-    fn apply_chat_operation(
-        &mut self,
-        operation_id: Uuid,
-    ) -> impl Future<Output = PgResult<ChatOperation>> + Send;
-
-    /// Marks multiple operations as applied.
-    fn apply_chat_operations(
-        &mut self,
-        operation_ids: Vec<Uuid>,
-    ) -> impl Future<Output = PgResult<Vec<ChatOperation>>> + Send;
-
-    /// Marks an operation as reverted.
-    fn revert_chat_operation(
-        &mut self,
-        operation_id: Uuid,
-    ) -> impl Future<Output = PgResult<ChatOperation>> + Send;
-
-    /// Counts operations by status for a file.
-    fn count_file_operations(
-        &mut self,
-        file_id: Uuid,
-    ) -> impl Future<Output = PgResult<FileOperationCounts>> + Send;
-}
-
-/// Counts of operations by status for a file.
-#[derive(Debug, Clone, Default)]
-pub struct FileOperationCounts {
-    /// Total number of operations.
-    pub total: i64,
-    /// Number of applied operations.
-    pub applied: i64,
-    /// Number of pending (unapplied) operations.
-    pub pending: i64,
-    /// Number of reverted operations.
-    pub reverted: i64,
-}
-
-impl ChatOperationRepository for PgConnection {
-    async fn create_chat_operation(
-        &mut self,
-        operation: NewChatOperation,
-    ) -> PgResult<ChatOperation> {
-        use schema::chat_operations;
-
-        let operation = diesel::insert_into(chat_operations::table)
-            .values(&operation)
-            .returning(ChatOperation::as_returning())
-            .get_result(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(operation)
-    }
-
-    async fn create_chat_operations(
-        &mut self,
-        operations: Vec<NewChatOperation>,
-    ) -> PgResult<Vec<ChatOperation>> {
-        use schema::chat_operations;
-
-        let operations = diesel::insert_into(chat_operations::table)
-            .values(&operations)
-            .returning(ChatOperation::as_returning())
-            .get_results(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(operations)
-    }
-
-    async fn find_chat_operation_by_id(
-        &mut self,
-        operation_id: Uuid,
-    ) -> PgResult<Option<ChatOperation>> {
-        use schema::chat_operations::dsl::*;
-
-        let operation = chat_operations
-            .filter(id.eq(operation_id))
-            .select(ChatOperation::as_select())
-            .first(self)
-            .await
-            .optional()
-            .map_err(PgError::from)?;
-
-        Ok(operation)
-    }
-
-    async fn update_chat_operation(
-        &mut self,
-        operation_id: Uuid,
-        changes: UpdateChatOperation,
-    ) -> PgResult<ChatOperation> {
-        use schema::chat_operations::dsl::*;
-
-        let operation = diesel::update(chat_operations)
-            .filter(id.eq(operation_id))
-            .set(&changes)
-            .returning(ChatOperation::as_returning())
-            .get_result(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(operation)
-    }
-
-    async fn delete_chat_operation(&mut self, operation_id: Uuid) -> PgResult<()> {
-        use schema::chat_operations::dsl::*;
-
-        diesel::delete(chat_operations)
-            .filter(id.eq(operation_id))
-            .execute(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(())
-    }
-
-    async fn list_tool_call_operations(&mut self, tc_id: Uuid) -> PgResult<Vec<ChatOperation>> {
-        use schema::chat_operations::{self, dsl};
-
-        let operations = chat_operations::table
-            .filter(dsl::tool_call_id.eq(tc_id))
-            .select(ChatOperation::as_select())
-            .order(dsl::created_at.asc())
-            .load(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(operations)
-    }
-
-    async fn offset_list_file_operations(
-        &mut self,
-        f_id: Uuid,
-        pagination: OffsetPagination,
-    ) -> PgResult<Vec<ChatOperation>> {
-        use schema::chat_operations::{self, dsl};
-
-        let operations = chat_operations::table
-            .filter(dsl::file_id.eq(f_id))
-            .select(ChatOperation::as_select())
-            .order(dsl::created_at.desc())
-            .limit(pagination.limit)
-            .offset(pagination.offset)
-            .load(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(operations)
-    }
-
-    async fn cursor_list_file_operations(
-        &mut self,
-        f_id: Uuid,
-        pagination: CursorPagination,
-    ) -> PgResult<CursorPage<ChatOperation>> {
-        use schema::chat_operations::{self, dsl};
-
-        let total = if pagination.include_count {
-            Some(
-                chat_operations::table
-                    .filter(dsl::file_id.eq(f_id))
-                    .count()
-                    .get_result::<i64>(self)
-                    .await
-                    .map_err(PgError::from)?,
-            )
-        } else {
-            None
-        };
-
-        let limit = pagination.limit + 1;
-
-        let items: Vec<ChatOperation> = if let Some(cursor) = &pagination.after {
-            let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp);
-
-            chat_operations::table
-                .filter(dsl::file_id.eq(f_id))
-                .filter(
-                    dsl::created_at
-                        .lt(&cursor_time)
-                        .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))),
-                )
-                .select(ChatOperation::as_select())
-                .order((dsl::created_at.desc(), dsl::id.desc()))
-                .limit(limit)
-                .load(self)
-                .await
-                .map_err(PgError::from)?
-        } else {
-            chat_operations::table
-                .filter(dsl::file_id.eq(f_id))
-                .select(ChatOperation::as_select())
-                .order((dsl::created_at.desc(), dsl::id.desc()))
-                .limit(limit)
-                .load(self)
-                .await
-                .map_err(PgError::from)?
-        };
-
-        Ok(CursorPage::new(
-            items,
-            total,
-            pagination.limit,
-            |op: &ChatOperation| (op.created_at.into(), op.id),
-        ))
-    }
-
-    async fn list_pending_file_operations(&mut self, f_id: Uuid) -> PgResult<Vec<ChatOperation>> {
-        use schema::chat_operations::{self, dsl};
-
-        let operations = chat_operations::table
-            .filter(dsl::file_id.eq(f_id))
-            .filter(dsl::applied.eq(false))
-            .select(ChatOperation::as_select())
-            .order(dsl::created_at.asc())
-            .load(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(operations)
-    }
-
-    async fn apply_chat_operation(&mut self, operation_id: Uuid) -> PgResult<ChatOperation> {
-        let changes = UpdateChatOperation {
-            applied: Some(true),
-            applied_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))),
-            ..Default::default()
-        };
-
-        self.update_chat_operation(operation_id, changes).await
-    }
-
-    async fn apply_chat_operations(
-        &mut self,
-        operation_ids: Vec<Uuid>,
-    ) -> PgResult<Vec<ChatOperation>> {
-        use schema::chat_operations::dsl::*;
-
-        let now = jiff_diesel::Timestamp::from(jiff::Timestamp::now());
-
-        let operations = diesel::update(chat_operations)
-            .filter(id.eq_any(&operation_ids))
-            .set((applied.eq(true), applied_at.eq(Some(now))))
-            .returning(ChatOperation::as_returning())
-            .get_results(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(operations)
-    }
-
-    async fn revert_chat_operation(&mut self, operation_id: Uuid) -> PgResult<ChatOperation> {
-        let changes = UpdateChatOperation {
-            reverted: Some(true),
-            ..Default::default()
-        };
-
-        self.update_chat_operation(operation_id, changes).await
-    }
-
-    async fn count_file_operations(&mut self, f_id: Uuid) -> PgResult<FileOperationCounts> {
-        use diesel::dsl::count_star;
-        use schema::chat_operations::{self, dsl};
-
-        let total = chat_operations::table
-            .filter(dsl::file_id.eq(f_id))
-            .select(count_star())
-            .get_result::<i64>(self)
-            .await
-            .map_err(PgError::from)?;
-
-        let applied_count = chat_operations::table
-            .filter(dsl::file_id.eq(f_id))
-            .filter(dsl::applied.eq(true))
-            .select(count_star())
-            .get_result::<i64>(self)
-            .await
-            .map_err(PgError::from)?;
-
-        let reverted_count = chat_operations::table
-            .filter(dsl::file_id.eq(f_id))
-            .filter(dsl::reverted.eq(true))
-            .select(count_star())
-            .get_result::<i64>(self)
-            .await
-            .map_err(PgError::from)?;
-
-        Ok(FileOperationCounts {
-            total,
-            applied: applied_count,
-            pending: total - applied_count,
-            reverted: reverted_count,
-        })
-    }
-}
diff --git a/crates/nvisy-postgres/src/query/chat_session.rs b/crates/nvisy-postgres/src/query/chat_session.rs
deleted file mode 100644
index 5831af2..0000000
--- a/crates/nvisy-postgres/src/query/chat_session.rs
+++ /dev/null
@@ -1,291 +0,0 @@
-//! Chat session repository for managing LLM-assisted editing sessions.
-
-use std::future::Future;
-
-use diesel::prelude::*;
-use diesel_async::RunQueryDsl;
-use uuid::Uuid;
-
-use crate::model::{ChatSession, NewChatSession, UpdateChatSession};
-use crate::types::{ChatSessionStatus, CursorPage, CursorPagination, OffsetPagination};
-use crate::{PgConnection, PgError, PgResult, schema};
-
-/// Repository for chat session database operations.
-///
-/// Handles LLM-assisted editing session management including CRUD operations,
-/// status tracking, and usage statistics updates.
-pub trait ChatSessionRepository {
-    /// Creates a new chat session with the provided configuration.
-    fn create_chat_session(
-        &mut self,
-        session: NewChatSession,
-    ) -> impl Future<Output = PgResult<ChatSession>> + Send;
-
-    /// Finds a chat session by its unique identifier.
- fn find_chat_session_by_id( - &mut self, - session_id: Uuid, - ) -> impl Future>> + Send; - - /// Updates an existing chat session. - fn update_chat_session( - &mut self, - session_id: Uuid, - changes: UpdateChatSession, - ) -> impl Future> + Send; - - /// Deletes a chat session by archiving it. - fn delete_chat_session( - &mut self, - session_id: Uuid, - ) -> impl Future> + Send; - - /// Lists chat sessions for a workspace with offset pagination. - fn offset_list_chat_sessions( - &mut self, - workspace_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists chat sessions for a workspace with cursor pagination. - fn cursor_list_chat_sessions( - &mut self, - workspace_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists chat sessions for an account with offset pagination. - fn offset_list_account_chat_sessions( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists active chat sessions for a file. - fn list_file_chat_sessions( - &mut self, - file_id: Uuid, - ) -> impl Future>> + Send; - - /// Updates the status of a chat session. - fn update_chat_session_status( - &mut self, - session_id: Uuid, - new_status: ChatSessionStatus, - ) -> impl Future> + Send; - - /// Increments the message and token counts for a session. - fn increment_chat_session_usage( - &mut self, - session_id: Uuid, - messages: i32, - tokens: i32, - ) -> impl Future> + Send; -} - -impl ChatSessionRepository for PgConnection { - async fn create_chat_session(&mut self, session: NewChatSession) -> PgResult { - use schema::chat_sessions; - - let session = diesel::insert_into(chat_sessions::table) - .values(&session) - .returning(ChatSession::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(session) - } - - async fn find_chat_session_by_id(&mut self, session_id: Uuid) -> PgResult> { - use schema::chat_sessions::dsl::*; - - let session = chat_sessions - .filter(id.eq(session_id)) - .select(ChatSession::as_select()) - .first(self) - .await - .optional() - .map_err(PgError::from)?; - - Ok(session) - } - - async fn update_chat_session( - &mut self, - session_id: Uuid, - changes: UpdateChatSession, - ) -> PgResult { - use schema::chat_sessions::dsl::*; - - let session = diesel::update(chat_sessions) - .filter(id.eq(session_id)) - .set(&changes) - .returning(ChatSession::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(session) - } - - async fn delete_chat_session(&mut self, session_id: Uuid) -> PgResult<()> { - use schema::chat_sessions::dsl::*; - - diesel::update(chat_sessions) - .filter(id.eq(session_id)) - .set(session_status.eq(ChatSessionStatus::Archived)) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(()) - } - - async fn offset_list_chat_sessions( - &mut self, - ws_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::chat_sessions::{self, dsl}; - - let sessions = chat_sessions::table - .filter(dsl::workspace_id.eq(ws_id)) - .select(ChatSession::as_select()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(sessions) - } - - async fn cursor_list_chat_sessions( - &mut self, - ws_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use schema::chat_sessions::{self, dsl}; - - let total = if pagination.include_count { - Some( - chat_sessions::table - .filter(dsl::workspace_id.eq(ws_id)) - .count() - 
.get_result::(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let limit = pagination.limit + 1; - - let items: Vec = if let Some(cursor) = &pagination.after { - let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); - - chat_sessions::table - .filter(dsl::workspace_id.eq(ws_id)) - .filter( - dsl::created_at - .lt(&cursor_time) - .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), - ) - .select(ChatSession::as_select()) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(limit) - .load(self) - .await - .map_err(PgError::from)? - } else { - chat_sessions::table - .filter(dsl::workspace_id.eq(ws_id)) - .select(ChatSession::as_select()) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(limit) - .load(self) - .await - .map_err(PgError::from)? - }; - - Ok(CursorPage::new( - items, - total, - pagination.limit, - |s: &ChatSession| (s.created_at.into(), s.id), - )) - } - - async fn offset_list_account_chat_sessions( - &mut self, - acc_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::chat_sessions::{self, dsl}; - - let sessions = chat_sessions::table - .filter(dsl::account_id.eq(acc_id)) - .select(ChatSession::as_select()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(sessions) - } - - async fn list_file_chat_sessions(&mut self, file_id: Uuid) -> PgResult> { - use schema::chat_sessions::{self, dsl}; - - let sessions = chat_sessions::table - .filter(dsl::primary_file_id.eq(file_id)) - .filter(dsl::session_status.ne(ChatSessionStatus::Archived)) - .select(ChatSession::as_select()) - .order(dsl::created_at.desc()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(sessions) - } - - async fn update_chat_session_status( - &mut self, - session_id: Uuid, - new_status: ChatSessionStatus, - ) -> PgResult { - let changes = UpdateChatSession { - session_status: Some(new_status), - ..Default::default() - }; - - self.update_chat_session(session_id, changes).await - } - - async fn increment_chat_session_usage( - &mut self, - session_id: Uuid, - messages: i32, - tokens: i32, - ) -> PgResult { - use schema::chat_sessions::dsl::*; - - let session = diesel::update(chat_sessions) - .filter(id.eq(session_id)) - .set(( - message_count.eq(message_count + messages), - token_count.eq(token_count + tokens), - )) - .returning(ChatSession::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(session) - } -} diff --git a/crates/nvisy-postgres/src/query/chat_tool_call.rs b/crates/nvisy-postgres/src/query/chat_tool_call.rs deleted file mode 100644 index d1bf40b..0000000 --- a/crates/nvisy-postgres/src/query/chat_tool_call.rs +++ /dev/null @@ -1,310 +0,0 @@ -//! Chat tool call repository for managing tool invocations within sessions. - -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use uuid::Uuid; - -use crate::model::{ChatToolCall, NewChatToolCall, UpdateChatToolCall}; -use crate::types::{ChatToolStatus, CursorPage, CursorPagination, OffsetPagination}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for chat tool call database operations. -/// -/// Handles tool invocation tracking including CRUD operations, status updates, -/// and querying by session, file, or status. -pub trait ChatToolCallRepository { - /// Creates a new chat tool call. 
- fn create_chat_tool_call( - &mut self, - tool_call: NewChatToolCall, - ) -> impl Future> + Send; - - /// Finds a chat tool call by its unique identifier. - fn find_chat_tool_call_by_id( - &mut self, - tool_call_id: Uuid, - ) -> impl Future>> + Send; - - /// Updates an existing chat tool call. - fn update_chat_tool_call( - &mut self, - tool_call_id: Uuid, - changes: UpdateChatToolCall, - ) -> impl Future> + Send; - - /// Deletes a chat tool call. - fn delete_chat_tool_call( - &mut self, - tool_call_id: Uuid, - ) -> impl Future> + Send; - - /// Lists tool calls for a session with offset pagination. - fn offset_list_session_tool_calls( - &mut self, - session_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists tool calls for a session with cursor pagination. - fn cursor_list_session_tool_calls( - &mut self, - session_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists tool calls for a file with offset pagination. - fn offset_list_file_tool_calls( - &mut self, - file_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists pending or running tool calls for a session. - fn list_active_session_tool_calls( - &mut self, - session_id: Uuid, - ) -> impl Future>> + Send; - - /// Updates the status of a tool call. - fn update_chat_tool_call_status( - &mut self, - tool_call_id: Uuid, - new_status: ChatToolStatus, - ) -> impl Future> + Send; - - /// Marks a tool call as completed with the given output. - fn complete_chat_tool_call( - &mut self, - tool_call_id: Uuid, - output: serde_json::Value, - ) -> impl Future> + Send; - - /// Cancels a pending or running tool call. - fn cancel_chat_tool_call( - &mut self, - tool_call_id: Uuid, - ) -> impl Future> + Send; -} - -impl ChatToolCallRepository for PgConnection { - async fn create_chat_tool_call( - &mut self, - tool_call: NewChatToolCall, - ) -> PgResult { - use schema::chat_tool_calls; - - let tool_call = diesel::insert_into(chat_tool_calls::table) - .values(&tool_call) - .returning(ChatToolCall::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(tool_call) - } - - async fn find_chat_tool_call_by_id( - &mut self, - tool_call_id: Uuid, - ) -> PgResult> { - use schema::chat_tool_calls::dsl::*; - - let tool_call = chat_tool_calls - .filter(id.eq(tool_call_id)) - .select(ChatToolCall::as_select()) - .first(self) - .await - .optional() - .map_err(PgError::from)?; - - Ok(tool_call) - } - - async fn update_chat_tool_call( - &mut self, - tool_call_id: Uuid, - changes: UpdateChatToolCall, - ) -> PgResult { - use schema::chat_tool_calls::dsl::*; - - let tool_call = diesel::update(chat_tool_calls) - .filter(id.eq(tool_call_id)) - .set(&changes) - .returning(ChatToolCall::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(tool_call) - } - - async fn delete_chat_tool_call(&mut self, tool_call_id: Uuid) -> PgResult<()> { - use schema::chat_tool_calls::dsl::*; - - diesel::delete(chat_tool_calls) - .filter(id.eq(tool_call_id)) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(()) - } - - async fn offset_list_session_tool_calls( - &mut self, - sess_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::chat_tool_calls::{self, dsl}; - - let tool_calls = chat_tool_calls::table - .filter(dsl::session_id.eq(sess_id)) - .select(ChatToolCall::as_select()) - .order(dsl::started_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .load(self) - .await - .map_err(PgError::from)?; - - 
Ok(tool_calls) - } - - async fn cursor_list_session_tool_calls( - &mut self, - sess_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use schema::chat_tool_calls::{self, dsl}; - - let total = if pagination.include_count { - Some( - chat_tool_calls::table - .filter(dsl::session_id.eq(sess_id)) - .count() - .get_result::(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let limit = pagination.limit + 1; - - let items: Vec = if let Some(cursor) = &pagination.after { - let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); - - chat_tool_calls::table - .filter(dsl::session_id.eq(sess_id)) - .filter( - dsl::started_at - .lt(&cursor_time) - .or(dsl::started_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), - ) - .select(ChatToolCall::as_select()) - .order((dsl::started_at.desc(), dsl::id.desc())) - .limit(limit) - .load(self) - .await - .map_err(PgError::from)? - } else { - chat_tool_calls::table - .filter(dsl::session_id.eq(sess_id)) - .select(ChatToolCall::as_select()) - .order((dsl::started_at.desc(), dsl::id.desc())) - .limit(limit) - .load(self) - .await - .map_err(PgError::from)? - }; - - Ok(CursorPage::new( - items, - total, - pagination.limit, - |tc: &ChatToolCall| (tc.started_at.into(), tc.id), - )) - } - - async fn offset_list_file_tool_calls( - &mut self, - f_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::chat_tool_calls::{self, dsl}; - - let tool_calls = chat_tool_calls::table - .filter(dsl::file_id.eq(f_id)) - .select(ChatToolCall::as_select()) - .order(dsl::started_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(tool_calls) - } - - async fn list_active_session_tool_calls( - &mut self, - sess_id: Uuid, - ) -> PgResult> { - use schema::chat_tool_calls::{self, dsl}; - - let tool_calls = chat_tool_calls::table - .filter(dsl::session_id.eq(sess_id)) - .filter( - dsl::tool_status - .eq(ChatToolStatus::Pending) - .or(dsl::tool_status.eq(ChatToolStatus::Running)), - ) - .select(ChatToolCall::as_select()) - .order(dsl::started_at.asc()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(tool_calls) - } - - async fn update_chat_tool_call_status( - &mut self, - tool_call_id: Uuid, - new_status: ChatToolStatus, - ) -> PgResult { - let changes = UpdateChatToolCall { - tool_status: Some(new_status), - ..Default::default() - }; - - self.update_chat_tool_call(tool_call_id, changes).await - } - - async fn complete_chat_tool_call( - &mut self, - tool_call_id: Uuid, - output: serde_json::Value, - ) -> PgResult { - let changes = UpdateChatToolCall { - tool_output: Some(output), - tool_status: Some(ChatToolStatus::Completed), - completed_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))), - }; - - self.update_chat_tool_call(tool_call_id, changes).await - } - - async fn cancel_chat_tool_call(&mut self, tool_call_id: Uuid) -> PgResult { - let changes = UpdateChatToolCall { - tool_status: Some(ChatToolStatus::Cancelled), - completed_at: Some(Some(jiff_diesel::Timestamp::from(jiff::Timestamp::now()))), - ..Default::default() - }; - - self.update_chat_tool_call(tool_call_id, changes).await - } -} diff --git a/crates/nvisy-postgres/src/query/document.rs b/crates/nvisy-postgres/src/query/document.rs deleted file mode 100644 index 857f0ba..0000000 --- a/crates/nvisy-postgres/src/query/document.rs +++ /dev/null @@ -1,340 +0,0 @@ -//! Document repository for managing document operations. 
- -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use pgtrgm::expression_methods::TrgmExpressionMethods; -use uuid::Uuid; - -use crate::model::{Document, NewDocument, UpdateDocument}; -use crate::types::{CursorPage, CursorPagination, OffsetPagination}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for document database operations. -/// -/// Handles document lifecycle management including creation, updates, -/// and search functionality. -pub trait DocumentRepository { - /// Creates a new document with the provided metadata. - fn create_document( - &mut self, - new_document: NewDocument, - ) -> impl Future> + Send; - - /// Finds a document by its unique identifier. - fn find_document_by_id( - &mut self, - document_id: Uuid, - ) -> impl Future>> + Send; - - /// Lists documents associated with a specific workspace with offset pagination. - fn offset_list_workspace_documents( - &mut self, - workspace_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists documents associated with a specific workspace with cursor pagination. - fn cursor_list_workspace_documents( - &mut self, - workspace_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists documents created by a specific account with offset pagination. - fn offset_list_account_documents( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists documents created by a specific account with cursor pagination. - fn cursor_list_account_documents( - &mut self, - account_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Updates a document with new information and metadata. - fn update_document( - &mut self, - document_id: Uuid, - updates: UpdateDocument, - ) -> impl Future> + Send; - - /// Soft deletes a document by setting the deletion timestamp. - fn delete_document(&mut self, document_id: Uuid) -> impl Future> + Send; - - /// Lists all documents with offset pagination. - fn offset_list_documents( - &mut self, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Searches documents by name or description with optional workspace filtering. 
- fn search_documents( - &mut self, - search_query: &str, - workspace_id: Option, - pagination: OffsetPagination, - ) -> impl Future>> + Send; -} - -impl DocumentRepository for PgConnection { - async fn create_document(&mut self, new_document: NewDocument) -> PgResult { - use schema::documents; - - let document = diesel::insert_into(documents::table) - .values(&new_document) - .returning(Document::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(document) - } - - async fn find_document_by_id(&mut self, document_id: Uuid) -> PgResult> { - use schema::documents::{self, dsl}; - - let document = documents::table - .filter(dsl::id.eq(document_id)) - .filter(dsl::deleted_at.is_null()) - .select(Document::as_select()) - .first(self) - .await - .optional() - .map_err(PgError::from)?; - - Ok(document) - } - - async fn offset_list_workspace_documents( - &mut self, - workspace_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::documents::{self, dsl}; - - let documents = documents::table - .filter(dsl::workspace_id.eq(workspace_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::updated_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(documents) - } - - async fn cursor_list_workspace_documents( - &mut self, - workspace_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use diesel::dsl::count_star; - use schema::documents::{self, dsl}; - - let base_filter = dsl::workspace_id - .eq(workspace_id) - .and(dsl::deleted_at.is_null()); - - let total = if pagination.include_count { - Some( - documents::table - .filter(base_filter) - .select(count_star()) - .get_result(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let items = if let Some(cursor) = &pagination.after { - let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - documents::table - .filter(base_filter) - .filter( - dsl::updated_at - .lt(cursor_ts) - .or(dsl::updated_at.eq(cursor_ts).and(dsl::id.lt(cursor.id))), - ) - .order((dsl::updated_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)? - } else { - documents::table - .filter(base_filter) - .order((dsl::updated_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)? 
- }; - - Ok(CursorPage::new(items, total, pagination.limit, |d| { - (d.updated_at.into(), d.id) - })) - } - - async fn offset_list_account_documents( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::documents::{self, dsl}; - - let documents = documents::table - .filter(dsl::account_id.eq(account_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::updated_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(documents) - } - - async fn cursor_list_account_documents( - &mut self, - account_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use diesel::dsl::count_star; - use schema::documents::{self, dsl}; - - let base_filter = dsl::account_id - .eq(account_id) - .and(dsl::deleted_at.is_null()); - - let total = if pagination.include_count { - Some( - documents::table - .filter(base_filter) - .select(count_star()) - .get_result(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let items = if let Some(cursor) = &pagination.after { - let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - documents::table - .filter(base_filter) - .filter( - dsl::updated_at - .lt(cursor_ts) - .or(dsl::updated_at.eq(cursor_ts).and(dsl::id.lt(cursor.id))), - ) - .order((dsl::updated_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)? - } else { - documents::table - .filter(base_filter) - .order((dsl::updated_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)? - }; - - Ok(CursorPage::new(items, total, pagination.limit, |d| { - (d.updated_at.into(), d.id) - })) - } - - async fn update_document( - &mut self, - document_id: Uuid, - updates: UpdateDocument, - ) -> PgResult { - use schema::documents::{self, dsl}; - - let document = diesel::update(documents::table.filter(dsl::id.eq(document_id))) - .set(&updates) - .returning(Document::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(document) - } - - async fn delete_document(&mut self, document_id: Uuid) -> PgResult<()> { - use diesel::dsl::now; - use schema::documents::{self, dsl}; - - diesel::update(documents::table.filter(dsl::id.eq(document_id))) - .set(dsl::deleted_at.eq(now)) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(()) - } - - async fn offset_list_documents( - &mut self, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::documents::{self, dsl}; - - let documents = documents::table - .filter(dsl::deleted_at.is_null()) - .order(dsl::updated_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(Document::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(documents) - } - - async fn search_documents( - &mut self, - search_query: &str, - workspace_id: Option, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::documents::{self, dsl}; - - let mut query = documents::table - .filter(dsl::deleted_at.is_null()) - .filter(dsl::display_name.trgm_similar_to(search_query)) - .order(dsl::display_name.asc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(Document::as_select()) - .into_boxed(); - - if let Some(ws_id) = workspace_id { - query = query.filter(dsl::workspace_id.eq(ws_id)); - } - - let documents = query.load(self).await.map_err(PgError::from)?; - 
Ok(documents) - } -} diff --git a/crates/nvisy-postgres/src/query/document_chunk.rs b/crates/nvisy-postgres/src/query/document_chunk.rs deleted file mode 100644 index b9197d0..0000000 --- a/crates/nvisy-postgres/src/query/document_chunk.rs +++ /dev/null @@ -1,380 +0,0 @@ -//! Document chunks repository for managing document text segments and embeddings. - -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use pgvector::Vector; -use uuid::Uuid; - -use crate::model::{DocumentChunk, NewDocumentChunk, ScoredDocumentChunk, UpdateDocumentChunk}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for document chunk database operations. -/// -/// Handles chunk lifecycle management including creation, embedding updates, -/// and semantic similarity search via pgvector. -pub trait DocumentChunkRepository { - /// Creates multiple document chunks in a single transaction. - fn create_document_chunks( - &mut self, - new_chunks: Vec, - ) -> impl Future>> + Send; - - /// Updates a chunk with new data. - fn update_document_chunk( - &mut self, - chunk_id: Uuid, - updates: UpdateDocumentChunk, - ) -> impl Future> + Send; - - /// Deletes all chunks for a file. - fn delete_document_file_chunks( - &mut self, - file_id: Uuid, - ) -> impl Future> + Send; - - /// Deletes all chunks for all files of a document. - fn delete_document_chunks( - &mut self, - document_id: Uuid, - ) -> impl Future> + Send; - - /// Lists all chunks for a specific file ordered by chunk index. - fn list_document_file_chunks( - &mut self, - file_id: Uuid, - ) -> impl Future>> + Send; - - /// Searches for similar chunks using cosine similarity. - /// - /// Returns chunks ordered by similarity (most similar first). - fn search_similar_document_chunks( - &mut self, - query_embedding: Vector, - limit: i64, - ) -> impl Future>> + Send; - - /// Searches for similar chunks within specific files. - fn search_similar_document_chunks_in_files( - &mut self, - query_embedding: Vector, - file_ids: &[Uuid], - limit: i64, - ) -> impl Future>> + Send; - - /// Searches for similar chunks within all files of specific documents. - fn search_similar_document_chunks_in_documents( - &mut self, - query_embedding: Vector, - document_ids: &[Uuid], - limit: i64, - ) -> impl Future>> + Send; - - /// Searches for similar chunks within specific files with score filtering. - /// - /// Returns chunks with similarity score >= min_score, ordered by similarity. - fn search_scored_chunks_in_files( - &mut self, - query_embedding: Vector, - file_ids: &[Uuid], - min_score: f64, - limit: i64, - ) -> impl Future>> + Send; - - /// Searches for similar chunks within all files of specific documents with score filtering. - /// - /// Returns chunks with similarity score >= min_score, ordered by similarity. - fn search_scored_chunks_in_documents( - &mut self, - query_embedding: Vector, - document_ids: &[Uuid], - min_score: f64, - limit: i64, - ) -> impl Future>> + Send; - - /// Gets the total chunk count for a file. 
- fn count_document_file_chunks( - &mut self, - file_id: Uuid, - ) -> impl Future> + Send; -} - -impl DocumentChunkRepository for PgConnection { - async fn create_document_chunks( - &mut self, - new_chunks: Vec, - ) -> PgResult> { - use schema::document_chunks; - - if new_chunks.is_empty() { - return Ok(vec![]); - } - - let chunks = diesel::insert_into(document_chunks::table) - .values(&new_chunks) - .returning(DocumentChunk::as_returning()) - .get_results(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn update_document_chunk( - &mut self, - chunk_id: Uuid, - updates: UpdateDocumentChunk, - ) -> PgResult { - use schema::document_chunks::{self, dsl}; - - let chunk = diesel::update(document_chunks::table.filter(dsl::id.eq(chunk_id))) - .set(&updates) - .returning(DocumentChunk::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(chunk) - } - - async fn delete_document_file_chunks(&mut self, file_id: Uuid) -> PgResult { - use schema::document_chunks::{self, dsl}; - - let affected = diesel::delete(document_chunks::table.filter(dsl::file_id.eq(file_id))) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(affected) - } - - async fn delete_document_chunks(&mut self, document_id: Uuid) -> PgResult { - use schema::document_chunks::{self, dsl}; - use schema::document_files; - - // Get all file IDs for this document - let file_ids: Vec = document_files::table - .filter(document_files::document_id.eq(document_id)) - .select(document_files::id) - .load(self) - .await - .map_err(PgError::from)?; - - if file_ids.is_empty() { - return Ok(0); - } - - // Delete all chunks for those files - let affected = diesel::delete(document_chunks::table.filter(dsl::file_id.eq_any(file_ids))) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(affected) - } - - async fn list_document_file_chunks(&mut self, file_id: Uuid) -> PgResult> { - use schema::document_chunks::{self, dsl}; - - let chunks = document_chunks::table - .filter(dsl::file_id.eq(file_id)) - .order(dsl::chunk_index.asc()) - .select(DocumentChunk::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn search_similar_document_chunks( - &mut self, - query_embedding: Vector, - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - - let chunks = document_chunks::table - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(DocumentChunk::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn search_similar_document_chunks_in_files( - &mut self, - query_embedding: Vector, - file_ids: &[Uuid], - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - - if file_ids.is_empty() { - return Ok(vec![]); - } - - let chunks = document_chunks::table - .filter(dsl::file_id.eq_any(file_ids)) - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(DocumentChunk::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn search_similar_document_chunks_in_documents( - &mut self, - query_embedding: Vector, - document_ids: &[Uuid], - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - use schema::document_files; - - if document_ids.is_empty() { - return Ok(vec![]); - } - - // Get all file IDs for the given documents - let file_ids: Vec = 
document_files::table - .filter(document_files::document_id.eq_any(document_ids)) - .select(document_files::id) - .load(self) - .await - .map_err(PgError::from)?; - - if file_ids.is_empty() { - return Ok(vec![]); - } - - let chunks = document_chunks::table - .filter(dsl::file_id.eq_any(file_ids)) - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(DocumentChunk::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks) - } - - async fn search_scored_chunks_in_files( - &mut self, - query_embedding: Vector, - file_ids: &[Uuid], - min_score: f64, - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - - if file_ids.is_empty() { - return Ok(vec![]); - } - - // Cosine distance ranges from 0 (identical) to 2 (opposite) - // Score = 1 - distance, so min_score threshold means max_distance = 1 - min_score - let max_distance = 1.0 - min_score; - - let chunks: Vec<(DocumentChunk, f64)> = document_chunks::table - .filter(dsl::file_id.eq_any(file_ids)) - .filter( - dsl::embedding - .cosine_distance(&query_embedding) - .le(max_distance), - ) - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(( - DocumentChunk::as_select(), - (1.0.into_sql::() - - dsl::embedding.cosine_distance(&query_embedding)), - )) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks - .into_iter() - .map(|(chunk, score)| ScoredDocumentChunk { chunk, score }) - .collect()) - } - - async fn search_scored_chunks_in_documents( - &mut self, - query_embedding: Vector, - document_ids: &[Uuid], - min_score: f64, - limit: i64, - ) -> PgResult> { - use pgvector::VectorExpressionMethods; - use schema::document_chunks::{self, dsl}; - use schema::document_files; - - if document_ids.is_empty() { - return Ok(vec![]); - } - - // Get all file IDs for the given documents - let file_ids: Vec = document_files::table - .filter(document_files::document_id.eq_any(document_ids)) - .select(document_files::id) - .load(self) - .await - .map_err(PgError::from)?; - - if file_ids.is_empty() { - return Ok(vec![]); - } - - let max_distance = 1.0 - min_score; - - let chunks: Vec<(DocumentChunk, f64)> = document_chunks::table - .filter(dsl::file_id.eq_any(file_ids)) - .filter( - dsl::embedding - .cosine_distance(&query_embedding) - .le(max_distance), - ) - .order(dsl::embedding.cosine_distance(&query_embedding)) - .limit(limit) - .select(( - DocumentChunk::as_select(), - (1.0.into_sql::() - - dsl::embedding.cosine_distance(&query_embedding)), - )) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(chunks - .into_iter() - .map(|(chunk, score)| ScoredDocumentChunk { chunk, score }) - .collect()) - } - - async fn count_document_file_chunks(&mut self, file_id: Uuid) -> PgResult { - use schema::document_chunks::{self, dsl}; - - let count: i64 = document_chunks::table - .filter(dsl::file_id.eq(file_id)) - .count() - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(count) - } -} diff --git a/crates/nvisy-postgres/src/query/document_comment.rs b/crates/nvisy-postgres/src/query/document_comment.rs deleted file mode 100644 index 880ba18..0000000 --- a/crates/nvisy-postgres/src/query/document_comment.rs +++ /dev/null @@ -1,316 +0,0 @@ -//! Document comments repository for managing collaborative commenting operations. 
- -use std::future::Future; - -use diesel::prelude::*; -use diesel_async::RunQueryDsl; -use uuid::Uuid; - -use crate::model::{DocumentComment, NewDocumentComment, UpdateDocumentComment}; -use crate::types::{CursorPage, CursorPagination, OffsetPagination}; -use crate::{PgConnection, PgError, PgResult, schema}; - -/// Repository for document comment database operations. -/// -/// Handles comment lifecycle management including creation, threading, replies, -/// and mention tracking. -pub trait DocumentCommentRepository { - /// Creates a new document comment. - fn create_document_comment( - &mut self, - new_comment: NewDocumentComment, - ) -> impl Future> + Send; - - /// Finds a document comment by its unique identifier. - fn find_document_comment_by_id( - &mut self, - comment_id: Uuid, - ) -> impl Future>> + Send; - - /// Lists document comments for a file with offset pagination. - fn offset_list_file_document_comments( - &mut self, - file_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists document comments for a file with cursor pagination. - fn cursor_list_file_document_comments( - &mut self, - file_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists document comments created by an account with offset pagination. - fn offset_list_account_document_comments( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Lists document comments created by an account with cursor pagination. - fn cursor_list_account_document_comments( - &mut self, - account_id: Uuid, - pagination: CursorPagination, - ) -> impl Future>> + Send; - - /// Lists document comments mentioning an account with offset pagination. - fn offset_list_document_comments_mentioning_account( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; - - /// Updates a document comment. - fn update_document_comment( - &mut self, - comment_id: Uuid, - updates: UpdateDocumentComment, - ) -> impl Future> + Send; - - /// Soft deletes a document comment. 
- fn delete_document_comment( - &mut self, - comment_id: Uuid, - ) -> impl Future> + Send; -} - -impl DocumentCommentRepository for PgConnection { - async fn create_document_comment( - &mut self, - new_comment: NewDocumentComment, - ) -> PgResult { - use schema::document_comments; - - let comment = diesel::insert_into(document_comments::table) - .values(&new_comment) - .returning(DocumentComment::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(comment) - } - - async fn find_document_comment_by_id( - &mut self, - comment_id: Uuid, - ) -> PgResult> { - use schema::document_comments::{self, dsl}; - - let comment = document_comments::table - .filter(dsl::id.eq(comment_id)) - .filter(dsl::deleted_at.is_null()) - .select(DocumentComment::as_select()) - .first(self) - .await - .optional() - .map_err(PgError::from)?; - - Ok(comment) - } - - async fn offset_list_file_document_comments( - &mut self, - file_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_comments::{self, dsl}; - - let comments = document_comments::table - .filter(dsl::file_id.eq(file_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(comments) - } - - async fn cursor_list_file_document_comments( - &mut self, - file_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use diesel::dsl::count_star; - use schema::document_comments::{self, dsl}; - - let base_filter = dsl::file_id.eq(file_id).and(dsl::deleted_at.is_null()); - - let total = if pagination.include_count { - Some( - document_comments::table - .filter(base_filter) - .select(count_star()) - .get_result(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let items = if let Some(cursor) = &pagination.after { - let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - document_comments::table - .filter(base_filter) - .filter( - dsl::created_at - .lt(cursor_ts) - .or(dsl::created_at.eq(cursor_ts).and(dsl::id.lt(cursor.id))), - ) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)? - } else { - document_comments::table - .filter(base_filter) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)? 
- }; - - Ok(CursorPage::new(items, total, pagination.limit, |c| { - (c.created_at.into(), c.id) - })) - } - - async fn offset_list_account_document_comments( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_comments::{self, dsl}; - - let comments = document_comments::table - .filter(dsl::account_id.eq(account_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(comments) - } - - async fn cursor_list_account_document_comments( - &mut self, - account_id: Uuid, - pagination: CursorPagination, - ) -> PgResult> { - use diesel::dsl::count_star; - use schema::document_comments::{self, dsl}; - - let base_filter = dsl::account_id - .eq(account_id) - .and(dsl::deleted_at.is_null()); - - let total = if pagination.include_count { - Some( - document_comments::table - .filter(base_filter) - .select(count_star()) - .get_result(self) - .await - .map_err(PgError::from)?, - ) - } else { - None - }; - - let items = if let Some(cursor) = &pagination.after { - let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - document_comments::table - .filter(base_filter) - .filter( - dsl::created_at - .lt(cursor_ts) - .or(dsl::created_at.eq(cursor_ts).and(dsl::id.lt(cursor.id))), - ) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)? - } else { - document_comments::table - .filter(base_filter) - .order((dsl::created_at.desc(), dsl::id.desc())) - .limit(pagination.fetch_limit()) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)? 
- }; - - Ok(CursorPage::new(items, total, pagination.limit, |c| { - (c.created_at.into(), c.id) - })) - } - - async fn offset_list_document_comments_mentioning_account( - &mut self, - account_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_comments::{self, dsl}; - - let comments = document_comments::table - .filter(dsl::reply_to_account_id.eq(account_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentComment::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(comments) - } - - async fn update_document_comment( - &mut self, - comment_id: Uuid, - updates: UpdateDocumentComment, - ) -> PgResult { - use schema::document_comments::{self, dsl}; - - let comment = diesel::update(document_comments::table.filter(dsl::id.eq(comment_id))) - .set(&updates) - .returning(DocumentComment::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; - - Ok(comment) - } - - async fn delete_document_comment(&mut self, comment_id: Uuid) -> PgResult<()> { - use diesel::dsl::now; - use schema::document_comments::{self, dsl}; - - diesel::update(document_comments::table.filter(dsl::id.eq(comment_id))) - .set(dsl::deleted_at.eq(now)) - .execute(self) - .await - .map_err(PgError::from)?; - - Ok(()) - } -} diff --git a/crates/nvisy-postgres/src/query/document_file.rs b/crates/nvisy-postgres/src/query/file.rs similarity index 64% rename from crates/nvisy-postgres/src/query/document_file.rs rename to crates/nvisy-postgres/src/query/file.rs index 0b92498..7758958 100644 --- a/crates/nvisy-postgres/src/query/document_file.rs +++ b/crates/nvisy-postgres/src/query/file.rs @@ -1,4 +1,4 @@ -//! Document files repository for managing uploaded document files. +//! Files repository for managing uploaded files. use std::future::Future; @@ -8,29 +8,26 @@ use diesel_async::RunQueryDsl; use pgtrgm::expression_methods::TrgmExpressionMethods; use uuid::Uuid; -use crate::model::{DocumentFile, NewDocumentFile, UpdateDocumentFile}; +use crate::model::{File, NewFile, UpdateFile}; use crate::types::{ CursorPage, CursorPagination, FileFilter, FileSortBy, FileSortField, OffsetPagination, - ProcessingStatus, SortOrder, + SortOrder, }; use crate::{PgConnection, PgError, PgResult, schema}; -/// Repository for document file database operations. +/// Repository for file database operations. /// -/// Handles file lifecycle management including upload tracking, processing -/// status updates, virus scanning, storage management, and cleanup operations. -pub trait DocumentFileRepository { - /// Creates a new document file record. - fn create_document_file( - &mut self, - new_file: NewDocumentFile, - ) -> impl Future> + Send; +/// Handles file lifecycle management including upload tracking, +/// storage management, and cleanup operations. +pub trait FileRepository { + /// Creates a new file record. + fn create_file(&mut self, new_file: NewFile) -> impl Future> + Send; /// Finds a file by its unique identifier. - fn find_document_file_by_id( + fn find_file_by_id( &mut self, file_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Finds a file by ID within a specific workspace. /// @@ -39,36 +36,29 @@ pub trait DocumentFileRepository { &mut self, workspace_id: Uuid, file_id: Uuid, - ) -> impl Future>> + Send; - - /// Lists all files associated with a document with offset pagination. 
- fn offset_list_document_files( - &mut self, - document_id: Uuid, - pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists all files uploaded by a specific account with offset pagination. fn offset_list_account_files( &mut self, account_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Updates a file with new metadata or settings. - fn update_document_file( + fn update_file( &mut self, file_id: Uuid, - updates: UpdateDocumentFile, - ) -> impl Future> + Send; + updates: UpdateFile, + ) -> impl Future> + Send; /// Soft deletes a file by setting the deletion timestamp. - fn delete_document_file(&mut self, file_id: Uuid) -> impl Future> + Send; + fn delete_file(&mut self, file_id: Uuid) -> impl Future> + Send; /// Soft deletes multiple files in a workspace by setting deletion timestamps. /// /// Returns the number of files deleted. - fn delete_document_files( + fn delete_files( &mut self, workspace_id: Uuid, file_ids: &[Uuid], @@ -83,7 +73,7 @@ pub trait DocumentFileRepository { pagination: OffsetPagination, sort_by: FileSortBy, filter: FileFilter, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists all files in a workspace with cursor pagination and optional filtering. fn cursor_list_workspace_files( @@ -91,20 +81,13 @@ pub trait DocumentFileRepository { workspace_id: Uuid, pagination: CursorPagination, filter: FileFilter, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Finds files with a matching SHA-256 hash. fn find_files_by_hash( &mut self, file_hash: &[u8], - ) -> impl Future>> + Send; - - /// Finds files with a specific processing status. - fn find_files_by_status( - &mut self, - status: ProcessingStatus, - pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Calculates total storage usage for an account. fn get_account_storage_usage( @@ -113,10 +96,10 @@ pub trait DocumentFileRepository { ) -> impl Future> + Send; /// Finds multiple files by their IDs. - fn find_document_files_by_ids( + fn find_files_by_ids( &mut self, file_ids: &[Uuid], - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Lists all versions of a file (the file itself and all files that have it as parent). /// @@ -124,7 +107,7 @@ pub trait DocumentFileRepository { fn list_file_versions( &mut self, file_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Finds the latest version of a file by traversing the version chain. /// @@ -133,7 +116,7 @@ pub trait DocumentFileRepository { fn find_latest_version( &mut self, file_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; /// Gets the next version number for creating a new version of a file. 
fn get_next_version_number( @@ -142,13 +125,13 @@ pub trait DocumentFileRepository { ) -> impl Future> + Send; } -impl DocumentFileRepository for PgConnection { - async fn create_document_file(&mut self, new_file: NewDocumentFile) -> PgResult { - use schema::document_files; +impl FileRepository for PgConnection { + async fn create_file(&mut self, new_file: NewFile) -> PgResult { + use schema::files; - let file = diesel::insert_into(document_files::table) + let file = diesel::insert_into(files::table) .values(&new_file) - .returning(DocumentFile::as_returning()) + .returning(File::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -156,13 +139,13 @@ impl DocumentFileRepository for PgConnection { Ok(file) } - async fn find_document_file_by_id(&mut self, file_id: Uuid) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn find_file_by_id(&mut self, file_id: Uuid) -> PgResult> { + use schema::files::{self, dsl}; - let file = document_files::table + let file = files::table .filter(dsl::id.eq(file_id)) .filter(dsl::deleted_at.is_null()) - .select(DocumentFile::as_select()) + .select(File::as_select()) .first(self) .await .optional() @@ -175,14 +158,14 @@ impl DocumentFileRepository for PgConnection { &mut self, workspace_id: Uuid, file_id: Uuid, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + ) -> PgResult> { + use schema::files::{self, dsl}; - let file = document_files::table + let file = files::table .filter(dsl::id.eq(file_id)) .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()) - .select(DocumentFile::as_select()) + .select(File::as_select()) .first(self) .await .optional() @@ -191,41 +174,20 @@ impl DocumentFileRepository for PgConnection { Ok(file) } - async fn offset_list_document_files( - &mut self, - document_id: Uuid, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_files::{self, dsl}; - - let files = document_files::table - .filter(dsl::document_id.eq(document_id)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentFile::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(files) - } - async fn offset_list_account_files( &mut self, account_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + ) -> PgResult> { + use schema::files::{self, dsl}; - let files = document_files::table + let files = files::table .filter(dsl::account_id.eq(account_id)) .filter(dsl::deleted_at.is_null()) .order(dsl::created_at.desc()) .limit(pagination.limit) .offset(pagination.offset) - .select(DocumentFile::as_select()) + .select(File::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -233,16 +195,12 @@ impl DocumentFileRepository for PgConnection { Ok(files) } - async fn update_document_file( - &mut self, - file_id: Uuid, - updates: UpdateDocumentFile, - ) -> PgResult { - use schema::document_files::{self, dsl}; + async fn update_file(&mut self, file_id: Uuid, updates: UpdateFile) -> PgResult { + use schema::files::{self, dsl}; - let file = diesel::update(document_files::table.filter(dsl::id.eq(file_id))) + let file = diesel::update(files::table.filter(dsl::id.eq(file_id))) .set(&updates) - .returning(DocumentFile::as_returning()) + .returning(File::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -250,11 +208,11 @@ impl DocumentFileRepository for PgConnection { Ok(file) } - async fn delete_document_file(&mut self, 
file_id: Uuid) -> PgResult<()> { + async fn delete_file(&mut self, file_id: Uuid) -> PgResult<()> { use diesel::dsl::now; - use schema::document_files::{self, dsl}; + use schema::files::{self, dsl}; - diesel::update(document_files::table.filter(dsl::id.eq(file_id))) + diesel::update(files::table.filter(dsl::id.eq(file_id))) .set(dsl::deleted_at.eq(now)) .execute(self) .await @@ -263,16 +221,12 @@ impl DocumentFileRepository for PgConnection { Ok(()) } - async fn delete_document_files( - &mut self, - workspace_id: Uuid, - file_ids: &[Uuid], - ) -> PgResult { + async fn delete_files(&mut self, workspace_id: Uuid, file_ids: &[Uuid]) -> PgResult { use diesel::dsl::now; - use schema::document_files::{self, dsl}; + use schema::files::{self, dsl}; let count = diesel::update( - document_files::table + files::table .filter(dsl::id.eq_any(file_ids)) .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()), @@ -291,11 +245,11 @@ impl DocumentFileRepository for PgConnection { pagination: OffsetPagination, sort_by: FileSortBy, filter: FileFilter, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + ) -> PgResult> { + use schema::files::{self, dsl}; // Build base query - let mut query = document_files::table + let mut query = files::table .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()) .into_boxed(); @@ -318,7 +272,7 @@ impl DocumentFileRepository for PgConnection { }; let files = query - .select(DocumentFile::as_select()) + .select(File::as_select()) .limit(pagination.limit) .offset(pagination.offset) .load(self) @@ -333,15 +287,15 @@ impl DocumentFileRepository for PgConnection { workspace_id: Uuid, pagination: CursorPagination, filter: FileFilter, - ) -> PgResult> { - use schema::document_files::{self, dsl}; + ) -> PgResult> { + use schema::files::{self, dsl}; // Precompute filter values let search_term = filter.search_term().map(|s| s.to_string()); let extensions: Vec = filter.extensions().iter().map(|s| s.to_string()).collect(); // Build base query with filters - let mut base_query = document_files::table + let mut base_query = files::table .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()) .into_boxed(); @@ -369,7 +323,7 @@ impl DocumentFileRepository for PgConnection { }; // Rebuild query for fetching items (can't reuse boxed query after count) - let mut query = document_files::table + let mut query = files::table .filter(dsl::workspace_id.eq(workspace_id)) .filter(dsl::deleted_at.is_null()) .into_boxed(); @@ -387,7 +341,7 @@ impl DocumentFileRepository for PgConnection { let limit = pagination.limit + 1; // Apply cursor filter if present - let items: Vec = if let Some(cursor) = &pagination.after { + let items: Vec = if let Some(cursor) = &pagination.after { let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); query @@ -396,7 +350,7 @@ impl DocumentFileRepository for PgConnection { .lt(&cursor_time) .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), ) - .select(DocumentFile::as_select()) + .select(File::as_select()) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(limit) .load(self) @@ -404,7 +358,7 @@ impl DocumentFileRepository for PgConnection { .map_err(PgError::from)? 
} else { query - .select(DocumentFile::as_select()) + .select(File::as_select()) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(limit) .load(self) @@ -416,38 +370,17 @@ impl DocumentFileRepository for PgConnection { items, total, pagination.limit, - |f: &DocumentFile| (f.created_at.into(), f.id), + |f: &File| (f.created_at.into(), f.id), )) } - async fn find_files_by_hash(&mut self, file_hash: &[u8]) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn find_files_by_hash(&mut self, file_hash: &[u8]) -> PgResult> { + use schema::files::{self, dsl}; - let files = document_files::table + let files = files::table .filter(dsl::file_hash_sha256.eq(file_hash)) .filter(dsl::deleted_at.is_null()) - .select(DocumentFile::as_select()) - .load(self) - .await - .map_err(PgError::from)?; - - Ok(files) - } - - async fn find_files_by_status( - &mut self, - status: ProcessingStatus, - pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_files::{self, dsl}; - - let files = document_files::table - .filter(dsl::processing_status.eq(status)) - .filter(dsl::deleted_at.is_null()) - .order(dsl::created_at.desc()) - .limit(pagination.limit) - .offset(pagination.offset) - .select(DocumentFile::as_select()) + .select(File::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -456,9 +389,9 @@ impl DocumentFileRepository for PgConnection { } async fn get_account_storage_usage(&mut self, account_id: Uuid) -> PgResult { - use schema::document_files::{self, dsl}; + use schema::files::{self, dsl}; - let usage: Option = document_files::table + let usage: Option = files::table .filter(dsl::account_id.eq(account_id)) .filter(dsl::deleted_at.is_null()) .select(diesel::dsl::sum(dsl::file_size_bytes)) @@ -469,16 +402,13 @@ impl DocumentFileRepository for PgConnection { Ok(usage.unwrap_or_else(|| BigDecimal::from(0))) } - async fn find_document_files_by_ids( - &mut self, - file_ids: &[Uuid], - ) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn find_files_by_ids(&mut self, file_ids: &[Uuid]) -> PgResult> { + use schema::files::{self, dsl}; - let files = document_files::table + let files = files::table .filter(dsl::id.eq_any(file_ids)) .filter(dsl::deleted_at.is_null()) - .select(DocumentFile::as_select()) + .select(File::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -486,16 +416,16 @@ impl DocumentFileRepository for PgConnection { Ok(files) } - async fn list_file_versions(&mut self, file_id: Uuid) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn list_file_versions(&mut self, file_id: Uuid) -> PgResult> { + use schema::files::{self, dsl}; // Get the original file and all files that have it (or its descendants) as parent // This query gets the file itself plus all files where parent_id = file_id - let files = document_files::table + let files = files::table .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) .filter(dsl::deleted_at.is_null()) .order(dsl::version_number.desc()) - .select(DocumentFile::as_select()) + .select(File::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -503,16 +433,16 @@ impl DocumentFileRepository for PgConnection { Ok(files) } - async fn find_latest_version(&mut self, file_id: Uuid) -> PgResult> { - use schema::document_files::{self, dsl}; + async fn find_latest_version(&mut self, file_id: Uuid) -> PgResult> { + use schema::files::{self, dsl}; // Find the file with highest version_number that has file_id as parent, // or the file itself if no newer versions exist - let latest = 
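
Taken together, `find_files_by_hash` and `get_account_storage_usage` are enough to gate an upload on both deduplication and a storage quota. A minimal caller-side sketch, not part of the patch: it assumes the repository trait and connection types are importable as `nvisy_postgres::query::FileRepository` and `nvisy_postgres::{PgConnection, PgResult}` (mirroring the module layout in this diff), and the 10 GiB cap is an invented constant.

```rust
use bigdecimal::BigDecimal;
use nvisy_postgres::query::FileRepository;
use nvisy_postgres::{PgConnection, PgResult};
use sha2::{Digest, Sha256};
use uuid::Uuid;

/// Hypothetical pre-upload check: reject duplicates and enforce a quota.
async fn can_store_upload(
    conn: &mut PgConnection,
    account_id: Uuid,
    bytes: &[u8],
) -> PgResult<bool> {
    // Same SHA-256 digest that `file_hash_sha256` is compared against.
    let digest = Sha256::digest(bytes);
    let duplicates = conn.find_files_by_hash(digest.as_slice()).await?;
    if !duplicates.is_empty() {
        return Ok(false);
    }

    // `get_account_storage_usage` sums `file_size_bytes` over live rows.
    let used: BigDecimal = conn.get_account_storage_usage(account_id).await?;
    let quota = BigDecimal::from(10u64 * 1024 * 1024 * 1024); // illustrative 10 GiB cap
    Ok(used + BigDecimal::from(bytes.len() as u64) <= quota)
}
```

Because both queries skip rows with `deleted_at` set, a file the user previously soft-deleted neither counts against the quota nor blocks a re-upload, which matches the `deleted_at.is_null()` filters used throughout the repository.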
document_files::table + let latest = files::table .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) .filter(dsl::deleted_at.is_null()) .order(dsl::version_number.desc()) - .select(DocumentFile::as_select()) + .select(File::as_select()) .first(self) .await .optional() @@ -523,10 +453,10 @@ impl DocumentFileRepository for PgConnection { async fn get_next_version_number(&mut self, file_id: Uuid) -> PgResult { use diesel::dsl::max; - use schema::document_files::{self, dsl}; + use schema::files::{self, dsl}; // Get the max version_number from the file and its versions - let max_version: Option = document_files::table + let max_version: Option = files::table .filter(dsl::id.eq(file_id).or(dsl::parent_id.eq(file_id))) .filter(dsl::deleted_at.is_null()) .select(max(dsl::version_number)) diff --git a/crates/nvisy-postgres/src/query/document_annotation.rs b/crates/nvisy-postgres/src/query/file_annotation.rs similarity index 54% rename from crates/nvisy-postgres/src/query/document_annotation.rs rename to crates/nvisy-postgres/src/query/file_annotation.rs index 052c4a7..a5fca6c 100644 --- a/crates/nvisy-postgres/src/query/document_annotation.rs +++ b/crates/nvisy-postgres/src/query/file_annotation.rs @@ -1,4 +1,4 @@ -//! Document annotations repository for managing user annotations on documents. +//! File annotations repository for managing user annotations on files. use std::future::Future; @@ -6,79 +6,79 @@ use diesel::prelude::*; use diesel_async::RunQueryDsl; use uuid::Uuid; -use crate::model::{DocumentAnnotation, NewDocumentAnnotation, UpdateDocumentAnnotation}; +use crate::model::{FileAnnotation, NewFileAnnotation, UpdateFileAnnotation}; use crate::types::{CursorPage, CursorPagination, OffsetPagination}; use crate::{PgConnection, PgError, PgResult, schema}; -/// Repository for document annotation database operations. +/// Repository for file annotation database operations. /// /// Handles annotation lifecycle management including creation, updates, /// filtering by type, and retrieval across files and accounts. -pub trait DocumentAnnotationRepository { - /// Creates a new document annotation. - fn create_document_annotation( +pub trait FileAnnotationRepository { + /// Creates a new file annotation. + fn create_file_annotation( &mut self, - new_annotation: NewDocumentAnnotation, - ) -> impl Future> + Send; + new_annotation: NewFileAnnotation, + ) -> impl Future> + Send; - /// Finds a document annotation by its unique identifier. - fn find_document_annotation_by_id( + /// Finds a file annotation by its unique identifier. + fn find_file_annotation_by_id( &mut self, annotation_id: Uuid, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Lists document annotations for a file with offset pagination. - fn offset_list_file_document_annotations( + /// Lists file annotations for a file with offset pagination. + fn offset_list_file_annotations( &mut self, file_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Lists document annotations for a file with cursor pagination. - fn cursor_list_file_document_annotations( + /// Lists file annotations for a file with cursor pagination. + fn cursor_list_file_annotations( &mut self, file_id: Uuid, pagination: CursorPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Lists document annotations created by an account with offset pagination. - fn offset_list_account_document_annotations( + /// Lists file annotations created by an account with offset pagination. 
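
The versioning helpers above resolve a file's lineage through `parent_id`: `list_file_versions` and `find_latest_version` scan `id = file_id OR parent_id = file_id`, and `get_next_version_number` takes the maximum `version_number` over that same set (the tail of that method lies outside the hunk, so the `+ 1` is inferred). A sketch of how a caller might compute the slot for a re-upload, not part of the patch: it assumes the import paths used in the previous sketch, that `File` exposes `id` and `parent_id: Option<Uuid>` as the `files` table in `schema.rs` suggests, and that the next-version helper returns `i32`.

```rust
use nvisy_postgres::query::FileRepository;
use nvisy_postgres::{PgConnection, PgResult};
use uuid::Uuid;

/// Hypothetical helper: figure out where a re-upload of `file_id` should hang
/// in the version chain. The caller would copy these values into its `NewFile`.
async fn next_version_slot(
    conn: &mut PgConnection,
    file_id: Uuid,
) -> PgResult<Option<(Uuid, i32)>> {
    // `find_latest_version` walks `id == file_id OR parent_id == file_id`,
    // so the original file itself is returned when no newer versions exist.
    let Some(latest) = conn.find_latest_version(file_id).await? else {
        return Ok(None); // unknown or soft-deleted file
    };

    let next = conn.get_next_version_number(file_id).await?;
    // New versions keep pointing at the original file, matching the
    // single-level `parent_id` scheme that `list_file_versions` relies on.
    let parent = latest.parent_id.unwrap_or(latest.id);
    Ok(Some((parent, next)))
}
```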
+ fn offset_list_account_file_annotations( &mut self, account_id: Uuid, pagination: OffsetPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Lists document annotations created by an account with cursor pagination. - fn cursor_list_account_document_annotations( + /// Lists file annotations created by an account with cursor pagination. + fn cursor_list_account_file_annotations( &mut self, account_id: Uuid, pagination: CursorPagination, - ) -> impl Future>> + Send; + ) -> impl Future>> + Send; - /// Updates a document annotation. - fn update_document_annotation( + /// Updates a file annotation. + fn update_file_annotation( &mut self, annotation_id: Uuid, - updates: UpdateDocumentAnnotation, - ) -> impl Future> + Send; + updates: UpdateFileAnnotation, + ) -> impl Future> + Send; - /// Soft deletes a document annotation. - fn delete_document_annotation( + /// Soft deletes a file annotation. + fn delete_file_annotation( &mut self, annotation_id: Uuid, ) -> impl Future> + Send; } -impl DocumentAnnotationRepository for PgConnection { - async fn create_document_annotation( +impl FileAnnotationRepository for PgConnection { + async fn create_file_annotation( &mut self, - new_annotation: NewDocumentAnnotation, - ) -> PgResult { - use schema::document_annotations; + new_annotation: NewFileAnnotation, + ) -> PgResult { + use schema::file_annotations; - let annotation = diesel::insert_into(document_annotations::table) + let annotation = diesel::insert_into(file_annotations::table) .values(&new_annotation) - .returning(DocumentAnnotation::as_returning()) + .returning(FileAnnotation::as_returning()) .get_result(self) .await .map_err(PgError::from)?; @@ -86,16 +86,16 @@ impl DocumentAnnotationRepository for PgConnection { Ok(annotation) } - async fn find_document_annotation_by_id( + async fn find_file_annotation_by_id( &mut self, annotation_id: Uuid, - ) -> PgResult> { - use schema::document_annotations::{self, dsl}; + ) -> PgResult> { + use schema::file_annotations::{self, dsl}; - let annotation = document_annotations::table + let annotation = file_annotations::table .filter(dsl::id.eq(annotation_id)) .filter(dsl::deleted_at.is_null()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .first(self) .await .optional() @@ -104,20 +104,20 @@ impl DocumentAnnotationRepository for PgConnection { Ok(annotation) } - async fn offset_list_file_document_annotations( + async fn offset_list_file_annotations( &mut self, file_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_annotations::{self, dsl}; + ) -> PgResult> { + use schema::file_annotations::{self, dsl}; - let annotations = document_annotations::table - .filter(dsl::document_file_id.eq(file_id)) + let annotations = file_annotations::table + .filter(dsl::file_id.eq(file_id)) .filter(dsl::deleted_at.is_null()) .order(dsl::created_at.desc()) .limit(pagination.limit) .offset(pagination.offset) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -125,21 +125,19 @@ impl DocumentAnnotationRepository for PgConnection { Ok(annotations) } - async fn cursor_list_file_document_annotations( + async fn cursor_list_file_annotations( &mut self, file_id: Uuid, pagination: CursorPagination, - ) -> PgResult> { + ) -> PgResult> { use diesel::dsl::count_star; - use schema::document_annotations::{self, dsl}; + use schema::file_annotations::{self, dsl}; - let base_filter = dsl::document_file_id - .eq(file_id) - 
.and(dsl::deleted_at.is_null()); + let base_filter = dsl::file_id.eq(file_id).and(dsl::deleted_at.is_null()); let total = if pagination.include_count { Some( - document_annotations::table + file_annotations::table .filter(base_filter) .select(count_star()) .get_result(self) @@ -152,7 +150,7 @@ impl DocumentAnnotationRepository for PgConnection { let items = if let Some(cursor) = &pagination.after { let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - document_annotations::table + file_annotations::table .filter(base_filter) .filter( dsl::created_at @@ -161,16 +159,16 @@ impl DocumentAnnotationRepository for PgConnection { ) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(pagination.fetch_limit()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)? } else { - document_annotations::table + file_annotations::table .filter(base_filter) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(pagination.fetch_limit()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)? @@ -181,20 +179,20 @@ impl DocumentAnnotationRepository for PgConnection { })) } - async fn offset_list_account_document_annotations( + async fn offset_list_account_file_annotations( &mut self, account_id: Uuid, pagination: OffsetPagination, - ) -> PgResult> { - use schema::document_annotations::{self, dsl}; + ) -> PgResult> { + use schema::file_annotations::{self, dsl}; - let annotations = document_annotations::table + let annotations = file_annotations::table .filter(dsl::account_id.eq(account_id)) .filter(dsl::deleted_at.is_null()) .order(dsl::created_at.desc()) .limit(pagination.limit) .offset(pagination.offset) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)?; @@ -202,13 +200,13 @@ impl DocumentAnnotationRepository for PgConnection { Ok(annotations) } - async fn cursor_list_account_document_annotations( + async fn cursor_list_account_file_annotations( &mut self, account_id: Uuid, pagination: CursorPagination, - ) -> PgResult> { + ) -> PgResult> { use diesel::dsl::count_star; - use schema::document_annotations::{self, dsl}; + use schema::file_annotations::{self, dsl}; let base_filter = dsl::account_id .eq(account_id) @@ -216,7 +214,7 @@ impl DocumentAnnotationRepository for PgConnection { let total = if pagination.include_count { Some( - document_annotations::table + file_annotations::table .filter(base_filter) .select(count_star()) .get_result(self) @@ -229,7 +227,7 @@ impl DocumentAnnotationRepository for PgConnection { let items = if let Some(cursor) = &pagination.after { let cursor_ts = jiff_diesel::Timestamp::from(cursor.timestamp); - document_annotations::table + file_annotations::table .filter(base_filter) .filter( dsl::created_at @@ -238,16 +236,16 @@ impl DocumentAnnotationRepository for PgConnection { ) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(pagination.fetch_limit()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)? } else { - document_annotations::table + file_annotations::table .filter(base_filter) .order((dsl::created_at.desc(), dsl::id.desc())) .limit(pagination.fetch_limit()) - .select(DocumentAnnotation::as_select()) + .select(FileAnnotation::as_select()) .load(self) .await .map_err(PgError::from)? 
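
Both cursor variants page with the same keyset predicate, `created_at < c OR (created_at = c AND id < c.id)`, and fetch one extra row (`fetch_limit()`, i.e. `limit + 1`) so that "is there another page?" costs no second query. A standalone sketch of that sentinel-row trick, independent of Diesel:

```rust
/// Ask the database for `limit + 1` rows, then split off the sentinel row to
/// learn whether another page exists without issuing a COUNT query.
fn split_page<T>(mut rows: Vec<T>, limit: usize) -> (Vec<T>, bool) {
    let has_more = rows.len() > limit;
    rows.truncate(limit);
    (rows, has_more)
}

#[test]
fn sentinel_row_detects_next_page() {
    let (page, has_more) = split_page(vec![1, 2, 3, 4], 3);
    assert_eq!(page, vec![1, 2, 3]);
    assert!(has_more);
}
```

The separate `include_count` flag then only pays for `COUNT(*)` when the client actually asked for a total.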
@@ -258,29 +256,28 @@ impl DocumentAnnotationRepository for PgConnection { })) } - async fn update_document_annotation( + async fn update_file_annotation( &mut self, annotation_id: Uuid, - updates: UpdateDocumentAnnotation, - ) -> PgResult { - use schema::document_annotations::{self, dsl}; - - let annotation = - diesel::update(document_annotations::table.filter(dsl::id.eq(annotation_id))) - .set(&updates) - .returning(DocumentAnnotation::as_returning()) - .get_result(self) - .await - .map_err(PgError::from)?; + updates: UpdateFileAnnotation, + ) -> PgResult { + use schema::file_annotations::{self, dsl}; + + let annotation = diesel::update(file_annotations::table.filter(dsl::id.eq(annotation_id))) + .set(&updates) + .returning(FileAnnotation::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; Ok(annotation) } - async fn delete_document_annotation(&mut self, annotation_id: Uuid) -> PgResult<()> { + async fn delete_file_annotation(&mut self, annotation_id: Uuid) -> PgResult<()> { use diesel::dsl::now; - use schema::document_annotations::{self, dsl}; + use schema::file_annotations::{self, dsl}; - diesel::update(document_annotations::table.filter(dsl::id.eq(annotation_id))) + diesel::update(file_annotations::table.filter(dsl::id.eq(annotation_id))) .set(dsl::deleted_at.eq(now)) .execute(self) .await diff --git a/crates/nvisy-postgres/src/query/file_chunk.rs b/crates/nvisy-postgres/src/query/file_chunk.rs new file mode 100644 index 0000000..edf9797 --- /dev/null +++ b/crates/nvisy-postgres/src/query/file_chunk.rs @@ -0,0 +1,338 @@ +//! File chunks repository for managing text segments and embeddings. + +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use pgvector::Vector; +use uuid::Uuid; + +use crate::model::{FileChunk, NewFileChunk, ScoredFileChunk, UpdateFileChunk}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for file chunk database operations. +/// +/// Handles chunk lifecycle management including creation, embedding updates, +/// and semantic similarity search via pgvector. +pub trait FileChunkRepository { + /// Creates multiple file chunks in a single transaction. + fn create_file_chunks( + &mut self, + new_chunks: Vec, + ) -> impl Future>> + Send; + + /// Updates a chunk with new data. + fn update_file_chunk( + &mut self, + chunk_id: Uuid, + updates: UpdateFileChunk, + ) -> impl Future> + Send; + + /// Deletes all chunks for a file. + fn delete_file_chunks(&mut self, file_id: Uuid) + -> impl Future> + Send; + + /// Lists all chunks for a specific file ordered by chunk index. + fn list_file_chunks( + &mut self, + file_id: Uuid, + ) -> impl Future>> + Send; + + /// Searches for similar chunks using cosine similarity. + /// + /// Returns chunks ordered by similarity (most similar first). + fn search_similar_chunks( + &mut self, + query_embedding: Vector, + limit: i64, + ) -> impl Future>> + Send; + + /// Searches for similar chunks within specific files. + fn search_similar_chunks_in_files( + &mut self, + query_embedding: Vector, + file_ids: &[Uuid], + limit: i64, + ) -> impl Future>> + Send; + + /// Searches for similar chunks within a workspace. + fn search_similar_chunks_in_workspace( + &mut self, + query_embedding: Vector, + workspace_id: Uuid, + limit: i64, + ) -> impl Future>> + Send; + + /// Searches for similar chunks within specific files with score filtering. + /// + /// Returns chunks with similarity score >= min_score, ordered by similarity. 
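
For the plain (unscored) searches declared here, a retrieval call reduces to handing pgvector a query embedding and a limit. An illustrative sketch, not part of the patch: the import paths follow the module layout in this diff, the embedding is produced elsewhere, and the `content` field on `FileChunk` is an assumption, since the chunk text column sits outside the visible hunks.

```rust
use nvisy_postgres::query::FileChunkRepository;
use nvisy_postgres::{PgConnection, PgResult};
use pgvector::Vector;
use uuid::Uuid;

/// Hypothetical retrieval step: embed the user query elsewhere, then let
/// pgvector order chunks by cosine distance within the given files.
async fn top_chunks_for_query(
    conn: &mut PgConnection,
    file_ids: &[Uuid],
    query_embedding: Vec<f32>, // produced by whatever embedding model is in use
) -> PgResult<Vec<String>> {
    let chunks = conn
        .search_similar_chunks_in_files(Vector::from(query_embedding), file_ids, 8)
        .await?;
    // `FileChunk::content` is assumed here; only `id`, `file_id`, and
    // `chunk_index` are visible in the `file_chunks` hunk of `schema.rs`.
    Ok(chunks.into_iter().map(|c| c.content).collect())
}
```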
+ fn search_scored_chunks_in_files( + &mut self, + query_embedding: Vector, + file_ids: &[Uuid], + min_score: f64, + limit: i64, + ) -> impl Future>> + Send; + + /// Searches for similar chunks within a workspace with score filtering. + /// + /// Returns chunks with similarity score >= min_score, ordered by similarity. + fn search_scored_chunks_in_workspace( + &mut self, + query_embedding: Vector, + workspace_id: Uuid, + min_score: f64, + limit: i64, + ) -> impl Future>> + Send; + + /// Gets the total chunk count for a file. + fn count_file_chunks(&mut self, file_id: Uuid) -> impl Future> + Send; +} + +impl FileChunkRepository for PgConnection { + async fn create_file_chunks( + &mut self, + new_chunks: Vec, + ) -> PgResult> { + use schema::file_chunks; + + if new_chunks.is_empty() { + return Ok(vec![]); + } + + let chunks = diesel::insert_into(file_chunks::table) + .values(&new_chunks) + .returning(FileChunk::as_returning()) + .get_results(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn update_file_chunk( + &mut self, + chunk_id: Uuid, + updates: UpdateFileChunk, + ) -> PgResult { + use schema::file_chunks::{self, dsl}; + + let chunk = diesel::update(file_chunks::table.filter(dsl::id.eq(chunk_id))) + .set(&updates) + .returning(FileChunk::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(chunk) + } + + async fn delete_file_chunks(&mut self, file_id: Uuid) -> PgResult { + use schema::file_chunks::{self, dsl}; + + let affected = diesel::delete(file_chunks::table.filter(dsl::file_id.eq(file_id))) + .execute(self) + .await + .map_err(PgError::from)?; + + Ok(affected) + } + + async fn list_file_chunks(&mut self, file_id: Uuid) -> PgResult> { + use schema::file_chunks::{self, dsl}; + + let chunks = file_chunks::table + .filter(dsl::file_id.eq(file_id)) + .order(dsl::chunk_index.asc()) + .select(FileChunk::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn search_similar_chunks( + &mut self, + query_embedding: Vector, + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + + let chunks = file_chunks::table + .order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(FileChunk::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn search_similar_chunks_in_files( + &mut self, + query_embedding: Vector, + file_ids: &[Uuid], + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + + if file_ids.is_empty() { + return Ok(vec![]); + } + + let chunks = file_chunks::table + .filter(dsl::file_id.eq_any(file_ids)) + .order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(FileChunk::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn search_similar_chunks_in_workspace( + &mut self, + query_embedding: Vector, + workspace_id: Uuid, + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + use schema::files; + + // Get all file IDs for the workspace + let file_ids: Vec = files::table + .filter(files::workspace_id.eq(workspace_id)) + .filter(files::deleted_at.is_null()) + .select(files::id) + .load(self) + .await + .map_err(PgError::from)?; + + if file_ids.is_empty() { + return Ok(vec![]); + } + + let chunks = file_chunks::table + .filter(dsl::file_id.eq_any(file_ids)) + 
.order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(FileChunk::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks) + } + + async fn search_scored_chunks_in_files( + &mut self, + query_embedding: Vector, + file_ids: &[Uuid], + min_score: f64, + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + + if file_ids.is_empty() { + return Ok(vec![]); + } + + // Cosine distance ranges from 0 (identical) to 2 (opposite) + // Score = 1 - distance, so min_score threshold means max_distance = 1 - min_score + let max_distance = 1.0 - min_score; + + let chunks: Vec<(FileChunk, f64)> = file_chunks::table + .filter(dsl::file_id.eq_any(file_ids)) + .filter( + dsl::embedding + .cosine_distance(&query_embedding) + .le(max_distance), + ) + .order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(( + FileChunk::as_select(), + (1.0.into_sql::() + - dsl::embedding.cosine_distance(&query_embedding)), + )) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks + .into_iter() + .map(|(chunk, score)| ScoredFileChunk { chunk, score }) + .collect()) + } + + async fn search_scored_chunks_in_workspace( + &mut self, + query_embedding: Vector, + workspace_id: Uuid, + min_score: f64, + limit: i64, + ) -> PgResult> { + use pgvector::VectorExpressionMethods; + use schema::file_chunks::{self, dsl}; + use schema::files; + + // Get all file IDs for the workspace + let file_ids: Vec = files::table + .filter(files::workspace_id.eq(workspace_id)) + .filter(files::deleted_at.is_null()) + .select(files::id) + .load(self) + .await + .map_err(PgError::from)?; + + if file_ids.is_empty() { + return Ok(vec![]); + } + + let max_distance = 1.0 - min_score; + + let chunks: Vec<(FileChunk, f64)> = file_chunks::table + .filter(dsl::file_id.eq_any(file_ids)) + .filter( + dsl::embedding + .cosine_distance(&query_embedding) + .le(max_distance), + ) + .order(dsl::embedding.cosine_distance(&query_embedding)) + .limit(limit) + .select(( + FileChunk::as_select(), + (1.0.into_sql::() + - dsl::embedding.cosine_distance(&query_embedding)), + )) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(chunks + .into_iter() + .map(|(chunk, score)| ScoredFileChunk { chunk, score }) + .collect()) + } + + async fn count_file_chunks(&mut self, file_id: Uuid) -> PgResult { + use schema::file_chunks::{self, dsl}; + + let count: i64 = file_chunks::table + .filter(dsl::file_id.eq(file_id)) + .count() + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(count) + } +} diff --git a/crates/nvisy-postgres/src/query/mod.rs b/crates/nvisy-postgres/src/query/mod.rs index a0830f4..f0e525e 100644 --- a/crates/nvisy-postgres/src/query/mod.rs +++ b/crates/nvisy-postgres/src/query/mod.rs @@ -18,11 +18,12 @@ mod account_action_token; mod account_api_token; mod account_notification; -mod document; -mod document_annotation; -mod document_chunk; -mod document_comment; -mod document_file; +mod file; +mod file_annotation; +mod file_chunk; + +mod pipeline; +mod pipeline_run; mod workspace; mod workspace_activity; @@ -32,22 +33,18 @@ mod workspace_invite; mod workspace_member; mod workspace_webhook; -mod chat_operation; -mod chat_session; -mod chat_tool_call; - pub use account::AccountRepository; pub use account_action_token::AccountActionTokenRepository; pub use account_api_token::AccountApiTokenRepository; pub use account_notification::AccountNotificationRepository; -pub use chat_operation::{ChatOperationRepository, 
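
The scored variants translate a caller-facing `min_score` into a SQL-side `max_distance`, as the in-code comment explains: cosine distance lies in [0, 2], the reported score is `1 - distance`, so `score >= min_score` becomes `distance <= 1 - min_score` (and scores can go negative for near-opposite vectors). The mapping in isolation:

```rust
/// `min_score` threshold as the distance cutoff used in the SQL filter.
fn max_distance_for(min_score: f64) -> f64 {
    1.0 - min_score
}

/// Score reported back for a chunk found at a given cosine distance.
fn score_from_distance(distance: f64) -> f64 {
    1.0 - distance
}

#[test]
fn score_threshold_round_trips() {
    // Asking for score >= 0.75 is the same as distance <= 0.25.
    assert_eq!(max_distance_for(0.75), 0.25);
    // A chunk at distance 0.1 comes back with score 0.9.
    assert!((score_from_distance(0.1) - 0.9).abs() < 1e-12);
}
```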
FileOperationCounts}; -pub use chat_session::ChatSessionRepository; -pub use chat_tool_call::ChatToolCallRepository; -pub use document::DocumentRepository; -pub use document_annotation::DocumentAnnotationRepository; -pub use document_chunk::DocumentChunkRepository; -pub use document_comment::DocumentCommentRepository; -pub use document_file::DocumentFileRepository; + +pub use file::FileRepository; +pub use file_annotation::FileAnnotationRepository; +pub use file_chunk::FileChunkRepository; + +pub use pipeline::PipelineRepository; +pub use pipeline_run::PipelineRunRepository; + pub use workspace::WorkspaceRepository; pub use workspace_activity::WorkspaceActivityRepository; pub use workspace_integration::WorkspaceIntegrationRepository; diff --git a/crates/nvisy-postgres/src/query/pipeline.rs b/crates/nvisy-postgres/src/query/pipeline.rs new file mode 100644 index 0000000..c45332e --- /dev/null +++ b/crates/nvisy-postgres/src/query/pipeline.rs @@ -0,0 +1,361 @@ +//! Pipelines repository for managing workflow definitions. + +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use pgtrgm::expression_methods::TrgmExpressionMethods; +use uuid::Uuid; + +use crate::model::{NewPipeline, Pipeline, UpdatePipeline}; +use crate::types::{CursorPage, CursorPagination, OffsetPagination, PipelineStatus}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for pipeline database operations. +/// +/// Handles pipeline lifecycle management including creation, updates, +/// status transitions, and queries. +pub trait PipelineRepository { + /// Creates a new pipeline record. + fn create_pipeline( + &mut self, + new_pipeline: NewPipeline, + ) -> impl Future> + Send; + + /// Finds a pipeline by its unique identifier. + fn find_pipeline_by_id( + &mut self, + pipeline_id: Uuid, + ) -> impl Future>> + Send; + + /// Finds a pipeline by ID within a specific workspace. + /// + /// Provides workspace-scoped access control at the database level. + fn find_workspace_pipeline( + &mut self, + workspace_id: Uuid, + pipeline_id: Uuid, + ) -> impl Future>> + Send; + + /// Lists all pipelines in a workspace with offset pagination. + fn offset_list_workspace_pipelines( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists all pipelines in a workspace with cursor pagination. + fn cursor_list_workspace_pipelines( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + search_term: Option<&str>, + ) -> impl Future>> + Send; + + /// Lists all pipelines created by an account with offset pagination. + fn offset_list_account_pipelines( + &mut self, + account_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists active pipelines in a workspace. + fn list_active_workspace_pipelines( + &mut self, + workspace_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates a pipeline with new data. + fn update_pipeline( + &mut self, + pipeline_id: Uuid, + updates: UpdatePipeline, + ) -> impl Future> + Send; + + /// Soft deletes a pipeline by setting the deletion timestamp. + fn delete_pipeline(&mut self, pipeline_id: Uuid) -> impl Future> + Send; + + /// Counts pipelines in a workspace by status. + fn count_workspace_pipelines_by_status( + &mut self, + workspace_id: Uuid, + status: PipelineStatus, + ) -> impl Future> + Send; + + /// Searches pipelines by name using trigram similarity. 
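
The trigram search declared here (and implemented further down) leans on the pg_trgm similarity operator, via `trgm_similar_to` from the `pgtrgm` crate, rather than a `LIKE` pattern, so near-miss spellings still match; it requires the `pg_trgm` extension to be installed. A caller-side sketch, not part of the patch, under the same import assumptions as the earlier sketches, with `Pipeline::name` taken from the `name -> Text` column in `schema.rs`:

```rust
use nvisy_postgres::query::PipelineRepository;
use nvisy_postgres::{PgConnection, PgResult};
use uuid::Uuid;

/// Hypothetical autocomplete endpoint body: fuzzy-match pipeline names.
async fn suggest_pipelines(
    conn: &mut PgConnection,
    workspace_id: Uuid,
    partial_name: &str,
) -> PgResult<Vec<String>> {
    let matches = conn
        .search_pipelines_by_name(workspace_id, partial_name, 10)
        .await?;
    Ok(matches.into_iter().map(|p| p.name).collect())
}
```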
+ fn search_pipelines_by_name( + &mut self, + workspace_id: Uuid, + search_term: &str, + limit: i64, + ) -> impl Future>> + Send; +} + +impl PipelineRepository for PgConnection { + async fn create_pipeline(&mut self, new_pipeline: NewPipeline) -> PgResult { + use schema::pipelines; + + let pipeline = diesel::insert_into(pipelines::table) + .values(&new_pipeline) + .returning(Pipeline::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(pipeline) + } + + async fn find_pipeline_by_id(&mut self, pipeline_id: Uuid) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipeline = pipelines::table + .filter(dsl::id.eq(pipeline_id)) + .filter(dsl::deleted_at.is_null()) + .select(Pipeline::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(pipeline) + } + + async fn find_workspace_pipeline( + &mut self, + workspace_id: Uuid, + pipeline_id: Uuid, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipeline = pipelines::table + .filter(dsl::id.eq(pipeline_id)) + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::deleted_at.is_null()) + .select(Pipeline::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(pipeline) + } + + async fn offset_list_workspace_pipelines( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipelines = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::deleted_at.is_null()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .select(Pipeline::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(pipelines) + } + + async fn cursor_list_workspace_pipelines( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + search_term: Option<&str>, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + // Build base query with filters + let mut base_query = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::deleted_at.is_null()) + .into_boxed(); + + // Apply status filter + if let Some(status) = status_filter { + base_query = base_query.filter(dsl::status.eq(status)); + } + + // Apply search filter + if let Some(term) = search_term { + base_query = base_query.filter(dsl::name.trgm_similar_to(term)); + } + + let total = if pagination.include_count { + Some( + base_query + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + // Rebuild query for fetching items + let mut query = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::deleted_at.is_null()) + .into_boxed(); + + if let Some(status) = status_filter { + query = query.filter(dsl::status.eq(status)); + } + + if let Some(term) = search_term { + query = query.filter(dsl::name.trgm_similar_to(term)); + } + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + query + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(Pipeline::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + query + .select(Pipeline::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? 
+ }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |p: &Pipeline| (p.created_at.into(), p.id), + )) + } + + async fn offset_list_account_pipelines( + &mut self, + account_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipelines = pipelines::table + .filter(dsl::account_id.eq(account_id)) + .filter(dsl::deleted_at.is_null()) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .select(Pipeline::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(pipelines) + } + + async fn list_active_workspace_pipelines( + &mut self, + workspace_id: Uuid, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipelines = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::status.eq(PipelineStatus::Active)) + .filter(dsl::deleted_at.is_null()) + .order(dsl::name.asc()) + .select(Pipeline::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(pipelines) + } + + async fn update_pipeline( + &mut self, + pipeline_id: Uuid, + updates: UpdatePipeline, + ) -> PgResult { + use schema::pipelines::{self, dsl}; + + let pipeline = diesel::update(pipelines::table.filter(dsl::id.eq(pipeline_id))) + .set(&updates) + .returning(Pipeline::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(pipeline) + } + + async fn delete_pipeline(&mut self, pipeline_id: Uuid) -> PgResult<()> { + use diesel::dsl::now; + use schema::pipelines::{self, dsl}; + + diesel::update(pipelines::table.filter(dsl::id.eq(pipeline_id))) + .set(dsl::deleted_at.eq(now)) + .execute(self) + .await + .map_err(PgError::from)?; + + Ok(()) + } + + async fn count_workspace_pipelines_by_status( + &mut self, + workspace_id: Uuid, + status: PipelineStatus, + ) -> PgResult { + use schema::pipelines::{self, dsl}; + + let count = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::status.eq(status)) + .filter(dsl::deleted_at.is_null()) + .count() + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(count) + } + + async fn search_pipelines_by_name( + &mut self, + workspace_id: Uuid, + search_term: &str, + limit: i64, + ) -> PgResult> { + use schema::pipelines::{self, dsl}; + + let pipelines = pipelines::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter(dsl::name.trgm_similar_to(search_term)) + .filter(dsl::deleted_at.is_null()) + .order(dsl::name.asc()) + .limit(limit) + .select(Pipeline::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(pipelines) + } +} diff --git a/crates/nvisy-postgres/src/query/pipeline_run.rs b/crates/nvisy-postgres/src/query/pipeline_run.rs new file mode 100644 index 0000000..4794df6 --- /dev/null +++ b/crates/nvisy-postgres/src/query/pipeline_run.rs @@ -0,0 +1,536 @@ +//! Pipeline runs repository for managing pipeline execution instances. + +use std::future::Future; + +use diesel::prelude::*; +use diesel_async::RunQueryDsl; +use uuid::Uuid; + +use crate::model::{NewPipelineRun, PipelineRun, UpdatePipelineRun}; +use crate::types::{CursorPage, CursorPagination, OffsetPagination, PipelineRunStatus}; +use crate::{PgConnection, PgError, PgResult, schema}; + +/// Repository for pipeline run database operations. +/// +/// Handles pipeline run lifecycle management including creation, status updates, +/// completion tracking, and queries. +pub trait PipelineRunRepository { + /// Creates a new pipeline run record. 
+ fn create_pipeline_run( + &mut self, + new_run: NewPipelineRun, + ) -> impl Future> + Send; + + /// Finds a pipeline run by its unique identifier. + fn find_pipeline_run_by_id( + &mut self, + run_id: Uuid, + ) -> impl Future>> + Send; + + /// Finds a pipeline run by ID within a specific workspace. + fn find_workspace_pipeline_run( + &mut self, + workspace_id: Uuid, + run_id: Uuid, + ) -> impl Future>> + Send; + + /// Lists all runs for a specific pipeline with offset pagination. + fn offset_list_pipeline_runs( + &mut self, + pipeline_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists all runs for a specific pipeline with cursor pagination. + fn cursor_list_pipeline_runs( + &mut self, + pipeline_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + ) -> impl Future>> + Send; + + /// Lists all runs in a workspace with offset pagination. + fn offset_list_workspace_runs( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> impl Future>> + Send; + + /// Lists all runs in a workspace with cursor pagination. + fn cursor_list_workspace_runs( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + ) -> impl Future>> + Send; + + /// Lists active runs (queued or running) in a workspace. + fn list_active_workspace_runs( + &mut self, + workspace_id: Uuid, + ) -> impl Future>> + Send; + + /// Lists active runs (queued or running) for a specific pipeline. + fn list_active_pipeline_runs( + &mut self, + pipeline_id: Uuid, + ) -> impl Future>> + Send; + + /// Updates a pipeline run with new data. + fn update_pipeline_run( + &mut self, + run_id: Uuid, + updates: UpdatePipelineRun, + ) -> impl Future> + Send; + + /// Marks a run as started. + fn start_pipeline_run( + &mut self, + run_id: Uuid, + ) -> impl Future> + Send; + + /// Marks a run as completed successfully. + fn complete_pipeline_run( + &mut self, + run_id: Uuid, + output_config: serde_json::Value, + metrics: serde_json::Value, + ) -> impl Future> + Send; + + /// Marks a run as failed with error details. + fn fail_pipeline_run( + &mut self, + run_id: Uuid, + error: serde_json::Value, + metrics: serde_json::Value, + ) -> impl Future> + Send; + + /// Marks a run as cancelled. + fn cancel_pipeline_run( + &mut self, + run_id: Uuid, + ) -> impl Future> + Send; + + /// Counts runs for a pipeline by status. + fn count_pipeline_runs_by_status( + &mut self, + pipeline_id: Uuid, + status: PipelineRunStatus, + ) -> impl Future> + Send; + + /// Gets the most recent run for a pipeline. 
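
The run repository encodes a small state machine: a run is created as queued, `start_pipeline_run` moves it to running and stamps `started_at`, and exactly one of `complete_pipeline_run`, `fail_pipeline_run`, or `cancel_pipeline_run` stamps `completed_at`. A worker-side sketch, not part of the patch: import paths as before, `definition_snapshot` is assumed to surface as `serde_json::Value` (it is `Jsonb` in `schema.rs`), and `run_pipeline` is a stand-in for the real executor.

```rust
use nvisy_postgres::query::PipelineRunRepository;
use nvisy_postgres::{PgConnection, PgResult};
use serde_json::json;
use uuid::Uuid;

/// Hypothetical worker step: move a queued run through the status lifecycle.
async fn drive_run(conn: &mut PgConnection, run_id: Uuid) -> PgResult<()> {
    // Queued -> Running, stamping `started_at`.
    let run = conn.start_pipeline_run(run_id).await?;

    match run_pipeline(&run.definition_snapshot).await {
        Ok(output) => {
            // Running -> Completed, stamping `completed_at`.
            conn.complete_pipeline_run(
                run_id,
                output,
                json!({ "duration_ms": 1234 }), // illustrative metrics payload
            )
            .await?;
        }
        Err(message) => {
            // Running -> Failed, recording the error as JSON.
            conn.fail_pipeline_run(run_id, json!({ "message": message }), json!({}))
                .await?;
        }
    }
    Ok(())
}

/// Placeholder for the real executor; not part of the patch.
async fn run_pipeline(
    _definition: &serde_json::Value,
) -> Result<serde_json::Value, String> {
    Ok(json!({ "artifacts": [] }))
}
```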
+ fn find_latest_pipeline_run( + &mut self, + pipeline_id: Uuid, + ) -> impl Future>> + Send; +} + +impl PipelineRunRepository for PgConnection { + async fn create_pipeline_run(&mut self, new_run: NewPipelineRun) -> PgResult { + use schema::pipeline_runs; + + let run = diesel::insert_into(pipeline_runs::table) + .values(&new_run) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn find_pipeline_run_by_id(&mut self, run_id: Uuid) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let run = pipeline_runs::table + .filter(dsl::id.eq(run_id)) + .select(PipelineRun::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(run) + } + + async fn find_workspace_pipeline_run( + &mut self, + workspace_id: Uuid, + run_id: Uuid, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let run = pipeline_runs::table + .filter(dsl::id.eq(run_id)) + .filter(dsl::workspace_id.eq(workspace_id)) + .select(PipelineRun::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(run) + } + + async fn offset_list_pipeline_runs( + &mut self, + pipeline_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let runs = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .select(PipelineRun::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(runs) + } + + async fn cursor_list_pipeline_runs( + &mut self, + pipeline_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + // Build base query with filters + let mut base_query = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .into_boxed(); + + if let Some(status) = status_filter { + base_query = base_query.filter(dsl::status.eq(status)); + } + + let total = if pagination.include_count { + Some( + base_query + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + // Rebuild query for fetching items + let mut query = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .into_boxed(); + + if let Some(status) = status_filter { + query = query.filter(dsl::status.eq(status)); + } + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + query + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(PipelineRun::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + query + .select(PipelineRun::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? 
+ }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |r: &PipelineRun| (r.created_at.into(), r.id), + )) + } + + async fn offset_list_workspace_runs( + &mut self, + workspace_id: Uuid, + pagination: OffsetPagination, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let runs = pipeline_runs::table + .filter(dsl::workspace_id.eq(workspace_id)) + .order(dsl::created_at.desc()) + .limit(pagination.limit) + .offset(pagination.offset) + .select(PipelineRun::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(runs) + } + + async fn cursor_list_workspace_runs( + &mut self, + workspace_id: Uuid, + pagination: CursorPagination, + status_filter: Option, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + // Build base query with filters + let mut base_query = pipeline_runs::table + .filter(dsl::workspace_id.eq(workspace_id)) + .into_boxed(); + + if let Some(status) = status_filter { + base_query = base_query.filter(dsl::status.eq(status)); + } + + let total = if pagination.include_count { + Some( + base_query + .count() + .get_result::(self) + .await + .map_err(PgError::from)?, + ) + } else { + None + }; + + // Rebuild query for fetching items + let mut query = pipeline_runs::table + .filter(dsl::workspace_id.eq(workspace_id)) + .into_boxed(); + + if let Some(status) = status_filter { + query = query.filter(dsl::status.eq(status)); + } + + let limit = pagination.limit + 1; + + let items: Vec = if let Some(cursor) = &pagination.after { + let cursor_time = jiff_diesel::Timestamp::from(cursor.timestamp); + + query + .filter( + dsl::created_at + .lt(&cursor_time) + .or(dsl::created_at.eq(&cursor_time).and(dsl::id.lt(cursor.id))), + ) + .select(PipelineRun::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? + } else { + query + .select(PipelineRun::as_select()) + .order((dsl::created_at.desc(), dsl::id.desc())) + .limit(limit) + .load(self) + .await + .map_err(PgError::from)? 
+ }; + + Ok(CursorPage::new( + items, + total, + pagination.limit, + |r: &PipelineRun| (r.created_at.into(), r.id), + )) + } + + async fn list_active_workspace_runs( + &mut self, + workspace_id: Uuid, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let runs = pipeline_runs::table + .filter(dsl::workspace_id.eq(workspace_id)) + .filter( + dsl::status + .eq(PipelineRunStatus::Queued) + .or(dsl::status.eq(PipelineRunStatus::Running)), + ) + .order(dsl::created_at.desc()) + .select(PipelineRun::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(runs) + } + + async fn list_active_pipeline_runs(&mut self, pipeline_id: Uuid) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let runs = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .filter( + dsl::status + .eq(PipelineRunStatus::Queued) + .or(dsl::status.eq(PipelineRunStatus::Running)), + ) + .order(dsl::created_at.desc()) + .select(PipelineRun::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(runs) + } + + async fn update_pipeline_run( + &mut self, + run_id: Uuid, + updates: UpdatePipelineRun, + ) -> PgResult { + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(&updates) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn start_pipeline_run(&mut self, run_id: Uuid) -> PgResult { + use diesel::dsl::now; + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(( + dsl::status.eq(PipelineRunStatus::Running), + dsl::started_at.eq(now), + )) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn complete_pipeline_run( + &mut self, + run_id: Uuid, + output_config: serde_json::Value, + metrics: serde_json::Value, + ) -> PgResult { + use diesel::dsl::now; + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(( + dsl::status.eq(PipelineRunStatus::Completed), + dsl::output_config.eq(output_config), + dsl::metrics.eq(metrics), + dsl::completed_at.eq(now), + )) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn fail_pipeline_run( + &mut self, + run_id: Uuid, + error: serde_json::Value, + metrics: serde_json::Value, + ) -> PgResult { + use diesel::dsl::now; + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(( + dsl::status.eq(PipelineRunStatus::Failed), + dsl::error.eq(Some(error)), + dsl::metrics.eq(metrics), + dsl::completed_at.eq(now), + )) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn cancel_pipeline_run(&mut self, run_id: Uuid) -> PgResult { + use diesel::dsl::now; + use schema::pipeline_runs::{self, dsl}; + + let run = diesel::update(pipeline_runs::table.filter(dsl::id.eq(run_id))) + .set(( + dsl::status.eq(PipelineRunStatus::Cancelled), + dsl::completed_at.eq(now), + )) + .returning(PipelineRun::as_returning()) + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(run) + } + + async fn count_pipeline_runs_by_status( + &mut self, + pipeline_id: Uuid, + status: PipelineRunStatus, + ) -> PgResult { + use schema::pipeline_runs::{self, dsl}; + + let count = 
pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .filter(dsl::status.eq(status)) + .count() + .get_result(self) + .await + .map_err(PgError::from)?; + + Ok(count) + } + + async fn find_latest_pipeline_run( + &mut self, + pipeline_id: Uuid, + ) -> PgResult> { + use schema::pipeline_runs::{self, dsl}; + + let run = pipeline_runs::table + .filter(dsl::pipeline_id.eq(pipeline_id)) + .order(dsl::created_at.desc()) + .select(PipelineRun::as_select()) + .first(self) + .await + .optional() + .map_err(PgError::from)?; + + Ok(run) + } +} diff --git a/crates/nvisy-postgres/src/schema.rs b/crates/nvisy-postgres/src/schema.rs index ca630c1..75809e8 100644 --- a/crates/nvisy-postgres/src/schema.rs +++ b/crates/nvisy-postgres/src/schema.rs @@ -17,10 +17,6 @@ pub mod sql_types { #[diesel(postgres_type(name = "api_token_type"))] pub struct ApiTokenType; - #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "content_segmentation"))] - pub struct ContentSegmentation; - #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] #[diesel(postgres_type(name = "file_source"))] pub struct FileSource; @@ -42,24 +38,20 @@ pub mod sql_types { pub struct NotificationEvent; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "processing_status"))] - pub struct ProcessingStatus; - - #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "require_mode"))] - pub struct RequireMode; + #[diesel(postgres_type(name = "pipeline_run_status"))] + pub struct PipelineRunStatus; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "run_type"))] - pub struct RunType; + #[diesel(postgres_type(name = "pipeline_status"))] + pub struct PipelineStatus; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "chat_session_status"))] - pub struct ChatSessionStatus; + #[diesel(postgres_type(name = "pipeline_trigger_type"))] + pub struct PipelineTriggerType; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] - #[diesel(postgres_type(name = "chat_tool_status"))] - pub struct ChatToolStatus; + #[diesel(postgres_type(name = "run_type"))] + pub struct RunType; #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] #[diesel(postgres_type(name = "webhook_event"))] @@ -166,9 +158,9 @@ diesel::table! { use pgvector::sql_types::*; use super::sql_types::AnnotationType; - document_annotations (id) { + file_annotations (id) { id -> Uuid, - document_file_id -> Uuid, + file_id -> Uuid, account_id -> Uuid, content -> Text, annotation_type -> AnnotationType, @@ -183,7 +175,7 @@ diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; - document_chunks (id) { + file_chunks (id) { id -> Uuid, file_id -> Uuid, chunk_index -> Int4, @@ -201,47 +193,20 @@ diesel::table! { diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; - - document_comments (id) { - id -> Uuid, - file_id -> Uuid, - account_id -> Uuid, - parent_comment_id -> Nullable, - reply_to_account_id -> Nullable, - content -> Text, - metadata -> Jsonb, - created_at -> Timestamptz, - updated_at -> Timestamptz, - deleted_at -> Nullable, - } -} - -diesel::table! 
{ - use diesel::sql_types::*; - use pgvector::sql_types::*; - use super::sql_types::RequireMode; - use super::sql_types::ProcessingStatus; - use super::sql_types::ContentSegmentation; use super::sql_types::FileSource; - document_files (id) { + files (id) { id -> Uuid, workspace_id -> Uuid, - document_id -> Nullable, account_id -> Uuid, parent_id -> Nullable, version_number -> Int4, display_name -> Text, original_filename -> Text, file_extension -> Text, + mime_type -> Nullable, tags -> Array>, source -> FileSource, - require_mode -> RequireMode, - processing_priority -> Int4, - processing_status -> ProcessingStatus, - is_indexed -> Bool, - content_segmentation -> ContentSegmentation, - visual_support -> Bool, file_size_bytes -> Int8, file_hash_sha256 -> Bytea, storage_path -> Text, @@ -256,75 +221,44 @@ diesel::table! { diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; + use super::sql_types::PipelineTriggerType; + use super::sql_types::PipelineRunStatus; - documents (id) { + pipeline_runs (id) { id -> Uuid, + pipeline_id -> Uuid, workspace_id -> Uuid, account_id -> Uuid, - display_name -> Text, - description -> Nullable, - tags -> Array>, - metadata -> Jsonb, - created_at -> Timestamptz, - updated_at -> Timestamptz, - deleted_at -> Nullable, - } -} - -diesel::table! { - use diesel::sql_types::*; - use pgvector::sql_types::*; - - chat_operations (id) { - id -> Uuid, - tool_call_id -> Uuid, - file_id -> Uuid, - chunk_id -> Nullable, - operation_type -> Text, - operation_diff -> Jsonb, - applied -> Bool, - reverted -> Bool, + trigger_type -> PipelineTriggerType, + status -> PipelineRunStatus, + input_config -> Jsonb, + output_config -> Jsonb, + definition_snapshot -> Jsonb, + error -> Nullable, + metrics -> Jsonb, + started_at -> Nullable, + completed_at -> Nullable, created_at -> Timestamptz, - applied_at -> Nullable, } } diesel::table! { use diesel::sql_types::*; use pgvector::sql_types::*; - use super::sql_types::ChatSessionStatus; + use super::sql_types::PipelineStatus; - chat_sessions (id) { + pipelines (id) { id -> Uuid, workspace_id -> Uuid, account_id -> Uuid, - primary_file_id -> Uuid, - display_name -> Text, - session_status -> ChatSessionStatus, - model_config -> Jsonb, - message_count -> Int4, - token_count -> Int4, + name -> Text, + description -> Nullable, + status -> PipelineStatus, + definition -> Jsonb, + metadata -> Jsonb, created_at -> Timestamptz, updated_at -> Timestamptz, - } -} - -diesel::table! { - use diesel::sql_types::*; - use pgvector::sql_types::*; - use super::sql_types::ChatToolStatus; - - chat_tool_calls (id) { - id -> Uuid, - session_id -> Uuid, - file_id -> Uuid, - chunk_id -> Nullable, - tool_name -> Text, - tool_input -> Jsonb, - tool_output -> Jsonb, - tool_status -> ChatToolStatus, - started_at -> Timestamptz, - completed_at -> Nullable, + deleted_at -> Nullable, } } @@ -481,24 +415,16 @@ diesel::table! 
{ diesel::joinable!(account_action_tokens -> accounts (account_id)); diesel::joinable!(account_api_tokens -> accounts (account_id)); diesel::joinable!(account_notifications -> accounts (account_id)); -diesel::joinable!(document_annotations -> accounts (account_id)); -diesel::joinable!(document_annotations -> document_files (document_file_id)); -diesel::joinable!(document_chunks -> document_files (file_id)); -diesel::joinable!(document_comments -> document_files (file_id)); -diesel::joinable!(document_files -> accounts (account_id)); -diesel::joinable!(document_files -> documents (document_id)); -diesel::joinable!(document_files -> workspaces (workspace_id)); -diesel::joinable!(documents -> accounts (account_id)); -diesel::joinable!(documents -> workspaces (workspace_id)); -diesel::joinable!(chat_operations -> document_chunks (chunk_id)); -diesel::joinable!(chat_operations -> document_files (file_id)); -diesel::joinable!(chat_operations -> chat_tool_calls (tool_call_id)); -diesel::joinable!(chat_sessions -> accounts (account_id)); -diesel::joinable!(chat_sessions -> document_files (primary_file_id)); -diesel::joinable!(chat_sessions -> workspaces (workspace_id)); -diesel::joinable!(chat_tool_calls -> document_chunks (chunk_id)); -diesel::joinable!(chat_tool_calls -> document_files (file_id)); -diesel::joinable!(chat_tool_calls -> chat_sessions (session_id)); +diesel::joinable!(file_annotations -> accounts (account_id)); +diesel::joinable!(file_annotations -> files (file_id)); +diesel::joinable!(file_chunks -> files (file_id)); +diesel::joinable!(files -> accounts (account_id)); +diesel::joinable!(files -> workspaces (workspace_id)); +diesel::joinable!(pipeline_runs -> accounts (account_id)); +diesel::joinable!(pipeline_runs -> pipelines (pipeline_id)); +diesel::joinable!(pipeline_runs -> workspaces (workspace_id)); +diesel::joinable!(pipelines -> accounts (account_id)); +diesel::joinable!(pipelines -> workspaces (workspace_id)); diesel::joinable!(workspace_activities -> accounts (account_id)); diesel::joinable!(workspace_activities -> workspaces (workspace_id)); diesel::joinable!(workspace_integration_runs -> accounts (account_id)); @@ -518,14 +444,11 @@ diesel::allow_tables_to_appear_in_same_query!( account_api_tokens, account_notifications, accounts, - chat_operations, - chat_sessions, - chat_tool_calls, - document_annotations, - document_chunks, - document_comments, - document_files, - documents, + file_annotations, + file_chunks, + files, + pipeline_runs, + pipelines, workspace_activities, workspace_integration_runs, workspace_integrations, diff --git a/crates/nvisy-postgres/src/types/constraint/chat_operations.rs b/crates/nvisy-postgres/src/types/constraint/chat_operations.rs deleted file mode 100644 index 99db5ec..0000000 --- a/crates/nvisy-postgres/src/types/constraint/chat_operations.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! Chat operations table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Chat operations table constraint violations. 
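
The new `joinable!` and `allow_tables_to_appear_in_same_query!` entries above are what let application code join `pipeline_runs` onto `pipelines` directly. A minimal sketch of such a query, not part of the patch, assuming the `schema` module and error types are exported from the crate root as the repositories' `use crate::{..., schema}` lines suggest:

```rust
use diesel::prelude::*;
use diesel_async::RunQueryDsl;
use nvisy_postgres::{PgConnection, PgError, PgResult, schema};
use uuid::Uuid;

/// Hypothetical listing: the 50 most recent runs in a workspace, paired with
/// the name of the pipeline they belong to via the declared join.
async fn recent_runs_with_pipeline_names(
    conn: &mut PgConnection,
    workspace_id: Uuid,
) -> PgResult<Vec<(String, Uuid)>> {
    use schema::{pipeline_runs, pipelines};

    pipelines::table
        .inner_join(pipeline_runs::table) // ON pipeline_runs.pipeline_id = pipelines.id
        .filter(pipelines::workspace_id.eq(workspace_id))
        .filter(pipelines::deleted_at.is_null())
        .order(pipeline_runs::created_at.desc())
        .limit(50)
        .select((pipelines::name, pipeline_runs::id))
        .load(conn)
        .await
        .map_err(PgError::from)
}
```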
-#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum ChatOperationConstraints { - // Operation validation constraints - #[strum(serialize = "chat_operations_operation_type_length")] - OperationTypeLength, - #[strum(serialize = "chat_operations_operation_diff_size")] - OperationDiffSize, - - // Operation business logic constraints - #[strum(serialize = "chat_operations_revert_requires_applied")] - RevertRequiresApplied, - - // Operation chronological constraints - #[strum(serialize = "chat_operations_applied_after_created")] - AppliedAfterCreated, -} - -impl ChatOperationConstraints { - /// Creates a new [`ChatOperationConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - ChatOperationConstraints::OperationTypeLength - | ChatOperationConstraints::OperationDiffSize => ConstraintCategory::Validation, - - ChatOperationConstraints::RevertRequiresApplied => ConstraintCategory::BusinessLogic, - - ChatOperationConstraints::AppliedAfterCreated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: ChatOperationConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for ChatOperationConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/chat_sessions.rs b/crates/nvisy-postgres/src/types/constraint/chat_sessions.rs deleted file mode 100644 index 05dc8b9..0000000 --- a/crates/nvisy-postgres/src/types/constraint/chat_sessions.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! Chat sessions table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Chat sessions table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum ChatSessionConstraints { - // Session validation constraints - #[strum(serialize = "chat_sessions_display_name_length")] - DisplayNameLength, - #[strum(serialize = "chat_sessions_model_config_size")] - ModelConfigSize, - #[strum(serialize = "chat_sessions_message_count_min")] - MessageCountMin, - #[strum(serialize = "chat_sessions_token_count_min")] - TokenCountMin, - - // Session chronological constraints - #[strum(serialize = "chat_sessions_updated_after_created")] - UpdatedAfterCreated, -} - -impl ChatSessionConstraints { - /// Creates a new [`ChatSessionConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. 
- pub fn categorize(&self) -> ConstraintCategory { - match self { - ChatSessionConstraints::DisplayNameLength - | ChatSessionConstraints::ModelConfigSize - | ChatSessionConstraints::MessageCountMin - | ChatSessionConstraints::TokenCountMin => ConstraintCategory::Validation, - - ChatSessionConstraints::UpdatedAfterCreated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: ChatSessionConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for ChatSessionConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/chat_tool_calls.rs b/crates/nvisy-postgres/src/types/constraint/chat_tool_calls.rs deleted file mode 100644 index c836dea..0000000 --- a/crates/nvisy-postgres/src/types/constraint/chat_tool_calls.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Chat tool calls table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Chat tool calls table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum ChatToolCallConstraints { - // Tool call validation constraints - #[strum(serialize = "chat_tool_calls_tool_name_length")] - ToolNameLength, - #[strum(serialize = "chat_tool_calls_tool_input_size")] - ToolInputSize, - #[strum(serialize = "chat_tool_calls_tool_output_size")] - ToolOutputSize, - - // Tool call chronological constraints - #[strum(serialize = "chat_tool_calls_completed_after_started")] - CompletedAfterStarted, -} - -impl ChatToolCallConstraints { - /// Creates a new [`ChatToolCallConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - ChatToolCallConstraints::ToolNameLength - | ChatToolCallConstraints::ToolInputSize - | ChatToolCallConstraints::ToolOutputSize => ConstraintCategory::Validation, - - ChatToolCallConstraints::CompletedAfterStarted => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: ChatToolCallConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for ChatToolCallConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_annotations.rs b/crates/nvisy-postgres/src/types/constraint/document_annotations.rs deleted file mode 100644 index 2e974fc..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_annotations.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! Document annotations table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document annotations table constraint violations. 
-#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentAnnotationConstraints { - // Annotation content constraints - #[strum(serialize = "document_annotations_content_length")] - ContentLength, - #[strum(serialize = "document_annotations_type_format")] - TypeFormat, - - // Annotation metadata constraints - #[strum(serialize = "document_annotations_metadata_size")] - MetadataSize, - - // Annotation chronological constraints - #[strum(serialize = "document_annotations_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "document_annotations_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "document_annotations_deleted_after_updated")] - DeletedAfterUpdated, -} - -impl DocumentAnnotationConstraints { - /// Creates a new [`DocumentAnnotationConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentAnnotationConstraints::ContentLength - | DocumentAnnotationConstraints::TypeFormat - | DocumentAnnotationConstraints::MetadataSize => ConstraintCategory::Validation, - - DocumentAnnotationConstraints::UpdatedAfterCreated - | DocumentAnnotationConstraints::DeletedAfterCreated - | DocumentAnnotationConstraints::DeletedAfterUpdated => { - ConstraintCategory::Chronological - } - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentAnnotationConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentAnnotationConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_chunks.rs b/crates/nvisy-postgres/src/types/constraint/document_chunks.rs deleted file mode 100644 index 75983f9..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_chunks.rs +++ /dev/null @@ -1,79 +0,0 @@ -//! Document chunks table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document chunks table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentChunkConstraints { - // Chunk position constraints - #[strum(serialize = "document_chunks_chunk_index_min")] - ChunkIndexMin, - - // Content constraints - #[strum(serialize = "document_chunks_content_sha256_length")] - ContentSha256Length, - #[strum(serialize = "document_chunks_content_size_min")] - ContentSizeMin, - #[strum(serialize = "document_chunks_token_count_min")] - TokenCountMin, - - // Embedding constraints - #[strum(serialize = "document_chunks_embedding_model_format")] - EmbeddingModelFormat, - - // Metadata constraints - #[strum(serialize = "document_chunks_metadata_size")] - MetadataSize, - - // Chronological constraints - #[strum(serialize = "document_chunks_updated_after_created")] - UpdatedAfterCreated, - - // Uniqueness constraints - #[strum(serialize = "document_chunks_file_chunk_unique")] - FileChunkUnique, -} - -impl DocumentChunkConstraints { - /// Creates a new [`DocumentChunkConstraints`] from the constraint name. 
- pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentChunkConstraints::ChunkIndexMin - | DocumentChunkConstraints::ContentSha256Length - | DocumentChunkConstraints::ContentSizeMin - | DocumentChunkConstraints::TokenCountMin - | DocumentChunkConstraints::EmbeddingModelFormat - | DocumentChunkConstraints::MetadataSize => ConstraintCategory::Validation, - - DocumentChunkConstraints::UpdatedAfterCreated => ConstraintCategory::Chronological, - - DocumentChunkConstraints::FileChunkUnique => ConstraintCategory::Uniqueness, - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentChunkConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentChunkConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_comments.rs b/crates/nvisy-postgres/src/types/constraint/document_comments.rs deleted file mode 100644 index 9c41218..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_comments.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! Document comments table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document comments table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentCommentConstraints { - // Comment content validation constraints - #[strum(serialize = "document_comments_content_length")] - ContentLength, - - // Comment target validation constraints - #[strum(serialize = "document_comments_one_target")] - OneTarget, - - // Comment metadata constraints - #[strum(serialize = "document_comments_metadata_size")] - MetadataSize, - - // Comment chronological constraints - #[strum(serialize = "document_comments_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "document_comments_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "document_comments_deleted_after_updated")] - DeletedAfterUpdated, -} - -impl DocumentCommentConstraints { - /// Creates a new [`DocumentCommentConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. 
- pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentCommentConstraints::ContentLength - | DocumentCommentConstraints::OneTarget - | DocumentCommentConstraints::MetadataSize => ConstraintCategory::Validation, - - DocumentCommentConstraints::UpdatedAfterCreated - | DocumentCommentConstraints::DeletedAfterCreated - | DocumentCommentConstraints::DeletedAfterUpdated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentCommentConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentCommentConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_files.rs b/crates/nvisy-postgres/src/types/constraint/document_files.rs deleted file mode 100644 index 3fa0533..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_files.rs +++ /dev/null @@ -1,107 +0,0 @@ -//! Document files table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document files table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentFileConstraints { - // File identity validation constraints - #[strum(serialize = "document_files_display_name_length")] - DisplayNameLength, - #[strum(serialize = "document_files_original_filename_length")] - OriginalFilenameLength, - #[strum(serialize = "document_files_file_extension_format")] - FileExtensionFormat, - #[strum(serialize = "document_files_tags_count_max")] - TagsCountMax, - - // File processing constraints - #[strum(serialize = "document_files_processing_priority_range")] - ProcessingPriorityRange, - - // File storage constraints - #[strum(serialize = "document_files_file_size_min")] - FileSizeMin, - #[strum(serialize = "document_files_storage_path_not_empty")] - StoragePathNotEmpty, - #[strum(serialize = "document_files_storage_bucket_not_empty")] - StorageBucketNotEmpty, - #[strum(serialize = "document_files_file_hash_sha256_length")] - FileHashSha256Length, - - // File metadata constraints - #[strum(serialize = "document_files_metadata_size")] - MetadataSize, - - // File retention constraints - #[strum(serialize = "document_files_retention_period")] - RetentionPeriod, - - // File version constraints - #[strum(serialize = "document_files_version_number_min")] - VersionNumberMin, - #[strum(serialize = "document_files_parent_same_document")] - ParentSameDocument, - - // File chronological constraints - #[strum(serialize = "document_files_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "document_files_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "document_files_deleted_after_updated")] - DeletedAfterUpdated, - #[strum(serialize = "document_files_auto_delete_after_created")] - AutoDeleteAfterCreated, -} - -impl DocumentFileConstraints { - /// Creates a new [`DocumentFileConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. 
- pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentFileConstraints::DisplayNameLength - | DocumentFileConstraints::OriginalFilenameLength - | DocumentFileConstraints::FileExtensionFormat - | DocumentFileConstraints::TagsCountMax - | DocumentFileConstraints::ProcessingPriorityRange - | DocumentFileConstraints::FileSizeMin - | DocumentFileConstraints::StoragePathNotEmpty - | DocumentFileConstraints::StorageBucketNotEmpty - | DocumentFileConstraints::FileHashSha256Length - | DocumentFileConstraints::MetadataSize - | DocumentFileConstraints::RetentionPeriod - | DocumentFileConstraints::VersionNumberMin - | DocumentFileConstraints::ParentSameDocument => ConstraintCategory::Validation, - - DocumentFileConstraints::UpdatedAfterCreated - | DocumentFileConstraints::DeletedAfterCreated - | DocumentFileConstraints::DeletedAfterUpdated - | DocumentFileConstraints::AutoDeleteAfterCreated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentFileConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentFileConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/document_versions.rs b/crates/nvisy-postgres/src/types/constraint/document_versions.rs deleted file mode 100644 index 476c880..0000000 --- a/crates/nvisy-postgres/src/types/constraint/document_versions.rs +++ /dev/null @@ -1,107 +0,0 @@ -//! Document versions table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document versions table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentVersionConstraints { - // Version validation constraints - #[strum(serialize = "document_versions_version_number_min")] - VersionNumberMin, - #[strum(serialize = "document_versions_display_name_length")] - DisplayNameLength, - #[strum(serialize = "document_versions_file_extension_format")] - FileExtensionFormat, - - // Version processing constraints - #[strum(serialize = "document_versions_processing_credits_min")] - ProcessingCreditsMin, - #[strum(serialize = "document_versions_processing_duration_min")] - ProcessingDurationMin, - #[strum(serialize = "document_versions_api_calls_min")] - ApiCallsMin, - - // Version storage constraints - #[strum(serialize = "document_versions_file_size_min")] - FileSizeMin, - #[strum(serialize = "document_versions_storage_path_not_empty")] - StoragePathNotEmpty, - #[strum(serialize = "document_versions_storage_bucket_not_empty")] - StorageBucketNotEmpty, - #[strum(serialize = "document_versions_file_hash_sha256_length")] - FileHashSha256Length, - - // Version metadata constraints - #[strum(serialize = "document_versions_results_size")] - ResultsSize, - #[strum(serialize = "document_versions_metadata_size")] - MetadataSize, - - // Version retention constraints - #[strum(serialize = "document_versions_retention_period")] - RetentionPeriod, - - // Version chronological constraints - #[strum(serialize = "document_versions_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "document_versions_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "document_versions_deleted_after_updated")] - DeletedAfterUpdated, - #[strum(serialize = 
"document_versions_auto_delete_after_created")] - AutoDeleteAfterCreated, -} - -impl DocumentVersionConstraints { - /// Creates a new [`DocumentVersionConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. - pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentVersionConstraints::VersionNumberMin - | DocumentVersionConstraints::DisplayNameLength - | DocumentVersionConstraints::FileExtensionFormat - | DocumentVersionConstraints::ProcessingCreditsMin - | DocumentVersionConstraints::ProcessingDurationMin - | DocumentVersionConstraints::ApiCallsMin - | DocumentVersionConstraints::FileSizeMin - | DocumentVersionConstraints::StoragePathNotEmpty - | DocumentVersionConstraints::StorageBucketNotEmpty - | DocumentVersionConstraints::FileHashSha256Length - | DocumentVersionConstraints::ResultsSize - | DocumentVersionConstraints::MetadataSize - | DocumentVersionConstraints::RetentionPeriod => ConstraintCategory::Validation, - - DocumentVersionConstraints::UpdatedAfterCreated - | DocumentVersionConstraints::DeletedAfterCreated - | DocumentVersionConstraints::DeletedAfterUpdated - | DocumentVersionConstraints::AutoDeleteAfterCreated => { - ConstraintCategory::Chronological - } - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentVersionConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentVersionConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/documents.rs b/crates/nvisy-postgres/src/types/constraint/documents.rs deleted file mode 100644 index 1ac5c85..0000000 --- a/crates/nvisy-postgres/src/types/constraint/documents.rs +++ /dev/null @@ -1,69 +0,0 @@ -//! Documents table constraint violations. - -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -use super::ConstraintCategory; - -/// Document table constraint violations. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] -#[serde(into = "String", try_from = "String")] -pub enum DocumentConstraints { - // Document validation constraints - #[strum(serialize = "documents_display_name_length")] - DisplayNameLength, - #[strum(serialize = "documents_description_length_max")] - DescriptionLengthMax, - #[strum(serialize = "documents_tags_count_max")] - TagsCountMax, - - // Document metadata constraints - #[strum(serialize = "documents_metadata_size")] - MetadataSize, - - // Document chronological constraints - #[strum(serialize = "documents_updated_after_created")] - UpdatedAfterCreated, - #[strum(serialize = "documents_deleted_after_created")] - DeletedAfterCreated, - #[strum(serialize = "documents_deleted_after_updated")] - DeletedAfterUpdated, -} - -impl DocumentConstraints { - /// Creates a new [`DocumentConstraints`] from the constraint name. - pub fn new(constraint: &str) -> Option { - constraint.parse().ok() - } - - /// Returns the category of this constraint violation. 
- pub fn categorize(&self) -> ConstraintCategory { - match self { - DocumentConstraints::DisplayNameLength - | DocumentConstraints::DescriptionLengthMax - | DocumentConstraints::TagsCountMax - | DocumentConstraints::MetadataSize => ConstraintCategory::Validation, - - DocumentConstraints::UpdatedAfterCreated - | DocumentConstraints::DeletedAfterCreated - | DocumentConstraints::DeletedAfterUpdated => ConstraintCategory::Chronological, - } - } -} - -impl From for String { - #[inline] - fn from(val: DocumentConstraints) -> Self { - val.to_string() - } -} - -impl TryFrom for DocumentConstraints { - type Error = strum::ParseError; - - #[inline] - fn try_from(value: String) -> Result { - value.parse() - } -} diff --git a/crates/nvisy-postgres/src/types/constraint/file_annotations.rs b/crates/nvisy-postgres/src/types/constraint/file_annotations.rs new file mode 100644 index 0000000..a942af3 --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/file_annotations.rs @@ -0,0 +1,64 @@ +//! File annotations table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// File annotations table constraint violations. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum FileAnnotationConstraints { + // Annotation content constraints + #[strum(serialize = "file_annotations_content_length")] + ContentLength, + + // Annotation metadata constraints + #[strum(serialize = "file_annotations_metadata_size")] + MetadataSize, + + // Annotation chronological constraints + #[strum(serialize = "file_annotations_updated_after_created")] + UpdatedAfterCreated, + #[strum(serialize = "file_annotations_deleted_after_created")] + DeletedAfterCreated, + #[strum(serialize = "file_annotations_deleted_after_updated")] + DeletedAfterUpdated, +} + +impl FileAnnotationConstraints { + /// Creates a new [`FileAnnotationConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + FileAnnotationConstraints::ContentLength | FileAnnotationConstraints::MetadataSize => { + ConstraintCategory::Validation + } + + FileAnnotationConstraints::UpdatedAfterCreated + | FileAnnotationConstraints::DeletedAfterCreated + | FileAnnotationConstraints::DeletedAfterUpdated => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: FileAnnotationConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for FileAnnotationConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/file_chunks.rs b/crates/nvisy-postgres/src/types/constraint/file_chunks.rs new file mode 100644 index 0000000..d75354f --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/file_chunks.rs @@ -0,0 +1,79 @@ +//! File chunks table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// File chunks table constraint violations. 
+#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum FileChunkConstraints { + // Chunk position constraints + #[strum(serialize = "file_chunks_chunk_index_min")] + ChunkIndexMin, + + // Content constraints + #[strum(serialize = "file_chunks_content_sha256_length")] + ContentSha256Length, + #[strum(serialize = "file_chunks_content_size_min")] + ContentSizeMin, + #[strum(serialize = "file_chunks_token_count_min")] + TokenCountMin, + + // Embedding constraints + #[strum(serialize = "file_chunks_embedding_model_format")] + EmbeddingModelFormat, + + // Metadata constraints + #[strum(serialize = "file_chunks_metadata_size")] + MetadataSize, + + // Chronological constraints + #[strum(serialize = "file_chunks_updated_after_created")] + UpdatedAfterCreated, + + // Uniqueness constraints + #[strum(serialize = "file_chunks_file_chunk_unique")] + FileChunkUnique, +} + +impl FileChunkConstraints { + /// Creates a new [`FileChunkConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + FileChunkConstraints::ChunkIndexMin + | FileChunkConstraints::ContentSha256Length + | FileChunkConstraints::ContentSizeMin + | FileChunkConstraints::TokenCountMin + | FileChunkConstraints::EmbeddingModelFormat + | FileChunkConstraints::MetadataSize => ConstraintCategory::Validation, + + FileChunkConstraints::UpdatedAfterCreated => ConstraintCategory::Chronological, + + FileChunkConstraints::FileChunkUnique => ConstraintCategory::Uniqueness, + } + } +} + +impl From for String { + #[inline] + fn from(val: FileChunkConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for FileChunkConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/files.rs b/crates/nvisy-postgres/src/types/constraint/files.rs new file mode 100644 index 0000000..07b9c03 --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/files.rs @@ -0,0 +1,94 @@ +//! Files table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Files table constraint violations. 
+#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum FileConstraints { + // File identity validation constraints + #[strum(serialize = "files_display_name_length")] + DisplayNameLength, + #[strum(serialize = "files_original_filename_length")] + OriginalFilenameLength, + #[strum(serialize = "files_file_extension_format")] + FileExtensionFormat, + #[strum(serialize = "files_mime_type_format")] + MimeTypeFormat, + #[strum(serialize = "files_tags_count_max")] + TagsCountMax, + + // File storage constraints + #[strum(serialize = "files_file_size_min")] + FileSizeMin, + #[strum(serialize = "files_storage_path_not_empty")] + StoragePathNotEmpty, + #[strum(serialize = "files_storage_bucket_not_empty")] + StorageBucketNotEmpty, + #[strum(serialize = "files_file_hash_sha256_length")] + FileHashSha256Length, + + // File metadata constraints + #[strum(serialize = "files_metadata_size")] + MetadataSize, + + // File version constraints + #[strum(serialize = "files_version_number_min")] + VersionNumberMin, + + // File chronological constraints + #[strum(serialize = "files_updated_after_created")] + UpdatedAfterCreated, + #[strum(serialize = "files_deleted_after_created")] + DeletedAfterCreated, + #[strum(serialize = "files_deleted_after_updated")] + DeletedAfterUpdated, +} + +impl FileConstraints { + /// Creates a new [`FileConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + FileConstraints::DisplayNameLength + | FileConstraints::OriginalFilenameLength + | FileConstraints::FileExtensionFormat + | FileConstraints::MimeTypeFormat + | FileConstraints::TagsCountMax + | FileConstraints::FileSizeMin + | FileConstraints::StoragePathNotEmpty + | FileConstraints::StorageBucketNotEmpty + | FileConstraints::FileHashSha256Length + | FileConstraints::MetadataSize + | FileConstraints::VersionNumberMin => ConstraintCategory::Validation, + + FileConstraints::UpdatedAfterCreated + | FileConstraints::DeletedAfterCreated + | FileConstraints::DeletedAfterUpdated => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: FileConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for FileConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/mod.rs b/crates/nvisy-postgres/src/types/constraint/mod.rs index 135a540..edc1b98 100644 --- a/crates/nvisy-postgres/src/types/constraint/mod.rs +++ b/crates/nvisy-postgres/src/types/constraint/mod.rs @@ -18,18 +18,14 @@ mod workspace_members; mod workspace_webhooks; mod workspaces; -// Document-related constraint modules -mod document_annotations; -mod document_chunks; -mod document_comments; -mod document_files; -mod document_versions; -mod documents; - -// Chat-related constraint modules -mod chat_operations; -mod chat_sessions; -mod chat_tool_calls; +// File-related constraint modules +mod file_annotations; +mod file_chunks; +mod files; + +// Pipeline-related constraint modules +mod pipeline_runs; +mod pipelines; use std::fmt; @@ -39,15 +35,11 @@ pub use self::account_action_tokens::AccountActionTokenConstraints; pub use self::account_api_tokens::AccountApiTokenConstraints; pub use 
self::account_notifications::AccountNotificationConstraints; pub use self::accounts::AccountConstraints; -pub use self::chat_operations::ChatOperationConstraints; -pub use self::chat_sessions::ChatSessionConstraints; -pub use self::chat_tool_calls::ChatToolCallConstraints; -pub use self::document_annotations::DocumentAnnotationConstraints; -pub use self::document_chunks::DocumentChunkConstraints; -pub use self::document_comments::DocumentCommentConstraints; -pub use self::document_files::DocumentFileConstraints; -pub use self::document_versions::DocumentVersionConstraints; -pub use self::documents::DocumentConstraints; +pub use self::file_annotations::FileAnnotationConstraints; +pub use self::file_chunks::FileChunkConstraints; +pub use self::files::FileConstraints; +pub use self::pipeline_runs::PipelineRunConstraints; +pub use self::pipelines::PipelineConstraints; pub use self::workspace_activities::WorkspaceActivitiesConstraints; pub use self::workspace_integration_runs::WorkspaceIntegrationRunConstraints; pub use self::workspace_integrations::WorkspaceIntegrationConstraints; @@ -79,18 +71,14 @@ pub enum ConstraintViolation { WorkspaceWebhook(WorkspaceWebhookConstraints), WorkspaceIntegrationRun(WorkspaceIntegrationRunConstraints), - // Document-related constraints - Document(DocumentConstraints), - DocumentAnnotation(DocumentAnnotationConstraints), - DocumentChunk(DocumentChunkConstraints), - DocumentComment(DocumentCommentConstraints), - DocumentFile(DocumentFileConstraints), - DocumentVersion(DocumentVersionConstraints), - - // Chat-related constraints - ChatSession(ChatSessionConstraints), - ChatToolCall(ChatToolCallConstraints), - ChatOperation(ChatOperationConstraints), + // File-related constraints + File(FileConstraints), + FileAnnotation(FileAnnotationConstraints), + FileChunk(FileChunkConstraints), + + // Pipeline-related constraints + Pipeline(PipelineConstraints), + PipelineRun(PipelineRunConstraints), } /// Categories of database constraint violations. @@ -159,19 +147,13 @@ impl ConstraintViolation { WorkspaceWebhookConstraints::new => WorkspaceWebhook, WorkspaceIntegrationRunConstraints::new => WorkspaceIntegrationRun, }, - "documents" => try_parse!(DocumentConstraints::new => Document), - "document" => try_parse! { - DocumentAnnotationConstraints::new => DocumentAnnotation, - DocumentChunkConstraints::new => DocumentChunk, - DocumentCommentConstraints::new => DocumentComment, - DocumentFileConstraints::new => DocumentFile, - DocumentVersionConstraints::new => DocumentVersion, - }, - "chat" => try_parse! { - ChatSessionConstraints::new => ChatSession, - ChatToolCallConstraints::new => ChatToolCall, - ChatOperationConstraints::new => ChatOperation, + "files" => try_parse!(FileConstraints::new => File), + "file" => try_parse! 
{ + FileAnnotationConstraints::new => FileAnnotation, + FileChunkConstraints::new => FileChunk, }, + "pipelines" => try_parse!(PipelineConstraints::new => Pipeline), + "pipeline" => try_parse!(PipelineRunConstraints::new => PipelineRun), _ => None, } } @@ -196,18 +178,14 @@ impl ConstraintViolation { ConstraintViolation::WorkspaceWebhook(_) => "workspace_webhooks", ConstraintViolation::WorkspaceIntegrationRun(_) => "workspace_integration_runs", - // Document-related tables - ConstraintViolation::Document(_) => "documents", - ConstraintViolation::DocumentAnnotation(_) => "document_annotations", - ConstraintViolation::DocumentChunk(_) => "document_chunks", - ConstraintViolation::DocumentComment(_) => "document_comments", - ConstraintViolation::DocumentFile(_) => "document_files", - ConstraintViolation::DocumentVersion(_) => "document_versions", - - // Chat-related tables - ConstraintViolation::ChatSession(_) => "chat_sessions", - ConstraintViolation::ChatToolCall(_) => "chat_tool_calls", - ConstraintViolation::ChatOperation(_) => "chat_operations", + // File-related tables + ConstraintViolation::File(_) => "files", + ConstraintViolation::FileAnnotation(_) => "file_annotations", + ConstraintViolation::FileChunk(_) => "file_chunks", + + // Pipeline-related tables + ConstraintViolation::Pipeline(_) => "pipelines", + ConstraintViolation::PipelineRun(_) => "pipeline_runs", } } @@ -215,7 +193,6 @@ impl ConstraintViolation { /// /// This groups constraints by their business domain for higher-level categorization. pub fn functional_area(&self) -> &'static str { - // TODO: Implement functional area enumeration. match self { ConstraintViolation::Account(_) | ConstraintViolation::AccountNotification(_) @@ -230,16 +207,11 @@ impl ConstraintViolation { | ConstraintViolation::WorkspaceWebhook(_) | ConstraintViolation::WorkspaceIntegrationRun(_) => "workspaces", - ConstraintViolation::Document(_) - | ConstraintViolation::DocumentAnnotation(_) - | ConstraintViolation::DocumentChunk(_) - | ConstraintViolation::DocumentComment(_) - | ConstraintViolation::DocumentFile(_) - | ConstraintViolation::DocumentVersion(_) => "documents", + ConstraintViolation::File(_) + | ConstraintViolation::FileAnnotation(_) + | ConstraintViolation::FileChunk(_) => "files", - ConstraintViolation::ChatSession(_) - | ConstraintViolation::ChatToolCall(_) - | ConstraintViolation::ChatOperation(_) => "chat", + ConstraintViolation::Pipeline(_) | ConstraintViolation::PipelineRun(_) => "pipelines", } } @@ -261,16 +233,12 @@ impl ConstraintViolation { ConstraintViolation::WorkspaceWebhook(c) => c.categorize(), ConstraintViolation::WorkspaceIntegrationRun(c) => c.categorize(), - ConstraintViolation::Document(c) => c.categorize(), - ConstraintViolation::DocumentAnnotation(c) => c.categorize(), - ConstraintViolation::DocumentChunk(c) => c.categorize(), - ConstraintViolation::DocumentComment(c) => c.categorize(), - ConstraintViolation::DocumentFile(c) => c.categorize(), - ConstraintViolation::DocumentVersion(c) => c.categorize(), + ConstraintViolation::File(c) => c.categorize(), + ConstraintViolation::FileAnnotation(c) => c.categorize(), + ConstraintViolation::FileChunk(c) => c.categorize(), - ConstraintViolation::ChatSession(c) => c.categorize(), - ConstraintViolation::ChatToolCall(c) => c.categorize(), - ConstraintViolation::ChatOperation(c) => c.categorize(), + ConstraintViolation::Pipeline(c) => c.categorize(), + ConstraintViolation::PipelineRun(c) => c.categorize(), } } @@ -297,16 +265,12 @@ impl fmt::Display for ConstraintViolation { 
ConstraintViolation::WorkspaceWebhook(c) => write!(f, "{}", c), ConstraintViolation::WorkspaceIntegrationRun(c) => write!(f, "{}", c), - ConstraintViolation::Document(c) => write!(f, "{}", c), - ConstraintViolation::DocumentAnnotation(c) => write!(f, "{}", c), - ConstraintViolation::DocumentChunk(c) => write!(f, "{}", c), - ConstraintViolation::DocumentComment(c) => write!(f, "{}", c), - ConstraintViolation::DocumentFile(c) => write!(f, "{}", c), - ConstraintViolation::DocumentVersion(c) => write!(f, "{}", c), + ConstraintViolation::File(c) => write!(f, "{}", c), + ConstraintViolation::FileAnnotation(c) => write!(f, "{}", c), + ConstraintViolation::FileChunk(c) => write!(f, "{}", c), - ConstraintViolation::ChatSession(c) => write!(f, "{}", c), - ConstraintViolation::ChatToolCall(c) => write!(f, "{}", c), - ConstraintViolation::ChatOperation(c) => write!(f, "{}", c), + ConstraintViolation::Pipeline(c) => write!(f, "{}", c), + ConstraintViolation::PipelineRun(c) => write!(f, "{}", c), } } } @@ -340,10 +304,8 @@ mod tests { ); assert_eq!( - ConstraintViolation::new("document_versions_version_number_min"), - Some(ConstraintViolation::DocumentVersion( - DocumentVersionConstraints::VersionNumberMin - )) + ConstraintViolation::new("files_version_number_min"), + Some(ConstraintViolation::File(FileConstraints::VersionNumberMin)) ); assert_eq!(ConstraintViolation::new("unknown_constraint"), None); @@ -357,9 +319,8 @@ mod tests { let violation = ConstraintViolation::Workspace(WorkspaceConstraints::DisplayNameLength); assert_eq!(violation.table_name(), "workspaces"); - let violation = - ConstraintViolation::DocumentFile(DocumentFileConstraints::StoragePathNotEmpty); - assert_eq!(violation.table_name(), "document_files"); + let violation = ConstraintViolation::File(FileConstraints::StoragePathNotEmpty); + assert_eq!(violation.table_name(), "files"); } #[test] @@ -375,9 +336,8 @@ mod tests { ConstraintViolation::WorkspaceMember(WorkspaceMemberConstraints::ShowOrderRange); assert_eq!(violation.functional_area(), "workspaces"); - let violation = - ConstraintViolation::DocumentVersion(DocumentVersionConstraints::VersionNumberMin); - assert_eq!(violation.functional_area(), "documents"); + let violation = ConstraintViolation::File(FileConstraints::VersionNumberMin); + assert_eq!(violation.functional_area(), "files"); } #[test] diff --git a/crates/nvisy-postgres/src/types/constraint/pipeline_runs.rs b/crates/nvisy-postgres/src/types/constraint/pipeline_runs.rs new file mode 100644 index 0000000..4d92803 --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/pipeline_runs.rs @@ -0,0 +1,73 @@ +//! Pipeline runs table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Pipeline runs table constraint violations. 
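The `mod.rs` hunk above routes a raw constraint name to a per-table enum by its table prefix and exposes `categorize`, `table_name`, and `functional_area` on the result. A minimal sketch of consuming that surface when turning a violated constraint into a log line, assuming these types are re-exported under `nvisy_postgres::types`:

```rust
use nvisy_postgres::types::{ConstraintCategory, ConstraintViolation};

/// Builds a human-readable summary for a constraint name reported by Postgres.
fn constraint_summary(name: &str) -> Option<String> {
    let violation = ConstraintViolation::new(name)?;
    let category = match violation.categorize() {
        ConstraintCategory::Uniqueness => "uniqueness",
        ConstraintCategory::Chronological => "chronological",
        // Validation, business-logic, and any other categories grouped for brevity.
        _ => "validation",
    };
    Some(format!(
        "{category} constraint `{violation}` on table `{}` ({} domain)",
        violation.table_name(),
        violation.functional_area(),
    ))
}
```

For example, `constraint_summary("files_version_number_min")` would describe a validation constraint on the `files` table in the `files` domain, matching the expectations in the updated tests above.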
+#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum PipelineRunConstraints { + // Pipeline run input/output config constraints + #[strum(serialize = "pipeline_runs_input_config_size")] + InputConfigSize, + #[strum(serialize = "pipeline_runs_output_config_size")] + OutputConfigSize, + + // Pipeline run definition snapshot constraints + #[strum(serialize = "pipeline_runs_definition_snapshot_size")] + DefinitionSnapshotSize, + + // Pipeline run error constraints + #[strum(serialize = "pipeline_runs_error_size")] + ErrorSize, + + // Pipeline run metrics constraints + #[strum(serialize = "pipeline_runs_metrics_size")] + MetricsSize, + + // Pipeline run chronological constraints + #[strum(serialize = "pipeline_runs_started_after_created")] + StartedAfterCreated, + #[strum(serialize = "pipeline_runs_completed_after_started")] + CompletedAfterStarted, +} + +impl PipelineRunConstraints { + /// Creates a new [`PipelineRunConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. + pub fn categorize(&self) -> ConstraintCategory { + match self { + PipelineRunConstraints::InputConfigSize + | PipelineRunConstraints::OutputConfigSize + | PipelineRunConstraints::DefinitionSnapshotSize + | PipelineRunConstraints::ErrorSize + | PipelineRunConstraints::MetricsSize => ConstraintCategory::Validation, + + PipelineRunConstraints::StartedAfterCreated + | PipelineRunConstraints::CompletedAfterStarted => ConstraintCategory::Chronological, + } + } +} + +impl From for String { + #[inline] + fn from(val: PipelineRunConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for PipelineRunConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/constraint/pipelines.rs b/crates/nvisy-postgres/src/types/constraint/pipelines.rs new file mode 100644 index 0000000..a21f9de --- /dev/null +++ b/crates/nvisy-postgres/src/types/constraint/pipelines.rs @@ -0,0 +1,71 @@ +//! Pipelines table constraint violations. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +use super::ConstraintCategory; + +/// Pipelines table constraint violations. +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Serialize, Deserialize, Display, EnumIter, EnumString)] +#[serde(into = "String", try_from = "String")] +pub enum PipelineConstraints { + // Pipeline name validation constraints + #[strum(serialize = "pipelines_name_length")] + NameLength, + + // Pipeline description validation constraints + #[strum(serialize = "pipelines_description_length")] + DescriptionLength, + + // Pipeline definition constraints + #[strum(serialize = "pipelines_definition_size")] + DefinitionSize, + + // Pipeline metadata constraints + #[strum(serialize = "pipelines_metadata_size")] + MetadataSize, + + // Pipeline chronological constraints + #[strum(serialize = "pipelines_updated_after_created")] + UpdatedAfterCreated, + #[strum(serialize = "pipelines_deleted_after_created")] + DeletedAfterCreated, +} + +impl PipelineConstraints { + /// Creates a new [`PipelineConstraints`] from the constraint name. + pub fn new(constraint: &str) -> Option { + constraint.parse().ok() + } + + /// Returns the category of this constraint violation. 
+ pub fn categorize(&self) -> ConstraintCategory { + match self { + PipelineConstraints::NameLength + | PipelineConstraints::DescriptionLength + | PipelineConstraints::DefinitionSize + | PipelineConstraints::MetadataSize => ConstraintCategory::Validation, + + PipelineConstraints::UpdatedAfterCreated | PipelineConstraints::DeletedAfterCreated => { + ConstraintCategory::Chronological + } + } + } +} + +impl From for String { + #[inline] + fn from(val: PipelineConstraints) -> Self { + val.to_string() + } +} + +impl TryFrom for PipelineConstraints { + type Error = strum::ParseError; + + #[inline] + fn try_from(value: String) -> Result { + value.parse() + } +} diff --git a/crates/nvisy-postgres/src/types/enums/chat_session_status.rs b/crates/nvisy-postgres/src/types/enums/chat_session_status.rs deleted file mode 100644 index 31366c5..0000000 --- a/crates/nvisy-postgres/src/types/enums/chat_session_status.rs +++ /dev/null @@ -1,89 +0,0 @@ -//! Chat session status enumeration for LLM-assisted editing sessions. - -use diesel_derive_enum::DbEnum; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -/// Defines the lifecycle status of a chat editing session. -/// -/// This enumeration corresponds to the `CHAT_SESSION_STATUS` PostgreSQL enum and is used -/// to track the state of LLM-assisted document editing sessions as they progress through -/// their lifecycle from active use to archival. -#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::ChatSessionStatus"] -pub enum ChatSessionStatus { - /// Session is currently active and in use - #[db_rename = "active"] - #[serde(rename = "active")] - #[default] - Active, - - /// Session is temporarily paused but can be resumed - #[db_rename = "paused"] - #[serde(rename = "paused")] - Paused, - - /// Session has been archived and is no longer active - #[db_rename = "archived"] - #[serde(rename = "archived")] - Archived, -} - -impl ChatSessionStatus { - /// Returns whether the session is currently active. - #[inline] - pub fn is_active(self) -> bool { - matches!(self, ChatSessionStatus::Active) - } - - /// Returns whether the session is paused. - #[inline] - pub fn is_paused(self) -> bool { - matches!(self, ChatSessionStatus::Paused) - } - - /// Returns whether the session is archived. - #[inline] - pub fn is_archived(self) -> bool { - matches!(self, ChatSessionStatus::Archived) - } - - /// Returns whether the session can accept new messages or tool calls. - #[inline] - pub fn can_accept_input(self) -> bool { - matches!(self, ChatSessionStatus::Active) - } - - /// Returns whether the session can be resumed. - #[inline] - pub fn can_resume(self) -> bool { - matches!(self, ChatSessionStatus::Paused) - } - - /// Returns whether the session can be paused. - #[inline] - pub fn can_pause(self) -> bool { - matches!(self, ChatSessionStatus::Active) - } - - /// Returns whether the session can be archived. - #[inline] - pub fn can_archive(self) -> bool { - matches!(self, ChatSessionStatus::Active | ChatSessionStatus::Paused) - } - - /// Returns whether the session is in a final state. - #[inline] - pub fn is_final(self) -> bool { - matches!(self, ChatSessionStatus::Archived) - } - - /// Returns session statuses that are considered active (not archived). 
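Because every constraint enum in this patch carries `#[serde(into = "String", try_from = "String")]` together with the `strum` serialization attributes, the types round-trip through their raw Postgres constraint names. A minimal sketch, assuming `serde_json` is available and `PipelineConstraints` is re-exported under `nvisy_postgres::types`:

```rust
use nvisy_postgres::types::PipelineConstraints;

fn main() -> Result<(), serde_json::Error> {
    // Serializes via `Into<String>`, i.e. the strum `Display` name.
    let json = serde_json::to_string(&PipelineConstraints::MetadataSize)?;
    assert_eq!(json, "\"pipelines_metadata_size\"");

    // Deserializes via `TryFrom<String>`, i.e. the strum `EnumString` parser.
    let parsed: PipelineConstraints = serde_json::from_str(&json)?;
    assert_eq!(parsed, PipelineConstraints::MetadataSize);
    Ok(())
}
```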
- pub fn active_statuses() -> &'static [ChatSessionStatus] { - &[ChatSessionStatus::Active, ChatSessionStatus::Paused] - } -} diff --git a/crates/nvisy-postgres/src/types/enums/chat_tool_status.rs b/crates/nvisy-postgres/src/types/enums/chat_tool_status.rs deleted file mode 100644 index e1f4fe2..0000000 --- a/crates/nvisy-postgres/src/types/enums/chat_tool_status.rs +++ /dev/null @@ -1,99 +0,0 @@ -//! Chat tool status enumeration for tool execution tracking. - -use diesel_derive_enum::DbEnum; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -/// Defines the execution status of a chat tool call. -/// -/// This enumeration corresponds to the `CHAT_TOOL_STATUS` PostgreSQL enum and is used -/// to track the state of tool invocations within chat sessions as they progress -/// from pending through execution to completion or cancellation. -#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::ChatToolStatus"] -pub enum ChatToolStatus { - /// Tool call is queued and waiting to be executed - #[db_rename = "pending"] - #[serde(rename = "pending")] - #[default] - Pending, - - /// Tool is currently being executed - #[db_rename = "running"] - #[serde(rename = "running")] - Running, - - /// Tool execution completed successfully - #[db_rename = "completed"] - #[serde(rename = "completed")] - Completed, - - /// Tool execution was cancelled - #[db_rename = "cancelled"] - #[serde(rename = "cancelled")] - Cancelled, -} - -impl ChatToolStatus { - /// Returns whether the tool call is pending execution. - #[inline] - pub fn is_pending(self) -> bool { - matches!(self, ChatToolStatus::Pending) - } - - /// Returns whether the tool is currently running. - #[inline] - pub fn is_running(self) -> bool { - matches!(self, ChatToolStatus::Running) - } - - /// Returns whether the tool execution completed successfully. - #[inline] - pub fn is_completed(self) -> bool { - matches!(self, ChatToolStatus::Completed) - } - - /// Returns whether the tool execution was cancelled. - #[inline] - pub fn is_cancelled(self) -> bool { - matches!(self, ChatToolStatus::Cancelled) - } - - /// Returns whether the tool is in a final state. - #[inline] - pub fn is_final(self) -> bool { - matches!(self, ChatToolStatus::Completed | ChatToolStatus::Cancelled) - } - - /// Returns whether the tool can be started. - #[inline] - pub fn can_start(self) -> bool { - matches!(self, ChatToolStatus::Pending) - } - - /// Returns whether the tool can be cancelled. - #[inline] - pub fn can_cancel(self) -> bool { - matches!(self, ChatToolStatus::Pending | ChatToolStatus::Running) - } - - /// Returns whether the tool execution is active (not final). - #[inline] - pub fn is_active(self) -> bool { - matches!(self, ChatToolStatus::Pending | ChatToolStatus::Running) - } - - /// Returns tool statuses that are considered active (not final). - pub fn active_statuses() -> &'static [ChatToolStatus] { - &[ChatToolStatus::Pending, ChatToolStatus::Running] - } - - /// Returns tool statuses that represent final states. 
- pub fn final_statuses() -> &'static [ChatToolStatus] { - &[ChatToolStatus::Completed, ChatToolStatus::Cancelled] - } -} diff --git a/crates/nvisy-postgres/src/types/enums/content_segmentation.rs b/crates/nvisy-postgres/src/types/enums/content_segmentation.rs deleted file mode 100644 index 7019d0e..0000000 --- a/crates/nvisy-postgres/src/types/enums/content_segmentation.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Content segmentation enumeration for knowledge extraction. - -use diesel_derive_enum::DbEnum; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -/// Defines the content segmentation strategy for document processing. -/// -/// This enumeration corresponds to the `CONTENT_SEGMENTATION` PostgreSQL enum and is used -/// to specify how document content should be segmented for knowledge extraction. -#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::ContentSegmentation"] -pub enum ContentSegmentation { - /// No segmentation applied - process content as a whole - #[db_rename = "none"] - #[serde(rename = "none")] - None, - - /// Semantic-based segmentation - split by meaning and context - #[db_rename = "semantic"] - #[serde(rename = "semantic")] - #[default] - Semantic, - - /// Fixed-size segmentation - split by character or token count - #[db_rename = "fixed"] - #[serde(rename = "fixed")] - Fixed, -} - -impl ContentSegmentation { - /// Returns whether segmentation is disabled. - #[inline] - pub fn is_disabled(self) -> bool { - matches!(self, ContentSegmentation::None) - } - - /// Returns whether this strategy uses semantic analysis. - #[inline] - pub fn is_semantic(self) -> bool { - matches!(self, ContentSegmentation::Semantic) - } - - /// Returns whether this strategy uses fixed-size chunks. - #[inline] - pub fn is_fixed(self) -> bool { - matches!(self, ContentSegmentation::Fixed) - } - - /// Returns whether this strategy preserves context between segments. 
- #[inline] - pub fn preserves_context(self) -> bool { - self.is_semantic() - } -} diff --git a/crates/nvisy-postgres/src/types/enums/mod.rs b/crates/nvisy-postgres/src/types/enums/mod.rs index d74d4ea..1e52806 100644 --- a/crates/nvisy-postgres/src/types/enums/mod.rs +++ b/crates/nvisy-postgres/src/types/enums/mod.rs @@ -20,33 +20,30 @@ pub mod webhook_status; pub mod webhook_type; pub mod workspace_role; -// Document-related enumerations +// File-related enumerations pub mod annotation_type; -pub mod content_segmentation; pub mod file_source; -pub mod processing_status; -pub mod require_mode; -// Chat-related enumerations -pub mod chat_session_status; -pub mod chat_tool_status; +// Pipeline-related enumerations +pub mod pipeline_run_status; +pub mod pipeline_status; +pub mod pipeline_trigger_type; pub use action_token_type::ActionTokenType; pub use activity_type::{ActivityCategory, ActivityType}; pub use annotation_type::AnnotationType; pub use api_token_type::ApiTokenType; -pub use chat_session_status::ChatSessionStatus; -pub use chat_tool_status::ChatToolStatus; -pub use content_segmentation::ContentSegmentation; pub use file_source::FileSource; pub use integration_status::IntegrationStatus; pub use integration_type::IntegrationType; pub use invite_status::InviteStatus; pub use notification_event::NotificationEvent; -pub use processing_status::ProcessingStatus; -pub use require_mode::RequireMode; pub use run_type::RunType; pub use webhook_event::WebhookEvent; pub use webhook_status::WebhookStatus; pub use webhook_type::WebhookType; pub use workspace_role::WorkspaceRole; + +pub use pipeline_run_status::PipelineRunStatus; +pub use pipeline_status::PipelineStatus; +pub use pipeline_trigger_type::PipelineTriggerType; diff --git a/crates/nvisy-postgres/src/types/enums/pipeline_run_status.rs b/crates/nvisy-postgres/src/types/enums/pipeline_run_status.rs new file mode 100644 index 0000000..06aaee2 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/pipeline_run_status.rs @@ -0,0 +1,105 @@ +//! Pipeline run status enumeration indicating the execution state of a pipeline run. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines the execution status of a pipeline run. +/// +/// This enumeration corresponds to the `PIPELINE_RUN_STATUS` PostgreSQL enum and is used +/// to track the current state of a pipeline execution. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::PipelineRunStatus"] +pub enum PipelineRunStatus { + /// Run is waiting to start + #[db_rename = "queued"] + #[serde(rename = "queued")] + #[default] + Queued, + + /// Run is in progress + #[db_rename = "running"] + #[serde(rename = "running")] + Running, + + /// Run finished successfully + #[db_rename = "completed"] + #[serde(rename = "completed")] + Completed, + + /// Run failed with error + #[db_rename = "failed"] + #[serde(rename = "failed")] + Failed, + + /// Run was cancelled by user + #[db_rename = "cancelled"] + #[serde(rename = "cancelled")] + Cancelled, +} + +impl PipelineRunStatus { + /// Returns whether the run is queued. + #[inline] + pub fn is_queued(self) -> bool { + matches!(self, PipelineRunStatus::Queued) + } + + /// Returns whether the run is currently running. 
+ #[inline] + pub fn is_running(self) -> bool { + matches!(self, PipelineRunStatus::Running) + } + + /// Returns whether the run completed successfully. + #[inline] + pub fn is_completed(self) -> bool { + matches!(self, PipelineRunStatus::Completed) + } + + /// Returns whether the run failed. + #[inline] + pub fn is_failed(self) -> bool { + matches!(self, PipelineRunStatus::Failed) + } + + /// Returns whether the run was cancelled. + #[inline] + pub fn is_cancelled(self) -> bool { + matches!(self, PipelineRunStatus::Cancelled) + } + + /// Returns whether the run is still active (queued or running). + #[inline] + pub fn is_active(self) -> bool { + matches!(self, PipelineRunStatus::Queued | PipelineRunStatus::Running) + } + + /// Returns whether the run has finished (completed, failed, or cancelled). + #[inline] + pub fn is_finished(self) -> bool { + matches!( + self, + PipelineRunStatus::Completed | PipelineRunStatus::Failed | PipelineRunStatus::Cancelled + ) + } + + /// Returns whether the run finished with a terminal error state. + #[inline] + pub fn is_terminal_error(self) -> bool { + matches!(self, PipelineRunStatus::Failed) + } + + /// Returns whether the run can be retried. + #[inline] + pub fn is_retriable(self) -> bool { + matches!( + self, + PipelineRunStatus::Failed | PipelineRunStatus::Cancelled + ) + } +} diff --git a/crates/nvisy-postgres/src/types/enums/pipeline_status.rs b/crates/nvisy-postgres/src/types/enums/pipeline_status.rs new file mode 100644 index 0000000..a0e5a84 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/pipeline_status.rs @@ -0,0 +1,65 @@ +//! Pipeline status enumeration indicating the lifecycle state of a pipeline. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines the lifecycle status of a pipeline definition. +/// +/// This enumeration corresponds to the `PIPELINE_STATUS` PostgreSQL enum and is used +/// to track whether a pipeline is being configured, active and ready to run, or disabled. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::PipelineStatus"] +pub enum PipelineStatus { + /// Pipeline is being configured + #[db_rename = "draft"] + #[serde(rename = "draft")] + #[default] + Draft, + + /// Pipeline is ready to run + #[db_rename = "active"] + #[serde(rename = "active")] + Active, + + /// Pipeline is disabled + #[db_rename = "disabled"] + #[serde(rename = "disabled")] + Disabled, +} + +impl PipelineStatus { + /// Returns whether the pipeline is in draft status. + #[inline] + pub fn is_draft(self) -> bool { + matches!(self, PipelineStatus::Draft) + } + + /// Returns whether the pipeline is active. + #[inline] + pub fn is_active(self) -> bool { + matches!(self, PipelineStatus::Active) + } + + /// Returns whether the pipeline is disabled. + #[inline] + pub fn is_disabled(self) -> bool { + matches!(self, PipelineStatus::Disabled) + } + + /// Returns whether the pipeline can be executed. + #[inline] + pub fn is_runnable(self) -> bool { + matches!(self, PipelineStatus::Active) + } + + /// Returns whether the pipeline can be edited. 
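The run-status helpers above make retry policy a one-liner at call sites. A small sketch, assuming the enum is re-exported as in the `enums/mod.rs` hunk; `attempts` and `max_attempts` are hypothetical inputs, not fields from this patch:

```rust
use nvisy_postgres::types::PipelineRunStatus;

/// Returns whether a finished run should be re-enqueued.
/// `attempts` / `max_attempts` are illustrative parameters, not part of the schema.
fn can_retry(status: PipelineRunStatus, attempts: u32, max_attempts: u32) -> bool {
    status.is_retriable() && attempts < max_attempts
}
```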
+ #[inline] + pub fn is_editable(self) -> bool { + matches!(self, PipelineStatus::Draft | PipelineStatus::Disabled) + } +} diff --git a/crates/nvisy-postgres/src/types/enums/pipeline_trigger_type.rs b/crates/nvisy-postgres/src/types/enums/pipeline_trigger_type.rs new file mode 100644 index 0000000..b071977 --- /dev/null +++ b/crates/nvisy-postgres/src/types/enums/pipeline_trigger_type.rs @@ -0,0 +1,68 @@ +//! Pipeline trigger type enumeration indicating how a pipeline run was initiated. + +use diesel_derive_enum::DbEnum; +#[cfg(feature = "schema")] +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Defines how a pipeline run was initiated. +/// +/// This enumeration corresponds to the `PIPELINE_TRIGGER_TYPE` PostgreSQL enum and is used +/// to track whether a run was manually triggered, triggered by a source connector, or scheduled. +#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] +#[cfg_attr(feature = "schema", derive(JsonSchema))] +#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] +#[ExistingTypePath = "crate::schema::sql_types::PipelineTriggerType"] +pub enum PipelineTriggerType { + /// Manually triggered by user + #[db_rename = "manual"] + #[serde(rename = "manual")] + #[default] + Manual, + + /// Triggered by source connector (upload, webhook, etc.) + #[db_rename = "source"] + #[serde(rename = "source")] + Source, + + /// Triggered by schedule + #[db_rename = "scheduled"] + #[serde(rename = "scheduled")] + Scheduled, +} + +impl PipelineTriggerType { + /// Returns whether the run was manually triggered. + #[inline] + pub fn is_manual(self) -> bool { + matches!(self, PipelineTriggerType::Manual) + } + + /// Returns whether the run was triggered by a source connector. + #[inline] + pub fn is_source(self) -> bool { + matches!(self, PipelineTriggerType::Source) + } + + /// Returns whether the run was scheduled. + #[inline] + pub fn is_scheduled(self) -> bool { + matches!(self, PipelineTriggerType::Scheduled) + } + + /// Returns whether the run was triggered automatically (source or scheduled). + #[inline] + pub fn is_automatic(self) -> bool { + matches!( + self, + PipelineTriggerType::Source | PipelineTriggerType::Scheduled + ) + } + + /// Returns whether the run was triggered by user action. + #[inline] + pub fn is_user_initiated(self) -> bool { + matches!(self, PipelineTriggerType::Manual) + } +} diff --git a/crates/nvisy-postgres/src/types/enums/processing_status.rs b/crates/nvisy-postgres/src/types/enums/processing_status.rs deleted file mode 100644 index 14fd140..0000000 --- a/crates/nvisy-postgres/src/types/enums/processing_status.rs +++ /dev/null @@ -1,111 +0,0 @@ -//! Processing status enumeration for document and file processing operations. - -use diesel_derive_enum::DbEnum; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -/// Defines the current processing status of a file in the processing pipeline. -/// -/// This enumeration corresponds to the `PROCESSING_STATUS` PostgreSQL enum and is used -/// to track the state of files as they progress through various processing stages -/// such as text extraction, OCR, transcription, and analysis. 
-#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::ProcessingStatus"] -pub enum ProcessingStatus { - /// File is queued for processing and waiting to be picked up - #[db_rename = "pending"] - #[serde(rename = "pending")] - #[default] - Pending, - - /// File is currently being processed by the system - #[db_rename = "processing"] - #[serde(rename = "processing")] - Processing, - - /// Processing completed, file is ready for use - #[db_rename = "ready"] - #[serde(rename = "ready")] - Ready, - - /// Processing was canceled by user or system - #[db_rename = "canceled"] - #[serde(rename = "canceled")] - Canceled, -} - -impl ProcessingStatus { - /// Returns whether the file is in a state that allows processing. - #[inline] - pub fn can_be_processed(self) -> bool { - matches!(self, ProcessingStatus::Pending) - } - - /// Returns whether the file is currently being processed. - #[inline] - pub fn is_processing(self) -> bool { - matches!(self, ProcessingStatus::Processing) - } - - /// Returns whether the processing is in a final state. - #[inline] - pub fn is_final(self) -> bool { - matches!(self, ProcessingStatus::Ready | ProcessingStatus::Canceled) - } - - /// Returns whether the file is ready for use. - #[inline] - pub fn is_ready(self) -> bool { - matches!(self, ProcessingStatus::Ready) - } - - /// Returns whether the processing was canceled. - #[inline] - pub fn is_canceled(self) -> bool { - matches!(self, ProcessingStatus::Canceled) - } - - /// Returns whether the processing is pending (waiting to start). - #[inline] - pub fn is_pending(self) -> bool { - matches!(self, ProcessingStatus::Pending) - } - - /// Returns whether the processing can be retried. - #[inline] - pub fn can_be_retried(self) -> bool { - matches!(self, ProcessingStatus::Ready | ProcessingStatus::Canceled) - } - - /// Returns whether the processing can be canceled. - #[inline] - pub fn can_be_canceled(self) -> bool { - matches!( - self, - ProcessingStatus::Pending | ProcessingStatus::Processing - ) - } - - /// Returns whether this status represents an active processing operation. - #[inline] - pub fn is_active(self) -> bool { - matches!( - self, - ProcessingStatus::Pending | ProcessingStatus::Processing - ) - } - - /// Returns processing statuses that are considered active (not final). - pub fn active_statuses() -> &'static [ProcessingStatus] { - &[ProcessingStatus::Pending, ProcessingStatus::Processing] - } - - /// Returns processing statuses that represent final states. - pub fn final_statuses() -> &'static [ProcessingStatus] { - &[ProcessingStatus::Ready, ProcessingStatus::Canceled] - } -} diff --git a/crates/nvisy-postgres/src/types/enums/require_mode.rs b/crates/nvisy-postgres/src/types/enums/require_mode.rs deleted file mode 100644 index da52458..0000000 --- a/crates/nvisy-postgres/src/types/enums/require_mode.rs +++ /dev/null @@ -1,122 +0,0 @@ -//! Require mode enumeration for file content type classification. - -use diesel_derive_enum::DbEnum; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; - -/// Classifies the content type of uploaded files. -/// -/// This enumeration corresponds to the `REQUIRE_MODE` PostgreSQL enum and is used -/// to categorize files based on their content type for appropriate processing. 
-#[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] -#[ExistingTypePath = "crate::schema::sql_types::RequireMode"] -pub enum RequireMode { - /// Unknown or unrecognized file type. - #[db_rename = "unknown"] - #[serde(rename = "unknown")] - #[default] - Unknown, - - /// Text documents (PDF, DOCX, TXT, etc.). - #[db_rename = "document"] - #[serde(rename = "document")] - Document, - - /// Image files (PNG, JPG, SVG, etc.). - #[db_rename = "image"] - #[serde(rename = "image")] - Image, - - /// Spreadsheet files (XLSX, CSV, etc.). - #[db_rename = "spreadsheet"] - #[serde(rename = "spreadsheet")] - Spreadsheet, - - /// Presentation files (PPTX, KEY, etc.). - #[db_rename = "presentation"] - #[serde(rename = "presentation")] - Presentation, - - /// Audio files (MP3, WAV, etc.). - #[db_rename = "audio"] - #[serde(rename = "audio")] - Audio, - - /// Video files (MP4, MOV, etc.). - #[db_rename = "video"] - #[serde(rename = "video")] - Video, - - /// Archive files (ZIP, TAR, etc.). - #[db_rename = "archive"] - #[serde(rename = "archive")] - Archive, - - /// Data files (JSON, XML, CSV, etc.). - #[db_rename = "data"] - #[serde(rename = "data")] - Data, -} - -impl RequireMode { - /// Returns whether this is a text-based content type. - #[inline] - pub fn is_text_based(self) -> bool { - matches!( - self, - RequireMode::Document | RequireMode::Spreadsheet | RequireMode::Data - ) - } - - /// Returns whether this is a visual content type. - #[inline] - pub fn is_visual(self) -> bool { - matches!( - self, - RequireMode::Image | RequireMode::Video | RequireMode::Presentation - ) - } - - /// Returns whether this is a media content type. - #[inline] - pub fn is_media(self) -> bool { - matches!( - self, - RequireMode::Image | RequireMode::Audio | RequireMode::Video - ) - } - - /// Returns whether this content type can be indexed for search. - #[inline] - pub fn is_indexable(self) -> bool { - matches!( - self, - RequireMode::Document - | RequireMode::Spreadsheet - | RequireMode::Presentation - | RequireMode::Data - ) - } - - /// Returns whether this content type requires extraction before processing. - #[inline] - pub fn requires_extraction(self) -> bool { - matches!(self, RequireMode::Archive) - } - - /// Returns whether this content type requires transcription. - #[inline] - pub fn requires_transcription(self) -> bool { - matches!(self, RequireMode::Audio | RequireMode::Video) - } - - /// Returns whether this content type requires OCR. 
- #[inline] - pub fn requires_ocr(self) -> bool { - matches!(self, RequireMode::Image) - } -} diff --git a/crates/nvisy-postgres/src/types/mod.rs b/crates/nvisy-postgres/src/types/mod.rs index 5d33d3d..bd40af2 100644 --- a/crates/nvisy-postgres/src/types/mod.rs +++ b/crates/nvisy-postgres/src/types/mod.rs @@ -15,19 +15,17 @@ pub use constants::{ }; pub use constraint::{ AccountActionTokenConstraints, AccountApiTokenConstraints, AccountConstraints, - AccountNotificationConstraints, ChatOperationConstraints, ChatSessionConstraints, - ChatToolCallConstraints, ConstraintCategory, ConstraintViolation, - DocumentAnnotationConstraints, DocumentChunkConstraints, DocumentCommentConstraints, - DocumentConstraints, DocumentFileConstraints, DocumentVersionConstraints, - WorkspaceActivitiesConstraints, WorkspaceConstraints, WorkspaceIntegrationConstraints, - WorkspaceIntegrationRunConstraints, WorkspaceInviteConstraints, WorkspaceMemberConstraints, - WorkspaceWebhookConstraints, + AccountNotificationConstraints, ConstraintCategory, ConstraintViolation, + FileAnnotationConstraints, FileChunkConstraints, FileConstraints, PipelineConstraints, + PipelineRunConstraints, WorkspaceActivitiesConstraints, WorkspaceConstraints, + WorkspaceIntegrationConstraints, WorkspaceIntegrationRunConstraints, + WorkspaceInviteConstraints, WorkspaceMemberConstraints, WorkspaceWebhookConstraints, }; pub use enums::{ - ActionTokenType, ActivityCategory, ActivityType, AnnotationType, ApiTokenType, - ChatSessionStatus, ChatToolStatus, ContentSegmentation, FileSource, IntegrationStatus, - IntegrationType, InviteStatus, NotificationEvent, ProcessingStatus, RequireMode, RunType, - WebhookEvent, WebhookStatus, WebhookType, WorkspaceRole, + ActionTokenType, ActivityCategory, ActivityType, AnnotationType, ApiTokenType, FileSource, + IntegrationStatus, IntegrationType, InviteStatus, NotificationEvent, PipelineRunStatus, + PipelineStatus, PipelineTriggerType, RunType, WebhookEvent, WebhookStatus, WebhookType, + WorkspaceRole, }; pub use filtering::{FileFilter, FileFormat, InviteFilter, MemberFilter}; pub use pagination::{Cursor, CursorPage, CursorPagination, OffsetPage, OffsetPagination}; diff --git a/crates/nvisy-rig/src/rag/indexer/indexed.rs b/crates/nvisy-rig/src/rag/indexer/indexed.rs index bcc1d0b..2245ea7 100644 --- a/crates/nvisy-rig/src/rag/indexer/indexed.rs +++ b/crates/nvisy-rig/src/rag/indexer/indexed.rs @@ -1,6 +1,6 @@ //! Indexed chunk result type. -use nvisy_postgres::model::DocumentChunk; +use nvisy_postgres::model::FileChunk; use uuid::Uuid; /// Result of indexing a single chunk. 
@@ -16,8 +16,8 @@ pub struct IndexedChunk {
     pub token_count: i32,
 }
 
-impl From<DocumentChunk> for IndexedChunk {
-    fn from(chunk: DocumentChunk) -> Self {
+impl From<FileChunk> for IndexedChunk {
+    fn from(chunk: FileChunk) -> Self {
         Self {
             id: chunk.id,
             chunk_index: chunk.chunk_index,
diff --git a/crates/nvisy-rig/src/rag/indexer/mod.rs b/crates/nvisy-rig/src/rag/indexer/mod.rs
index b15f964..f0458d9 100644
--- a/crates/nvisy-rig/src/rag/indexer/mod.rs
+++ b/crates/nvisy-rig/src/rag/indexer/mod.rs
@@ -4,8 +4,8 @@
 
 mod indexed;
 
-use nvisy_postgres::model::NewDocumentChunk;
-use nvisy_postgres::query::DocumentChunkRepository;
+use nvisy_postgres::model::NewFileChunk;
+use nvisy_postgres::query::FileChunkRepository;
 use nvisy_postgres::{PgClient, Vector};
 use sha2::{Digest, Sha256};
 
@@ -99,7 +99,7 @@ impl Indexer {
 
         // Prepare new chunk records
         let model_name = self.provider.model_name();
-        let new_chunks: Vec<NewDocumentChunk> = chunks
+        let new_chunks: Vec<NewFileChunk> = chunks
             .iter()
             .zip(embeddings.iter())
             .enumerate()
@@ -117,14 +117,14 @@ impl Indexer {
                     "page": chunk.metadata.page,
                 });
 
-                NewDocumentChunk {
+                NewFileChunk {
                     file_id: self.file_id,
                     chunk_index: Some(idx as i32),
                     content_sha256,
                     content_size: Some(content_size),
                     token_count: Some(estimate_tokens(&chunk.text) as i32),
                     embedding: Vector::from(embedding_vec),
-                    embedding_model: Some(model_name.to_owned()),
+                    embedding_model: model_name.to_owned(),
                     metadata: Some(metadata),
                 }
             })
@@ -138,7 +138,7 @@ impl Indexer {
             .map_err(|e| Error::retrieval(format!("failed to get connection: {e}")))?;
 
         let created = conn
-            .create_document_chunks(new_chunks)
+            .create_file_chunks(new_chunks)
             .await
             .map_err(|e| Error::retrieval(format!("failed to create chunks: {e}")))?;
 
@@ -154,7 +154,7 @@ impl Indexer {
             .map_err(|e| Error::retrieval(format!("failed to get connection: {e}")))?;
 
         let deleted = conn
-            .delete_document_file_chunks(self.file_id)
+            .delete_file_chunks(self.file_id)
             .await
             .map_err(|e| Error::retrieval(format!("failed to delete chunks: {e}")))?;
 
diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs
index 3c0e171..3d90b78 100644
--- a/crates/nvisy-rig/src/rag/searcher/mod.rs
+++ b/crates/nvisy-rig/src/rag/searcher/mod.rs
@@ -8,8 +8,8 @@ mod scope;
 use std::collections::HashMap;
 
 use nvisy_nats::object::{DocumentKey, DocumentStore, Files};
-use nvisy_postgres::model::ScoredDocumentChunk;
-use nvisy_postgres::query::DocumentChunkRepository;
+use nvisy_postgres::model::ScoredFileChunk;
+use nvisy_postgres::query::FileChunkRepository;
 use nvisy_postgres::{PgClient, Vector};
 use tokio::io::AsyncReadExt;
 
@@ -82,15 +82,15 @@ impl Searcher {
 
         let min_score = self.min_score.unwrap_or(0.0);
 
-        let scored_chunks: Vec<ScoredDocumentChunk> = match &self.scope {
+        let scored_chunks: Vec<ScoredFileChunk> = match &self.scope {
             SearchScope::Files(file_ids) => {
                 conn.search_scored_chunks_in_files(query_vector, file_ids, min_score, limit as i64)
                     .await
             }
-            SearchScope::Documents(doc_ids) => {
-                conn.search_scored_chunks_in_documents(
+            SearchScope::Workspace(workspace_id) => {
+                conn.search_scored_chunks_in_workspace(
                     query_vector,
-                    doc_ids,
+                    *workspace_id,
                     min_score,
                     limit as i64,
                 )
diff --git a/crates/nvisy-rig/src/rag/searcher/scope.rs b/crates/nvisy-rig/src/rag/searcher/scope.rs
index 79fc131..b1bc8b0 100644
--- a/crates/nvisy-rig/src/rag/searcher/scope.rs
+++ b/crates/nvisy-rig/src/rag/searcher/scope.rs
@@ -4,14 +4,14 @@ use uuid::Uuid;
 
 /// Search scope for vector queries.
 ///
-/// Restricts search to specific files or documents to prevent cross-user data access.
+/// Restricts search to specific files or a workspace to prevent cross-user data access.
 #[derive(Debug, Clone)]
 pub enum SearchScope {
     /// Search within specific files.
     Files(Vec<Uuid>),
 
-    /// Search within specific documents (all files in those documents).
-    Documents(Vec<Uuid>),
+    /// Search within a workspace (all files in that workspace).
+    Workspace(Uuid),
 }
 
 impl SearchScope {
@@ -25,13 +25,8 @@
         Self::Files(file_ids)
     }
 
-    /// Creates a scope for a single document.
-    pub fn document(document_id: Uuid) -> Self {
-        Self::Documents(vec![document_id])
-    }
-
-    /// Creates a scope for multiple documents.
-    pub fn documents(document_ids: Vec<Uuid>) -> Self {
-        Self::Documents(document_ids)
+    /// Creates a scope for a workspace.
+    pub fn workspace(workspace_id: Uuid) -> Self {
+        Self::Workspace(workspace_id)
     }
 }
diff --git a/crates/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml
index 7e14a2f..21eb0a1 100644
--- a/crates/nvisy-runtime/Cargo.toml
+++ b/crates/nvisy-runtime/Cargo.toml
@@ -1,3 +1,5 @@
+# https://doc.rust-lang.org/cargo/reference/manifest.html
+
 [package]
 name = "nvisy-runtime"
 version = { workspace = true }
@@ -5,38 +7,48 @@ rust-version = { workspace = true }
 edition = { workspace = true }
 license = { workspace = true }
 publish = { workspace = true }
+readme = "./README.md"
 authors = { workspace = true }
 repository = { workspace = true }
 homepage = { workspace = true }
 documentation = { workspace = true }
-description = "Document processing runtime for nvisy"
-keywords = ["document", "pdf", "docx", "archive", "processing"]
-categories = ["document-processing", "parsing"]
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
 
 [features]
+# Default feature set (none for minimal dependencies)
 default = []
-config = ["clap"]
 
 [dependencies]
+# Internal crates
+nvisy-core = { workspace = true }
+nvisy-opendal = { workspace = true }
+
+# Runtime crates
 nvisy-rt-core = { workspace = true }
-nvisy-rt-document = { workspace = true }
 nvisy-rt-engine = { workspace = true }
 
-# CLI
-clap = { workspace = true, features = ["derive", "env"], optional = true }
+# Async runtime
+tokio = { workspace = true, features = ["rt", "sync", "time"] }
+futures = { workspace = true, features = [] }
+
+# Observability
+tracing = { workspace = true, features = [] }
 
 # (De)serialization
-serde = { workspace = true }
-schemars = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true, features = [] }
 
-# Derive macros
-derive_more = { workspace = true, features = ["deref", "deref_mut"] }
-thiserror = { workspace = true }
+# Derive macros & utilities
+thiserror = { workspace = true, features = [] }
+derive_more = { workspace = true, features = ["debug", "display", "from", "into"] }
+derive_builder = { workspace = true, features = [] }
 
-# Async runtime
-tokio = { workspace = true, features = ["fs"] }
+# Data types
+uuid = { workspace = true, features = ["v7", "serde"] }
 
 [dev-dependencies]
-tokio = { workspace = true, features = ["macros", "rt-multi-thread", "fs"] }
+tokio = { workspace = true, features = ["rt-multi-thread", "macros"] }
diff --git a/crates/nvisy-runtime/README.md b/crates/nvisy-runtime/README.md
new file mode 100644
index 0000000..5ec8383
--- /dev/null
+++ b/crates/nvisy-runtime/README.md
@@ -0,0 +1,69 @@
+# nvisy-runtime
+
+Workflow definitions and execution engine for Nvisy pipelines.
+ +This crate provides the core abstractions for defining and executing +data processing workflows as directed acyclic graphs (DAGs). + +## Architecture + +Workflows are represented as graphs with three types of nodes: + +- **Source nodes**: Read or produce data (entry points) +- **Transformer nodes**: Process or transform data (intermediate) +- **Sink nodes**: Write or consume data (exit points) + +## Example + +```rust +use nvisy_runtime::prelude::*; + +// Create a workflow graph +let mut graph = WorkflowGraph::new(); + +// Add nodes +let source = graph.add_node(SourceNode::new("s3_input", SourceKind::S3)); +let transform = graph.add_node(TransformerNode::new("extract_text", TransformerKind::ExtractText)); +let sink = graph.add_node(SinkNode::new("store_output", SinkKind::Database)); + +// Connect nodes +graph.connect(source, transform).unwrap(); +graph.connect(transform, sink).unwrap(); + +// Validate the workflow +graph.validate().unwrap(); +``` + +## Node Types + +### Source Nodes +- `S3` - Amazon S3 compatible storage +- `Gcs` - Google Cloud Storage +- `AzureBlob` - Azure Blob Storage +- `GoogleDrive` - Google Drive +- `Dropbox` - Dropbox cloud storage +- `OneDrive` - Microsoft OneDrive +- `HttpUpload` - Receive files from HTTP upload +- `ApiEndpoint` - Fetch from an external API + +### Transformer Nodes +- `ExtractText` - Extract text from documents +- `ChunkContent` - Split content into chunks +- `GenerateEmbeddings` - Generate vector embeddings +- `LlmTransform` - Transform using an LLM +- `ConvertFormat` - Convert file format +- `Validate` - Validate content against schema +- `Filter` - Filter data based on conditions +- `Merge` - Merge multiple inputs + +### Sink Nodes +- `S3` - Amazon S3 compatible storage +- `Gcs` - Google Cloud Storage +- `AzureBlob` - Azure Blob Storage +- `GoogleDrive` - Google Drive +- `Dropbox` - Dropbox cloud storage +- `OneDrive` - Microsoft OneDrive +- `Database` - Store in database +- `VectorStore` - Store vector embeddings +- `Webhook` - Send to webhook +- `ApiEndpoint` - Send to external API diff --git a/crates/nvisy-runtime/src/archive.rs b/crates/nvisy-runtime/src/archive.rs deleted file mode 100644 index 81056d6..0000000 --- a/crates/nvisy-runtime/src/archive.rs +++ /dev/null @@ -1,196 +0,0 @@ -//! Archive service for creating compressed archives. - -use derive_more::{Deref, DerefMut}; -use nvisy_rt_engine::{ArchiveRegistry, ArchiveType}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -/// Supported archive formats. -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - Hash, - Serialize, - Deserialize, - JsonSchema -)] -#[serde(rename_all = "lowercase")] -pub enum ArchiveFormat { - /// ZIP archive format. - Zip, - /// TAR archive format (gzip compressed). - Tar, -} - -impl ArchiveFormat { - /// Returns the file extension for this format. - #[must_use] - pub fn extension(&self) -> &'static str { - match self { - Self::Zip => "zip", - Self::Tar => "tar.gz", - } - } - - /// Returns the MIME type for this format. - #[must_use] - pub fn mime_type(&self) -> &'static str { - match self { - Self::Zip => "application/zip", - Self::Tar => "application/x-tar", - } - } - - /// Converts to the underlying [`ArchiveType`]. - #[must_use] - pub fn to_archive_type(self) -> ArchiveType { - match self { - Self::Zip => ArchiveType::Zip, - Self::Tar => ArchiveType::TarGz, - } - } -} - -/// Error type for archive operations. -#[derive(Debug, thiserror::Error)] -pub enum ArchiveError { - /// Error from the archive library. 
- #[error("Archive error: {0}")] - Archive(#[from] nvisy_rt_engine::arc::Error), - - /// IO error during archive creation. - #[error("IO error: {0}")] - Io(#[from] std::io::Error), -} - -/// Result type for archive operations. -pub type ArchiveResult = Result; - -/// Service for creating compressed archives. -/// -/// This service derefs to the underlying [`ArchiveRegistry`]. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct ArchiveService { - #[deref] - #[deref_mut] - registry: ArchiveRegistry, -} - -impl ArchiveService { - /// Creates a new archive service with default settings. - /// - /// # Panics - /// - /// Panics if the temp directory cannot be created. - #[must_use] - pub fn new() -> Self { - Self { - registry: ArchiveRegistry::new(std::env::temp_dir().join("nvisy-archive")) - .expect("failed to create archive registry"), - } - } - - /// Creates an archive from a list of files. - /// - /// # Arguments - /// - /// * `files` - A list of (filename, content) tuples. - /// * `format` - The archive format to create. - /// - /// # Errors - /// - /// Returns an error if archive creation fails. - pub async fn create_archive( - &self, - files: Vec<(String, Vec)>, - format: ArchiveFormat, - ) -> ArchiveResult> { - let archive_type = format.to_archive_type(); - - // Create a handler for assembling files - let mut handler = self.registry.create_archive_dir(archive_type)?; - - // Write all files to the directory - for (filename, content) in files { - handler.write_file(&filename, &content).await?; - } - - // Pack into an archive and read the bytes - let archive_name = format!("archive.{}", format.extension()); - let archive_file = handler.pack(&archive_name).await?; - let archive_path = archive_file - .path() - .ok_or_else(|| ArchiveError::Io(std::io::Error::other("Archive has no path")))?; - let archive_bytes = tokio::fs::read(archive_path).await?; - - Ok(archive_bytes) - } -} - -impl Default for ArchiveService { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_archive_format_extension() { - assert_eq!(ArchiveFormat::Zip.extension(), "zip"); - assert_eq!(ArchiveFormat::Tar.extension(), "tar.gz"); - } - - #[test] - fn test_archive_format_mime_type() { - assert_eq!(ArchiveFormat::Zip.mime_type(), "application/zip"); - assert_eq!(ArchiveFormat::Tar.mime_type(), "application/x-tar"); - } - - #[test] - fn test_archive_format_to_archive_type() { - assert_eq!(ArchiveFormat::Zip.to_archive_type(), ArchiveType::Zip); - assert_eq!(ArchiveFormat::Tar.to_archive_type(), ArchiveType::TarGz); - } - - #[tokio::test] - async fn test_create_zip_archive() { - let service = ArchiveService::new(); - let files = vec![ - ("test1.txt".to_string(), b"Hello".to_vec()), - ("test2.txt".to_string(), b"World".to_vec()), - ]; - - let archive = service - .create_archive(files, ArchiveFormat::Zip) - .await - .unwrap(); - assert!(!archive.is_empty()); - - // Verify it's a valid ZIP (starts with PK signature) - assert_eq!(&archive[0..2], b"PK"); - } - - #[tokio::test] - async fn test_create_tar_archive() { - let service = ArchiveService::new(); - let files = vec![ - ("test1.txt".to_string(), b"Hello".to_vec()), - ("test2.txt".to_string(), b"World".to_vec()), - ]; - - let archive = service - .create_archive(files, ArchiveFormat::Tar) - .await - .unwrap(); - assert!(!archive.is_empty()); - - // Verify it's a valid gzip (starts with 0x1f 0x8b) - assert_eq!(&archive[0..2], &[0x1f, 0x8b]); - } -} diff --git a/crates/nvisy-runtime/src/engine/config.rs 
b/crates/nvisy-runtime/src/engine/config.rs
new file mode 100644
index 0000000..9b648fa
--- /dev/null
+++ b/crates/nvisy-runtime/src/engine/config.rs
@@ -0,0 +1,48 @@
+//! Engine configuration.
+
+use std::time::Duration;
+
+use derive_builder::Builder;
+
+/// Configuration for the workflow execution engine.
+#[derive(Debug, Clone, Builder)]
+#[builder(setter(into), build_fn(validate = "Self::validate"))]
+pub struct EngineConfig {
+    /// Maximum number of concurrent workflow executions.
+    #[builder(default = "10")]
+    pub max_concurrent_runs: usize,
+
+    /// Default timeout for workflow execution.
+    #[builder(default = "Duration::from_secs(3600)")]
+    pub default_timeout: Duration,
+
+    /// Maximum number of retries for failed nodes.
+    #[builder(default = "3")]
+    pub max_retries: u32,
+
+    /// Delay between retries.
+    #[builder(default = "Duration::from_secs(1)")]
+    pub retry_delay: Duration,
+}
+
+impl EngineConfigBuilder {
+    fn validate(&self) -> Result<(), String> {
+        if let Some(max) = self.max_concurrent_runs {
+            if max == 0 {
+                return Err("max_concurrent_runs must be at least 1".into());
+            }
+        }
+        Ok(())
+    }
+}
+
+impl Default for EngineConfig {
+    fn default() -> Self {
+        Self {
+            max_concurrent_runs: 10,
+            default_timeout: Duration::from_secs(3600),
+            max_retries: 3,
+            retry_delay: Duration::from_secs(1),
+        }
+    }
+}
diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs
new file mode 100644
index 0000000..f2049c6
--- /dev/null
+++ b/crates/nvisy-runtime/src/engine/executor.rs
@@ -0,0 +1,112 @@
+//! Workflow execution engine.
+
+use std::sync::Arc;
+
+use tokio::sync::Semaphore;
+
+use crate::error::WorkflowResult;
+use crate::graph::WorkflowGraph;
+
+use super::EngineConfig;
+
+/// Tracing target for engine operations.
+const TRACING_TARGET: &str = "nvisy_runtime::engine";
+
+/// The workflow execution engine.
+///
+/// Manages workflow execution, concurrency, and resource allocation.
+pub struct Engine {
+    config: EngineConfig,
+    semaphore: Arc<Semaphore>,
+}
+
+impl Engine {
+    /// Creates a new engine with the given configuration.
+    pub fn new(config: EngineConfig) -> Self {
+        let semaphore = Arc::new(Semaphore::new(config.max_concurrent_runs));
+
+        tracing::info!(
+            target: TRACING_TARGET,
+            max_concurrent_runs = config.max_concurrent_runs,
+            default_timeout_secs = config.default_timeout.as_secs(),
+            "Workflow engine initialized"
+        );
+
+        Self { config, semaphore }
+    }
+
+    /// Creates a new engine with default configuration.
+    pub fn with_defaults() -> Self {
+        Self::new(EngineConfig::default())
+    }
+
+    /// Returns the engine configuration.
+    pub fn config(&self) -> &EngineConfig {
+        &self.config
+    }
+
+    /// Validates a workflow graph.
+    pub fn validate(&self, workflow: &WorkflowGraph) -> WorkflowResult<()> {
+        workflow.validate()
+    }
+
+    /// Executes a workflow graph.
+    ///
+    /// This will:
+    /// 1. Acquire a semaphore permit for concurrency control
+    /// 2. Validate the workflow
+    /// 3. Execute nodes in topological order
+    /// 4.
Handle errors and retries
+    pub async fn execute(&self, workflow: &WorkflowGraph) -> WorkflowResult<()> {
+        let _permit = self.semaphore.acquire().await.map_err(|e| {
+            crate::error::WorkflowError::Internal(format!("semaphore closed: {}", e))
+        })?;
+
+        // Validate the workflow first
+        workflow.validate()?;
+
+        // Get execution order
+        let order = workflow.topological_order()?;
+
+        tracing::debug!(
+            target: TRACING_TARGET,
+            node_count = order.len(),
+            "Starting workflow execution"
+        );
+
+        // TODO: Execute each node in order
+        // For now, just log the execution plan
+        for (idx, node_id) in order.iter().enumerate() {
+            if let Some(node) = workflow.get_node(*node_id) {
+                tracing::trace!(
+                    target: TRACING_TARGET,
+                    step = idx + 1,
+                    node_id = %node_id,
+                    node_name = node.name(),
+                    "Would execute node"
+                );
+            }
+        }
+
+        tracing::debug!(
+            target: TRACING_TARGET,
+            "Workflow execution completed (placeholder)"
+        );
+
+        Ok(())
+    }
+
+    /// Returns the number of available execution slots.
+    pub fn available_slots(&self) -> usize {
+        self.semaphore.available_permits()
+    }
+}
+
+impl std::fmt::Debug for Engine {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Engine")
+            .field("config", &self.config)
+            .field("available_slots", &self.available_slots())
+            .finish()
+    }
+}
diff --git a/crates/nvisy-runtime/src/engine/mod.rs b/crates/nvisy-runtime/src/engine/mod.rs
new file mode 100644
index 0000000..5d64245
--- /dev/null
+++ b/crates/nvisy-runtime/src/engine/mod.rs
@@ -0,0 +1,11 @@
+//! Workflow execution engine.
+//!
+//! This module provides the runtime for executing workflows:
+//! - [`Engine`]: The main execution engine
+//! - [`EngineConfig`]: Configuration options
+
+mod config;
+mod executor;
+
+pub use config::EngineConfig;
+pub use executor::Engine;
diff --git a/crates/nvisy-runtime/src/error.rs b/crates/nvisy-runtime/src/error.rs
new file mode 100644
index 0000000..5b6b0dc
--- /dev/null
+++ b/crates/nvisy-runtime/src/error.rs
@@ -0,0 +1,54 @@
+//! Workflow error types.
+
+use thiserror::Error;
+
+use crate::node::NodeId;
+
+/// Result type for workflow operations.
+pub type WorkflowResult<T> = Result<T, WorkflowError>;
+
+/// Errors that can occur during workflow operations.
+#[derive(Debug, Error)]
+pub enum WorkflowError {
+    /// Workflow definition is invalid.
+    #[error("invalid workflow definition: {0}")]
+    InvalidDefinition(String),
+
+    /// Node configuration is invalid.
+    #[error("invalid config for node {node_id}: {message}")]
+    InvalidNodeConfig {
+        /// ID of the node with invalid config.
+        node_id: NodeId,
+        /// Error message.
+        message: String,
+    },
+
+    /// Node execution failed.
+    #[error("node {node_id} failed: {message}")]
+    NodeFailed {
+        /// ID of the failed node.
+        node_id: NodeId,
+        /// Error message.
+        message: String,
+    },
+
+    /// Workflow execution was cancelled.
+    #[error("workflow execution cancelled")]
+    Cancelled,
+
+    /// Workflow execution timed out.
+    #[error("workflow execution timed out")]
+    Timeout,
+
+    /// Storage operation failed.
+    #[error("storage error: {0}")]
+    Storage(#[from] nvisy_opendal::StorageError),
+
+    /// Serialization/deserialization error.
+    #[error("serialization error: {0}")]
+    Serialization(#[from] serde_json::Error),
+
+    /// Internal error.
+    #[error("internal error: {0}")]
+    Internal(String),
+}
diff --git a/crates/nvisy-runtime/src/graph/edge.rs b/crates/nvisy-runtime/src/graph/edge.rs
new file mode 100644
index 0000000..6c6b401
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/edge.rs
@@ -0,0 +1,59 @@
+//!
Edge types for connecting nodes in a workflow graph.
+
+use serde::{Deserialize, Serialize};
+
+use crate::node::NodeId;
+
+/// An edge connecting two nodes in the workflow graph.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub struct Edge {
+    /// Source node ID.
+    pub from: NodeId,
+    /// Target node ID.
+    pub to: NodeId,
+    /// Optional port/slot name on the source node.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub from_port: Option<String>,
+    /// Optional port/slot name on the target node.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub to_port: Option<String>,
+}
+
+impl Edge {
+    /// Creates a new edge between two nodes.
+    pub fn new(from: NodeId, to: NodeId) -> Self {
+        Self {
+            from,
+            to,
+            from_port: None,
+            to_port: None,
+        }
+    }
+
+    /// Creates an edge with port specifications.
+    pub fn with_ports(
+        from: NodeId,
+        from_port: impl Into<String>,
+        to: NodeId,
+        to_port: impl Into<String>,
+    ) -> Self {
+        Self {
+            from,
+            to,
+            from_port: Some(from_port.into()),
+            to_port: Some(to_port.into()),
+        }
+    }
+
+    /// Sets the source port.
+    pub fn from_port(mut self, port: impl Into<String>) -> Self {
+        self.from_port = Some(port.into());
+        self
+    }
+
+    /// Sets the target port.
+    pub fn to_port(mut self, port: impl Into<String>) -> Self {
+        self.to_port = Some(port.into());
+        self
+    }
+}
diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs
new file mode 100644
index 0000000..be73ddb
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/mod.rs
@@ -0,0 +1,11 @@
+//! Workflow graph structures.
+//!
+//! This module provides the graph representation for workflows:
+//! - [`WorkflowGraph`]: The main graph structure containing nodes and edges
+//! - [`Edge`]: Connections between nodes
+
+mod edge;
+mod workflow;
+
+pub use edge::Edge;
+pub use workflow::WorkflowGraph;
diff --git a/crates/nvisy-runtime/src/graph/workflow.rs b/crates/nvisy-runtime/src/graph/workflow.rs
new file mode 100644
index 0000000..e35059c
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/workflow.rs
@@ -0,0 +1,286 @@
+//! Workflow graph definition.
+
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+
+use crate::error::{WorkflowError, WorkflowResult};
+use crate::node::{NodeData, NodeId};
+
+use super::Edge;
+
+/// A workflow graph containing nodes and edges.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct WorkflowGraph {
+    /// Map of node IDs to their data.
+    nodes: HashMap<NodeId, NodeData>,
+    /// Edges connecting nodes.
+    edges: Vec<Edge>,
+    /// Workflow metadata.
+    #[serde(default)]
+    pub metadata: serde_json::Value,
+}
+
+impl WorkflowGraph {
+    /// Creates a new empty workflow graph.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Returns the number of nodes in the graph.
+    pub fn node_count(&self) -> usize {
+        self.nodes.len()
+    }
+
+    /// Returns the number of edges in the graph.
+    pub fn edge_count(&self) -> usize {
+        self.edges.len()
+    }
+
+    /// Returns whether the graph is empty.
+    pub fn is_empty(&self) -> bool {
+        self.nodes.is_empty()
+    }
+
+    /// Adds a node to the graph and returns its ID.
+    pub fn add_node(&mut self, data: impl Into<NodeData>) -> NodeId {
+        let id = NodeId::new();
+        self.nodes.insert(id, data.into());
+        id
+    }
+
+    /// Adds a node with a specific ID.
+    pub fn add_node_with_id(&mut self, id: NodeId, data: impl Into<NodeData>) {
+        self.nodes.insert(id, data.into());
+    }
+
+    /// Removes a node and all its connected edges.
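+    ///
+    /// A small illustrative sketch of the intended behaviour (`ignore`d,
+    /// since it exercises types defined elsewhere in this patch):
+    ///
+    /// ```rust,ignore
+    /// use nvisy_runtime::prelude::*;
+    ///
+    /// let mut graph = WorkflowGraph::new();
+    /// let src = graph.add_node(SourceNode::new("in", SourceKind::HttpUpload));
+    /// let sink = graph.add_node(SinkNode::new("out", SinkKind::Database));
+    /// graph.connect(src, sink).unwrap();
+    ///
+    /// // Removing a node also drops every edge that referenced it.
+    /// graph.remove_node(src);
+    /// assert!(!graph.contains_node(src));
+    /// assert_eq!(graph.edge_count(), 0);
+    /// ```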
+    pub fn remove_node(&mut self, id: NodeId) -> Option<NodeData> {
+        // Remove all edges connected to this node
+        self.edges.retain(|e| e.from != id && e.to != id);
+        self.nodes.remove(&id)
+    }
+
+    /// Returns a reference to a node's data.
+    pub fn get_node(&self, id: NodeId) -> Option<&NodeData> {
+        self.nodes.get(&id)
+    }
+
+    /// Returns a mutable reference to a node's data.
+    pub fn get_node_mut(&mut self, id: NodeId) -> Option<&mut NodeData> {
+        self.nodes.get_mut(&id)
+    }
+
+    /// Returns whether a node exists.
+    pub fn contains_node(&self, id: NodeId) -> bool {
+        self.nodes.contains_key(&id)
+    }
+
+    /// Returns an iterator over all nodes.
+    pub fn nodes(&self) -> impl Iterator<Item = (NodeId, &NodeData)> {
+        self.nodes.iter().map(|(&id, data)| (id, data))
+    }
+
+    /// Returns an iterator over all node IDs.
+    pub fn node_ids(&self) -> impl Iterator<Item = NodeId> + '_ {
+        self.nodes.keys().copied()
+    }
+
+    /// Adds an edge between two nodes.
+    pub fn add_edge(&mut self, edge: Edge) -> WorkflowResult<()> {
+        // Validate that both nodes exist
+        if !self.nodes.contains_key(&edge.from) {
+            return Err(WorkflowError::InvalidDefinition(format!(
+                "source node {} does not exist",
+                edge.from
+            )));
+        }
+        if !self.nodes.contains_key(&edge.to) {
+            return Err(WorkflowError::InvalidDefinition(format!(
+                "target node {} does not exist",
+                edge.to
+            )));
+        }
+
+        self.edges.push(edge);
+        Ok(())
+    }
+
+    /// Connects two nodes with a simple edge.
+    pub fn connect(&mut self, from: NodeId, to: NodeId) -> WorkflowResult<()> {
+        self.add_edge(Edge::new(from, to))
+    }
+
+    /// Returns an iterator over all edges.
+    pub fn edges(&self) -> impl Iterator<Item = &Edge> {
+        self.edges.iter()
+    }
+
+    /// Returns edges originating from a node.
+    pub fn outgoing_edges(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
+        self.edges.iter().filter(move |e| e.from == id)
+    }
+
+    /// Returns edges targeting a node.
+    pub fn incoming_edges(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
+        self.edges.iter().filter(move |e| e.to == id)
+    }
+
+    /// Returns all source nodes (nodes with no incoming edges).
+    pub fn source_nodes(&self) -> Vec<NodeId> {
+        self.nodes
+            .keys()
+            .copied()
+            .filter(|&id| {
+                self.nodes.get(&id).is_some_and(|data| data.is_source())
+                    || !self.edges.iter().any(|e| e.to == id)
+            })
+            .collect()
+    }
+
+    /// Returns all sink nodes (nodes with no outgoing edges).
+    pub fn sink_nodes(&self) -> Vec<NodeId> {
+        self.nodes
+            .keys()
+            .copied()
+            .filter(|&id| {
+                self.nodes.get(&id).is_some_and(|data| data.is_sink())
+                    || !self.edges.iter().any(|e| e.from == id)
+            })
+            .collect()
+    }
+
+    /// Validates the workflow graph.
+    pub fn validate(&self) -> WorkflowResult<()> {
+        // Must have at least one node
+        if self.nodes.is_empty() {
+            return Err(WorkflowError::InvalidDefinition(
+                "workflow must have at least one node".into(),
+            ));
+        }
+
+        // Must have at least one source
+        let sources: Vec<_> = self
+            .nodes
+            .iter()
+            .filter(|(_, data)| data.is_source())
+            .collect();
+        if sources.is_empty() {
+            return Err(WorkflowError::InvalidDefinition(
+                "workflow must have at least one source node".into(),
+            ));
+        }
+
+        // Must have at least one sink
+        let sinks: Vec<_> = self
+            .nodes
+            .iter()
+            .filter(|(_, data)| data.is_sink())
+            .collect();
+        if sinks.is_empty() {
+            return Err(WorkflowError::InvalidDefinition(
+                "workflow must have at least one sink node".into(),
+            ));
+        }
+
+        // Check for cycles (simple DFS-based detection)
+        self.check_cycles()?;
+
+        Ok(())
+    }
+
+    /// Checks for cycles in the graph using DFS.
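+    ///
+    /// Uses the classic three-colour DFS: every node is `Unvisited`,
+    /// `Visiting` (on the current DFS path), or `Visited`; reaching a
+    /// `Visiting` node again means the path loops back on itself. A hedged
+    /// sketch of the observable effect through [`WorkflowGraph::validate`]:
+    ///
+    /// ```rust,ignore
+    /// use nvisy_runtime::prelude::*;
+    ///
+    /// let mut graph = WorkflowGraph::new();
+    /// let src = graph.add_node(SourceNode::new("in", SourceKind::HttpUpload));
+    /// let a = graph.add_node(TransformerNode::new("a", TransformerKind::Filter));
+    /// let b = graph.add_node(TransformerNode::new("b", TransformerKind::Merge));
+    /// let sink = graph.add_node(SinkNode::new("out", SinkKind::Database));
+    ///
+    /// graph.connect(src, a).unwrap();
+    /// graph.connect(a, b).unwrap();
+    /// graph.connect(b, a).unwrap(); // back-edge creates a cycle
+    /// graph.connect(b, sink).unwrap();
+    ///
+    /// assert!(graph.validate().is_err());
+    /// ```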
+    fn check_cycles(&self) -> WorkflowResult<()> {
+        #[derive(Clone, Copy, PartialEq, Eq)]
+        enum State {
+            Unvisited,
+            Visiting,
+            Visited,
+        }
+
+        let mut states: HashMap<NodeId, State> = self
+            .nodes
+            .keys()
+            .map(|&id| (id, State::Unvisited))
+            .collect();
+
+        fn dfs(
+            graph: &WorkflowGraph,
+            node: NodeId,
+            states: &mut HashMap<NodeId, State>,
+            path: &mut Vec<NodeId>,
+        ) -> WorkflowResult<()> {
+            states.insert(node, State::Visiting);
+            path.push(node);
+
+            for edge in graph.outgoing_edges(node) {
+                match states.get(&edge.to) {
+                    Some(State::Visiting) => {
+                        return Err(WorkflowError::InvalidDefinition(format!(
+                            "cycle detected involving node {}",
+                            edge.to
+                        )));
+                    }
+                    Some(State::Unvisited) => {
+                        dfs(graph, edge.to, states, path)?;
+                    }
+                    _ => {}
+                }
+            }
+
+            states.insert(node, State::Visited);
+            path.pop();
+            Ok(())
+        }
+
+        for &node in self.nodes.keys() {
+            if states.get(&node) == Some(&State::Unvisited) {
+                let mut path = Vec::new();
+                dfs(self, node, &mut states, &mut path)?;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Returns nodes in topological order.
+    pub fn topological_order(&self) -> WorkflowResult<Vec<NodeId>> {
+        use std::collections::VecDeque;
+
+        let mut in_degree: HashMap<NodeId, usize> = self.nodes.keys().map(|&id| (id, 0)).collect();
+
+        // Calculate in-degrees
+        for edge in &self.edges {
+            *in_degree.get_mut(&edge.to).unwrap() += 1;
+        }
+
+        // Start with nodes that have no incoming edges
+        let mut queue: VecDeque<NodeId> = in_degree
+            .iter()
+            .filter(|(_, deg)| **deg == 0)
+            .map(|(&id, _)| id)
+            .collect();
+
+        let mut result = Vec::with_capacity(self.nodes.len());
+
+        while let Some(node) = queue.pop_front() {
+            result.push(node);
+
+            for edge in self.outgoing_edges(node) {
+                let deg = in_degree.get_mut(&edge.to).unwrap();
+                *deg -= 1;
+                if *deg == 0 {
+                    queue.push_back(edge.to);
+                }
+            }
+        }
+
+        if result.len() != self.nodes.len() {
+            return Err(WorkflowError::InvalidDefinition(
+                "cycle detected in workflow graph".into(),
+            ));
+        }
+
+        Ok(result)
+    }
+}
diff --git a/crates/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs
index 01e8c9a..ac80181 100644
--- a/crates/nvisy-runtime/src/lib.rs
+++ b/crates/nvisy-runtime/src/lib.rs
@@ -1,19 +1,17 @@
-//! Document processing runtime for nvisy.
-//!
-//! This crate provides a service wrapper around the nvisy runtime engine,
-//! integrating document processing capabilities with the server infrastructure.
+#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#![doc = include_str!("../README.md")]
 
-mod archive;
-mod service;
+pub mod engine;
+mod error;
+pub mod graph;
+pub mod node;
+pub mod runtime;
 
-pub use nvisy_rt_core as rt_core;
-pub use nvisy_rt_engine as rt_engine;
+#[doc(hidden)]
+pub mod prelude;
 
-pub use archive::{ArchiveError, ArchiveFormat, ArchiveResult, ArchiveService};
-pub use service::{RuntimeConfig, RuntimeService};
+pub use error::{WorkflowError, WorkflowResult};
 
-// Re-export commonly used types from the engine
-pub use nvisy_rt_engine::{
-    BoundingBox, Capabilities, Document, DocumentFormat, Engine, EngineConfig, FormatRegistry,
-    LoadedDocument, Point, Region, RegionId, RegionKind, doc,
-};
+/// Tracing target for runtime operations.
+pub const TRACING_TARGET: &str = "nvisy_runtime";
diff --git a/crates/nvisy-runtime/src/node/data.rs b/crates/nvisy-runtime/src/node/data.rs
new file mode 100644
index 0000000..6afa2e5
--- /dev/null
+++ b/crates/nvisy-runtime/src/node/data.rs
@@ -0,0 +1,229 @@
+//! Node data types representing different processing operations.
+
+use serde::{Deserialize, Serialize};
+
+/// Data associated with a workflow node.
+/// +/// Nodes are categorized by their role in data flow: +/// - **Source**: Reads/produces data (entry points) +/// - **Transformer**: Processes/transforms data (intermediate) +/// - **Sink**: Writes/consumes data (exit points) +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum NodeData { + /// Data source node - reads or produces data. + Source(SourceNode), + /// Data transformer node - processes or transforms data. + Transformer(TransformerNode), + /// Data sink node - writes or consumes data. + Sink(SinkNode), +} + +impl NodeData { + /// Returns the node's display name. + pub fn name(&self) -> &str { + match self { + NodeData::Source(n) => &n.name, + NodeData::Transformer(n) => &n.name, + NodeData::Sink(n) => &n.name, + } + } + + /// Returns whether this is a source node. + pub const fn is_source(&self) -> bool { + matches!(self, NodeData::Source(_)) + } + + /// Returns whether this is a transformer node. + pub const fn is_transformer(&self) -> bool { + matches!(self, NodeData::Transformer(_)) + } + + /// Returns whether this is a sink node. + pub const fn is_sink(&self) -> bool { + matches!(self, NodeData::Sink(_)) + } +} + +/// A data source node that reads or produces data. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SourceNode { + /// Display name of the source. + pub name: String, + /// Type of source. + pub kind: SourceKind, + /// Source-specific configuration. + #[serde(default)] + pub config: serde_json::Value, +} + +/// Types of data sources. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SourceKind { + /// Amazon S3 compatible storage. + S3, + /// Google Cloud Storage. + Gcs, + /// Azure Blob Storage. + AzureBlob, + /// Google Drive. + GoogleDrive, + /// Dropbox. + Dropbox, + /// OneDrive. + OneDrive, + /// Receive files from HTTP upload. + HttpUpload, + /// Fetch from an external API. + ApiEndpoint, + /// Custom source type. + Custom(String), +} + +/// A data transformer node that processes or transforms data. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct TransformerNode { + /// Display name of the transformer. + pub name: String, + /// Type of transformation. + pub kind: TransformerKind, + /// Transformer-specific configuration. + #[serde(default)] + pub config: serde_json::Value, +} + +/// Types of data transformations. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TransformerKind { + /// Extract text from documents (PDF, images via OCR). + ExtractText, + /// Split content into chunks. + ChunkContent, + /// Generate vector embeddings. + GenerateEmbeddings, + /// Transform using an LLM. + LlmTransform, + /// Convert file format. + ConvertFormat, + /// Validate content against schema. + Validate, + /// Filter data based on conditions. + Filter, + /// Merge multiple inputs. + Merge, + /// Custom transformation. + Custom(String), +} + +/// A data sink node that writes or consumes data. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SinkNode { + /// Display name of the sink. + pub name: String, + /// Type of sink. + pub kind: SinkKind, + /// Sink-specific configuration. + #[serde(default)] + pub config: serde_json::Value, +} + +/// Types of data sinks. 
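+///
+/// A sink is attached to a workflow through a [`SinkNode`]. A minimal,
+/// illustrative sketch (the configuration keys are assumptions, not a
+/// prescribed schema):
+///
+/// ```rust,ignore
+/// use nvisy_runtime::prelude::*;
+/// use serde_json::json;
+///
+/// let sink = SinkNode::new("store_chunks", SinkKind::VectorStore)
+///     .with_config(json!({ "collection": "documents" }));
+/// assert!(NodeData::from(sink).is_sink());
+/// ```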
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum SinkKind {
+    /// Amazon S3 compatible storage.
+    S3,
+    /// Google Cloud Storage.
+    Gcs,
+    /// Azure Blob Storage.
+    AzureBlob,
+    /// Google Drive.
+    GoogleDrive,
+    /// Dropbox.
+    Dropbox,
+    /// OneDrive.
+    OneDrive,
+    /// Store in database.
+    Database,
+    /// Store vector embeddings.
+    VectorStore,
+    /// Send to webhook.
+    Webhook,
+    /// Send to external API.
+    ApiEndpoint,
+    /// Custom sink type.
+    Custom(String),
+}
+
+impl SourceNode {
+    /// Creates a new source node.
+    pub fn new(name: impl Into<String>, kind: SourceKind) -> Self {
+        Self {
+            name: name.into(),
+            kind,
+            config: serde_json::Value::Object(Default::default()),
+        }
+    }
+
+    /// Sets the configuration.
+    pub fn with_config(mut self, config: serde_json::Value) -> Self {
+        self.config = config;
+        self
+    }
+}
+
+impl TransformerNode {
+    /// Creates a new transformer node.
+    pub fn new(name: impl Into<String>, kind: TransformerKind) -> Self {
+        Self {
+            name: name.into(),
+            kind,
+            config: serde_json::Value::Object(Default::default()),
+        }
+    }
+
+    /// Sets the configuration.
+    pub fn with_config(mut self, config: serde_json::Value) -> Self {
+        self.config = config;
+        self
+    }
+}
+
+impl SinkNode {
+    /// Creates a new sink node.
+    pub fn new(name: impl Into<String>, kind: SinkKind) -> Self {
+        Self {
+            name: name.into(),
+            kind,
+            config: serde_json::Value::Object(Default::default()),
+        }
+    }
+
+    /// Sets the configuration.
+    pub fn with_config(mut self, config: serde_json::Value) -> Self {
+        self.config = config;
+        self
+    }
+}
+
+// Conversions to NodeData
+
+impl From<SourceNode> for NodeData {
+    fn from(node: SourceNode) -> Self {
+        NodeData::Source(node)
+    }
+}
+
+impl From<TransformerNode> for NodeData {
+    fn from(node: TransformerNode) -> Self {
+        NodeData::Transformer(node)
+    }
+}
+
+impl From<SinkNode> for NodeData {
+    fn from(node: SinkNode) -> Self {
+        NodeData::Sink(node)
+    }
+}
diff --git a/crates/nvisy-runtime/src/node/id.rs b/crates/nvisy-runtime/src/node/id.rs
new file mode 100644
index 0000000..9f6e9c3
--- /dev/null
+++ b/crates/nvisy-runtime/src/node/id.rs
@@ -0,0 +1,61 @@
+//! Node identifier type.
+
+use std::str::FromStr;
+
+use derive_more::{Debug, Display, From, Into};
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+/// Unique identifier for a node in a workflow graph.
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[derive(Debug, Display, From, Into)]
+#[debug("{_0}")]
+#[display("{_0}")]
+#[serde(transparent)]
+pub struct NodeId(Uuid);
+
+impl NodeId {
+    /// Creates a new random node ID.
+    #[inline]
+    pub fn new() -> Self {
+        Self(Uuid::now_v7())
+    }
+
+    /// Creates a node ID from an existing UUID.
+    #[inline]
+    pub const fn from_uuid(uuid: Uuid) -> Self {
+        Self(uuid)
+    }
+
+    /// Returns the underlying UUID.
+    #[inline]
+    pub const fn as_uuid(&self) -> Uuid {
+        self.0
+    }
+
+    /// Returns the UUID as bytes.
+    #[inline]
+    pub const fn as_bytes(&self) -> &[u8; 16] {
+        self.0.as_bytes()
+    }
+}
+
+impl Default for NodeId {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl FromStr for NodeId {
+    type Err = uuid::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Ok(Self(Uuid::from_str(s)?))
+    }
+}
+
+impl AsRef<Uuid> for NodeId {
+    fn as_ref(&self) -> &Uuid {
+        &self.0
+    }
+}
diff --git a/crates/nvisy-runtime/src/node/mod.rs b/crates/nvisy-runtime/src/node/mod.rs
new file mode 100644
index 0000000..7ea61d1
--- /dev/null
+++ b/crates/nvisy-runtime/src/node/mod.rs
@@ -0,0 +1,13 @@
+//! Node types for workflow graphs.
+//!
+//!
This module provides the core node abstractions:
+//! - [`NodeId`]: Unique identifier for nodes
+//! - [`NodeData`]: Data associated with each node (Source, Transformer, Sink)
+
+mod data;
+mod id;
+
+pub use data::{
+    NodeData, SinkKind, SinkNode, SourceKind, SourceNode, TransformerKind, TransformerNode,
+};
+pub use id::NodeId;
diff --git a/crates/nvisy-runtime/src/prelude.rs b/crates/nvisy-runtime/src/prelude.rs
new file mode 100644
index 0000000..0e390fb
--- /dev/null
+++ b/crates/nvisy-runtime/src/prelude.rs
@@ -0,0 +1,14 @@
+//! Prelude module for convenient imports.
+//!
+//! This module re-exports commonly used types for ergonomic imports:
+//!
+//! ```rust
+//! use nvisy_runtime::prelude::*;
+//! ```
+
+pub use crate::engine::{Engine, EngineConfig};
+pub use crate::error::{WorkflowError, WorkflowResult};
+pub use crate::graph::{Edge, WorkflowGraph};
+pub use crate::node::{
+    NodeData, NodeId, SinkKind, SinkNode, SourceKind, SourceNode, TransformerKind, TransformerNode,
+};
diff --git a/crates/nvisy-runtime/src/runtime/config.rs b/crates/nvisy-runtime/src/runtime/config.rs
new file mode 100644
index 0000000..b21312b
--- /dev/null
+++ b/crates/nvisy-runtime/src/runtime/config.rs
@@ -0,0 +1,79 @@
+//! Runtime configuration.
+
+use serde::{Deserialize, Serialize};
+
+/// Default maximum file size: 100 MB.
+const DEFAULT_MAX_FILE_SIZE: u64 = 100 * 1024 * 1024;
+
+/// Configuration for the runtime service with sensible defaults.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RuntimeConfig {
+    /// Maximum file size in bytes (optional).
+    pub max_file_size: Option<u64>,
+}
+
+impl RuntimeConfig {
+    /// Creates a new runtime configuration with defaults.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            max_file_size: None,
+        }
+    }
+
+    /// Returns the maximum file size, using the default if not set.
+    #[inline]
+    #[must_use]
+    pub fn max_file_size(&self) -> u64 {
+        self.max_file_size.unwrap_or(DEFAULT_MAX_FILE_SIZE)
+    }
+
+    /// Set the maximum file size in bytes.
+    #[must_use]
+    pub fn with_max_file_size(mut self, size: u64) -> Self {
+        self.max_file_size = Some(size);
+        self
+    }
+
+    /// Validate the configuration and return any issues.
+    pub fn validate(&self) -> Result<(), String> {
+        if self.max_file_size == Some(0) {
+            return Err("Maximum file size cannot be zero".to_string());
+        }
+        Ok(())
+    }
+}
+
+impl Default for RuntimeConfig {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_new_config() {
+        let config = RuntimeConfig::new();
+        assert_eq!(config.max_file_size(), DEFAULT_MAX_FILE_SIZE);
+        assert!(config.validate().is_ok());
+    }
+
+    #[test]
+    fn test_config_builder() {
+        let config = RuntimeConfig::new().with_max_file_size(50 * 1024 * 1024);
+        assert_eq!(config.max_file_size(), 50 * 1024 * 1024);
+        assert!(config.validate().is_ok());
+    }
+
+    #[test]
+    fn test_config_validation() {
+        let valid_config = RuntimeConfig::new();
+        assert!(valid_config.validate().is_ok());
+
+        let zero_size = RuntimeConfig::new().with_max_file_size(0);
+        assert!(zero_size.validate().is_err());
+    }
+}
diff --git a/crates/nvisy-runtime/src/runtime/mod.rs b/crates/nvisy-runtime/src/runtime/mod.rs
new file mode 100644
index 0000000..53ddfc4
--- /dev/null
+++ b/crates/nvisy-runtime/src/runtime/mod.rs
@@ -0,0 +1,16 @@
+//! Runtime services for document processing.
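+//!
+//! A hedged usage sketch of the services defined below (the engine
+//! capabilities come from `nvisy_rt_engine` via `Deref`):
+//!
+//! ```rust,ignore
+//! use nvisy_runtime::runtime::{RuntimeConfig, RuntimeService};
+//!
+//! // Cap file size at 25 MB instead of the 100 MB default.
+//! let config = RuntimeConfig::new().with_max_file_size(25 * 1024 * 1024);
+//! config.validate().expect("valid runtime configuration");
+//!
+//! let service = RuntimeService::with_config(&config);
+//! // `RuntimeService` derefs to the underlying engine.
+//! let _extensions = service.supported_extensions();
+//! ```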
+ +mod config; +mod service; + +pub use config::RuntimeConfig; +pub use service::RuntimeService; + +// Re-export commonly used types from the runtime crates +pub use nvisy_rt_core as rt_core; +pub use nvisy_rt_engine as rt_engine; + +pub use nvisy_rt_engine::{ + BoundingBox, Capabilities, Document, DocumentFormat, Engine, EngineConfig, FormatRegistry, + LoadedDocument, Point, Region, RegionId, RegionKind, doc, +}; diff --git a/crates/nvisy-runtime/src/runtime/service.rs b/crates/nvisy-runtime/src/runtime/service.rs new file mode 100644 index 0000000..a208924 --- /dev/null +++ b/crates/nvisy-runtime/src/runtime/service.rs @@ -0,0 +1,79 @@ +//! Runtime service for document processing. + +use derive_more::{Deref, DerefMut}; +use nvisy_rt_engine::{Engine, EngineConfig}; + +use super::RuntimeConfig; + +/// Runtime service for document processing. +/// +/// Wraps the nvisy runtime engine and provides document loading +/// and processing capabilities for workflows. +/// +/// This service derefs to the underlying [`Engine`], allowing direct +/// access to all engine methods. +#[derive(Debug, Clone, Deref, DerefMut)] +pub struct RuntimeService { + #[deref] + #[deref_mut] + engine: Engine, +} + +impl RuntimeService { + /// Creates a new runtime service with default configuration. + #[must_use] + pub fn new() -> Self { + Self { + engine: Engine::new(), + } + } + + /// Creates a new runtime service with custom configuration. + #[must_use] + pub fn with_config(config: &RuntimeConfig) -> Self { + let engine_config = EngineConfig { + max_file_size: Some(config.max_file_size()), + ..Default::default() + }; + Self { + engine: Engine::with_config(engine_config), + } + } + + /// Returns a reference to the underlying engine. + #[must_use] + pub fn engine(&self) -> &Engine { + &self.engine + } + + /// Returns a mutable reference to the underlying engine. + #[must_use] + pub fn engine_mut(&mut self) -> &mut Engine { + &mut self.engine + } +} + +impl Default for RuntimeService { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_service_deref() { + let service = RuntimeService::new(); + // Test that we can call Engine methods directly via Deref + let _extensions = service.supported_extensions(); + let _mimes = service.supported_mime_types(); + } + + #[test] + fn test_service_with_config() { + let config = RuntimeConfig::new().with_max_file_size(10 * 1024 * 1024); + let _service = RuntimeService::with_config(&config); + } +} diff --git a/crates/nvisy-runtime/src/service.rs b/crates/nvisy-runtime/src/service.rs deleted file mode 100644 index 9862dd0..0000000 --- a/crates/nvisy-runtime/src/service.rs +++ /dev/null @@ -1,157 +0,0 @@ -//! Runtime service for document processing. - -use derive_more::{Deref, DerefMut}; -use nvisy_rt_engine::{Engine, EngineConfig}; -use serde::{Deserialize, Serialize}; - -#[cfg(feature = "config")] -use clap::Args; - -/// Configuration for the runtime service with sensible defaults. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "config", derive(Args))] -pub struct RuntimeConfig { - /// Maximum file size in bytes (optional). - #[cfg_attr( - feature = "config", - arg(long = "runtime-max-file-size", env = "RUNTIME_MAX_FILE_SIZE") - )] - pub runtime_max_file_size: Option, -} - -// Default values -const DEFAULT_MAX_FILE_SIZE: u64 = 100 * 1024 * 1024; // 100 MB - -impl RuntimeConfig { - /// Creates a new runtime configuration with defaults. 
- #[must_use] - pub fn new() -> Self { - Self { - runtime_max_file_size: None, - } - } - - /// Returns the maximum file size, using the default if not set. - #[inline] - #[must_use] - pub fn max_file_size(&self) -> u64 { - self.runtime_max_file_size.unwrap_or(DEFAULT_MAX_FILE_SIZE) - } - - /// Set the maximum file size in bytes. - #[must_use] - pub fn with_max_file_size(mut self, size: u64) -> Self { - self.runtime_max_file_size = Some(size); - self - } - - /// Validate the configuration and return any issues. - pub fn validate(&self) -> Result<(), String> { - if self.runtime_max_file_size == Some(0) { - return Err("Maximum file size cannot be zero".to_string()); - } - Ok(()) - } -} - -impl Default for RuntimeConfig { - fn default() -> Self { - Self::new() - } -} - -/// Runtime service for document processing. -/// -/// Wraps the nvisy runtime engine and provides document loading -/// and processing capabilities for the server. -/// -/// This service derefs to the underlying [`Engine`], allowing direct -/// access to all engine methods. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct RuntimeService { - #[deref] - #[deref_mut] - engine: Engine, -} - -impl RuntimeService { - /// Creates a new runtime service with default configuration. - #[must_use] - pub fn new() -> Self { - Self { - engine: Engine::new(), - } - } - - /// Creates a new runtime service with custom configuration. - #[must_use] - pub fn with_config(config: &RuntimeConfig) -> Self { - let engine_config = EngineConfig { - max_file_size: Some(config.max_file_size()), - ..Default::default() - }; - Self { - engine: Engine::with_config(engine_config), - } - } - - /// Returns a reference to the underlying engine. - #[must_use] - pub fn engine(&self) -> &Engine { - &self.engine - } - - /// Returns a mutable reference to the underlying engine. 
- #[must_use] - pub fn engine_mut(&mut self) -> &mut Engine { - &mut self.engine - } -} - -impl Default for RuntimeService { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_new_config() { - let config = RuntimeConfig::new(); - assert_eq!(config.max_file_size(), DEFAULT_MAX_FILE_SIZE); - assert!(config.validate().is_ok()); - } - - #[test] - fn test_config_builder() { - let config = RuntimeConfig::new().with_max_file_size(50 * 1024 * 1024); - assert_eq!(config.max_file_size(), 50 * 1024 * 1024); - assert!(config.validate().is_ok()); - } - - #[test] - fn test_config_validation() { - let valid_config = RuntimeConfig::new(); - assert!(valid_config.validate().is_ok()); - - let zero_size = RuntimeConfig::new().with_max_file_size(0); - assert!(zero_size.validate().is_err()); - } - - #[test] - fn test_service_deref() { - let service = RuntimeService::new(); - // Test that we can call Engine methods directly via Deref - let _extensions = service.supported_extensions(); - let _mimes = service.supported_mime_types(); - } - - #[test] - fn test_service_with_config() { - let config = RuntimeConfig::new().with_max_file_size(10 * 1024 * 1024); - let _service = RuntimeService::with_config(&config); - } -} diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index e94e498..dfc0410 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -36,7 +36,6 @@ config = [ nvisy-nats = { workspace = true, features = ["schema"] } nvisy-postgres = { workspace = true, features = ["schema"] } nvisy-rig = { workspace = true, features = [] } -nvisy-runtime = { workspace = true } nvisy-webhook = { workspace = true, features = ["schema"] } # Async runtime diff --git a/crates/nvisy-server/src/extract/auth/auth_provider.rs b/crates/nvisy-server/src/extract/auth/auth_provider.rs index 773d196..19e926d 100644 --- a/crates/nvisy-server/src/extract/auth/auth_provider.rs +++ b/crates/nvisy-server/src/extract/auth/auth_provider.rs @@ -5,7 +5,7 @@ //! The trait is designed to be implemented by types that represent authenticated users. use nvisy_postgres::model::WorkspaceMember; -use nvisy_postgres::query::{DocumentRepository, WorkspaceMemberRepository}; +use nvisy_postgres::query::{FileRepository, WorkspaceMemberRepository}; use nvisy_postgres::{PgConn, PgError}; use uuid::Uuid; @@ -120,15 +120,15 @@ pub trait AuthProvider { } } - /// Checks if a user has permission to access a document. + /// Checks if a user has permission to access a file. /// - /// This method resolves the document's workspace and checks workspace-level permissions. - /// Document owners have special privileges for write operations. + /// This method resolves the file's workspace and checks workspace-level permissions. + /// File owners have special privileges for write operations. /// /// # Arguments /// /// * `conn` - Database connection - /// * `document_id` - Document to check access for + /// * `file_id` - File to check access for /// * `permission` - Required permission level /// /// # Returns @@ -139,40 +139,40 @@ pub trait AuthProvider { /// /// Returns database errors if queries fail. 
#[allow(async_fn_in_trait)] - async fn check_document_permission( + async fn check_file_permission( &self, conn: &mut PgConn, - document_id: Uuid, + file_id: Uuid, permission: Permission, ) -> Result { - // Get the document to find its workspace - let document = conn.find_document_by_id(document_id).await?; + // Get the file to find its workspace + let file = conn.find_file_by_id(file_id).await?; - let Some(document) = document else { + let Some(file) = file else { tracing::warn!( target: TRACING_TARGET, account_id = %self.account_id(), - document_id = %document_id, - "access denied: document not found" + file_id = %file_id, + "access denied: file not found" ); - return Ok(AuthResult::denied("Document not found")); + return Ok(AuthResult::denied("File not found")); }; - // Document owners have special privileges for destructive operations - let is_document_owner = document.account_id == self.account_id(); + // File owners have special privileges for destructive operations + let is_file_owner = file.account_id == self.account_id(); let requires_ownership = matches!( permission, - Permission::UpdateDocuments | Permission::DeleteDocuments + Permission::UpdateFiles | Permission::DeleteFiles ); - if requires_ownership && !is_document_owner && !self.is_admin() { + if requires_ownership && !is_file_owner && !self.is_admin() { // Non-owners need explicit workspace-level permissions for destructive operations return self - .check_workspace_permission(conn, document.workspace_id, permission) + .check_workspace_permission(conn, file.workspace_id, permission) .await; } - self.check_workspace_permission(conn, document.workspace_id, permission) + self.check_workspace_permission(conn, file.workspace_id, permission) .await } @@ -274,17 +274,17 @@ pub trait AuthProvider { auth_result.into_result() } - /// Authorizes document access with ownership and workspace-level checks. + /// Authorizes file access with ownership and workspace-level checks. /// - /// This convenience method handles complex document authorization logic: - /// - Document owners have enhanced privileges for their own documents + /// This convenience method handles complex file authorization logic: + /// - File owners have enhanced privileges for their own files /// - All access requires at least workspace membership /// - Global administrators bypass all restrictions /// /// # Arguments /// - /// * `pg_client` - Database client for verification - /// * `document_id` - Target document identifier + /// * `conn` - Database connection for verification + /// * `file_id` - Target file identifier /// * `permission` - Required permission level /// /// # Returns @@ -295,14 +295,14 @@ pub trait AuthProvider { /// /// Returns `Forbidden` error if access is denied, or propagates database errors. #[allow(async_fn_in_trait)] - async fn authorize_document( + async fn authorize_file( &self, conn: &mut PgConn, - document_id: Uuid, + file_id: Uuid, permission: Permission, ) -> Result> { let auth_result = self - .check_document_permission(conn, document_id, permission) + .check_file_permission(conn, file_id, permission) .await?; auth_result.into_result() } diff --git a/crates/nvisy-server/src/extract/auth/permission.rs b/crates/nvisy-server/src/extract/auth/permission.rs index 5b98217..ec2c3b1 100644 --- a/crates/nvisy-server/src/extract/auth/permission.rs +++ b/crates/nvisy-server/src/extract/auth/permission.rs @@ -23,17 +23,7 @@ pub enum Permission { /// Can delete the entire workspace. 
DeleteWorkspace, - // Document permissions - /// Can view and read documents in the workspace. - ViewDocuments, - /// Can create new documents in the workspace. - CreateDocuments, - /// Can edit existing documents. - UpdateDocuments, - /// Can delete documents from the workspace. - DeleteDocuments, - - // File and asset permissions + // File permissions /// Can view and download files. ViewFiles, /// Can upload new files to the workspace. @@ -44,6 +34,20 @@ pub enum Permission { DownloadFiles, /// Can delete files from the workspace. DeleteFiles, + /// Can create and manage annotations on files. + AnnotateFiles, + + // Pipeline permissions + /// Can view pipelines in the workspace. + ViewPipelines, + /// Can create new pipelines. + CreatePipelines, + /// Can update existing pipelines. + UpdatePipelines, + /// Can delete pipelines. + DeletePipelines, + /// Can execute pipeline runs. + RunPipelines, // Member management permissions /// Can view workspace members and their roles. @@ -72,12 +76,6 @@ pub enum Permission { DeleteWebhooks, /// Can test webhooks by sending test payloads. TestWebhooks, - - // Workspace settings and configuration - /// Can view workspace settings. - ViewSettings, - /// Can modify workspace settings and configuration. - ManageSettings, } impl Permission { @@ -94,25 +92,26 @@ impl Permission { #[must_use] pub const fn minimum_required_role(self) -> WorkspaceRole { match self { - // Guest-level permissions + // Guest-level permissions (read-only access) Self::ViewWorkspace - | Self::ViewDocuments | Self::ViewFiles + | Self::ViewPipelines | Self::ViewMembers | Self::ViewIntegrations - | Self::ViewWebhooks - | Self::ViewSettings => WorkspaceRole::Guest, - - // Member-level permissions - Self::CreateDocuments - | Self::UpdateDocuments - | Self::DeleteDocuments - | Self::UploadFiles + | Self::ViewWebhooks => WorkspaceRole::Guest, + + // Member-level permissions (create and modify own resources) + Self::UploadFiles | Self::UpdateFiles | Self::DownloadFiles - | Self::DeleteFiles => WorkspaceRole::Member, - - // Admin-level permissions + | Self::DeleteFiles + | Self::AnnotateFiles + | Self::CreatePipelines + | Self::UpdatePipelines + | Self::DeletePipelines + | Self::RunPipelines => WorkspaceRole::Member, + + // Admin-level permissions (manage workspace resources) Self::UpdateWorkspace | Self::InviteMembers | Self::RemoveMembers @@ -120,8 +119,7 @@ impl Permission { | Self::CreateWebhooks | Self::UpdateWebhooks | Self::DeleteWebhooks - | Self::TestWebhooks - | Self::ManageSettings => WorkspaceRole::Admin, + | Self::TestWebhooks => WorkspaceRole::Admin, // Owner-only permissions (highest level) Self::DeleteWorkspace | Self::ManageRoles => WorkspaceRole::Owner, diff --git a/crates/nvisy-server/src/handler/annotations.rs b/crates/nvisy-server/src/handler/annotations.rs index 2eea75a..43543ad 100644 --- a/crates/nvisy-server/src/handler/annotations.rs +++ b/crates/nvisy-server/src/handler/annotations.rs @@ -7,7 +7,7 @@ use aide::transform::TransformOperation; use axum::extract::State; use axum::http::StatusCode; use nvisy_postgres::PgClient; -use nvisy_postgres::query::{DocumentAnnotationRepository, DocumentFileRepository}; +use nvisy_postgres::query::{FileAnnotationRepository, FileRepository}; use crate::extract::{AuthProvider, AuthState, Json, Path, Permission, Query, ValidateJson}; use crate::handler::request::{ @@ -24,8 +24,8 @@ const TRACING_TARGET: &str = "nvisy_server::handler::annotations"; async fn find_annotation( conn: &mut nvisy_postgres::PgConn, annotation_id: 
uuid::Uuid, -) -> Result { - conn.find_document_annotation_by_id(annotation_id) +) -> Result { + conn.find_file_annotation_by_id(annotation_id) .await? .ok_or_else(|| { ErrorKind::NotFound @@ -38,14 +38,12 @@ async fn find_annotation( async fn find_file( conn: &mut nvisy_postgres::PgConn, file_id: uuid::Uuid, -) -> Result { - conn.find_document_file_by_id(file_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("File not found") - .with_resource("file") - }) +) -> Result { + conn.find_file_by_id(file_id).await?.ok_or_else(|| { + ErrorKind::NotFound + .with_message("File not found") + .with_resource("file") + }) } /// Creates a new annotation on a file. @@ -68,11 +66,11 @@ async fn create_annotation( let file = find_file(&mut conn, path_params.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::CreateDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::AnnotateFiles) .await?; let new_annotation = request.into_model(path_params.file_id, auth_state.account_id); - let annotation = conn.create_document_annotation(new_annotation).await?; + let annotation = conn.create_file_annotation(new_annotation).await?; tracing::info!( target: TRACING_TARGET, @@ -116,11 +114,11 @@ async fn list_annotations( let file = find_file(&mut conn, path_params.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::ViewDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::ViewFiles) .await?; let page = conn - .cursor_list_file_document_annotations(path_params.file_id, pagination.into()) + .cursor_list_file_annotations(path_params.file_id, pagination.into()) .await?; let response = AnnotationsPage::from_cursor_page(page, Annotation::from_model); @@ -160,10 +158,10 @@ async fn get_annotation( let mut conn = pg_client.get_connection().await?; let annotation = find_annotation(&mut conn, path_params.annotation_id).await?; - let file = find_file(&mut conn, annotation.document_file_id).await?; + let file = find_file(&mut conn, annotation.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::ViewDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::ViewFiles) .await?; tracing::debug!(target: TRACING_TARGET, "Annotation retrieved"); @@ -204,14 +202,14 @@ async fn update_annotation( return Err(ErrorKind::Forbidden.with_message("You can only update your own annotations")); } - let file = find_file(&mut conn, annotation.document_file_id).await?; + let file = find_file(&mut conn, annotation.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::CreateDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::AnnotateFiles) .await?; let updated = conn - .update_document_annotation(path_params.annotation_id, request.into_model()) + .update_file_annotation(path_params.annotation_id, request.into_model()) .await?; tracing::info!(target: TRACING_TARGET, "Annotation updated"); @@ -252,13 +250,13 @@ async fn delete_annotation( return Err(ErrorKind::Forbidden.with_message("You can only delete your own annotations")); } - let file = find_file(&mut conn, annotation.document_file_id).await?; + let file = find_file(&mut conn, annotation.file_id).await?; auth_state - .authorize_workspace(&mut conn, file.workspace_id, Permission::CreateDocuments) + .authorize_workspace(&mut conn, file.workspace_id, Permission::AnnotateFiles) .await?; - 
conn.delete_document_annotation(path_params.annotation_id) + conn.delete_file_annotation(path_params.annotation_id) .await?; tracing::info!(target: TRACING_TARGET, "Annotation deleted"); diff --git a/crates/nvisy-server/src/handler/chat.rs b/crates/nvisy-server/src/handler/chat.rs deleted file mode 100644 index bf5ebe7..0000000 --- a/crates/nvisy-server/src/handler/chat.rs +++ /dev/null @@ -1,420 +0,0 @@ -//! Chat session handlers for LLM-assisted document editing. -//! -//! This module provides comprehensive chat session management functionality within workspaces, -//! including creation, reading, updating, and deletion of sessions. All operations -//! are secured with proper authorization and follow workspace-based access control. -//! -//! ## Streaming -//! -//! The `/chat/sessions/{sessionId}/messages` endpoint uses Server-Sent Events (SSE) to stream -//! LLM responses back to the client. Clients can cancel generation by closing the connection -//! (e.g., using `AbortController` in JavaScript). - -use std::convert::Infallible; - -use aide::axum::ApiRouter; -use aide::transform::TransformOperation; -use axum::extract::State; -use axum::http::StatusCode; -use axum::response::sse::{Event, KeepAlive, Sse}; -use futures::StreamExt; -use nvisy_postgres::PgClient; -use nvisy_postgres::query::ChatSessionRepository; -use nvisy_rig::RigService; -use tokio_stream::wrappers::ReceiverStream; - -use crate::extract::{AuthProvider, AuthState, Json, Path, Permission, Query, ValidateJson}; -use crate::handler::request::{ - ChatSessionPathParams, CreateChatSession, CursorPagination, SendChatMessage, UpdateChatSession, - WorkspacePathParams, -}; -use crate::handler::response::{ChatSession, ChatSessionsPage, ChatStreamEvent, ErrorResponse}; -use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; - -/// Tracing target for chat session operations. -const TRACING_TARGET: &str = "nvisy_server::handler::chat"; - -/// Creates a new chat session. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn create_chat_session( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Creating chat session"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_workspace( - &mut conn, - path_params.workspace_id, - Permission::CreateDocuments, - ) - .await?; - - let new_session = request.into_model(path_params.workspace_id, auth_state.account_id); - let session = conn.create_chat_session(new_session).await?; - - tracing::info!( - target: TRACING_TARGET, - session_id = %session.id, - "Chat session created", - ); - - Ok((StatusCode::CREATED, Json(ChatSession::from_model(session)))) -} - -fn create_chat_session_docs(op: TransformOperation) -> TransformOperation { - op.summary("Create chat session") - .description("Creates a new LLM-assisted editing session for a document file.") - .response::<201, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() -} - -/// Returns all chat sessions for a workspace. 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn get_all_chat_sessions( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - Query(pagination): Query, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Listing chat sessions"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_workspace( - &mut conn, - path_params.workspace_id, - Permission::ViewDocuments, - ) - .await?; - - let page = conn - .cursor_list_chat_sessions(path_params.workspace_id, pagination.into()) - .await?; - - let response = ChatSessionsPage::from_cursor_page(page, ChatSession::from_model); - - tracing::debug!( - target: TRACING_TARGET, - session_count = response.items.len(), - "Chat sessions listed", - ); - - Ok((StatusCode::OK, Json(response))) -} - -fn get_all_chat_sessions_docs(op: TransformOperation) -> TransformOperation { - op.summary("List chat sessions") - .description("Lists all chat sessions in a workspace with pagination.") - .response::<200, Json>() - .response::<401, Json>() - .response::<403, Json>() -} - -/// Gets a chat session by its session ID. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - session_id = %path_params.session_id, - ) -)] -async fn get_chat_session( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Reading chat session"); - - let mut conn = pg_client.get_connection().await?; - - let session = find_chat_session(&mut conn, path_params.session_id).await?; - - auth_state - .authorize_workspace(&mut conn, session.workspace_id, Permission::ViewDocuments) - .await?; - - tracing::info!(target: TRACING_TARGET, "Chat session read"); - - Ok((StatusCode::OK, Json(ChatSession::from_model(session)))) -} - -fn get_chat_session_docs(op: TransformOperation) -> TransformOperation { - op.summary("Get chat session") - .description("Returns chat session details by ID.") - .response::<200, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Updates a chat session by its session ID. 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - session_id = %path_params.session_id, - ) -)] -async fn update_chat_session( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Updating chat session"); - - let mut conn = pg_client.get_connection().await?; - - let existing = find_chat_session(&mut conn, path_params.session_id).await?; - - auth_state - .authorize_workspace( - &mut conn, - existing.workspace_id, - Permission::UpdateDocuments, - ) - .await?; - - let update_data = request.into_model(); - let session = conn - .update_chat_session(path_params.session_id, update_data) - .await?; - - tracing::info!(target: TRACING_TARGET, "Chat session updated"); - - Ok((StatusCode::OK, Json(ChatSession::from_model(session)))) -} - -fn update_chat_session_docs(op: TransformOperation) -> TransformOperation { - op.summary("Update chat session") - .description("Updates chat session metadata and configuration.") - .response::<200, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Deletes (archives) a chat session by its session ID. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - session_id = %path_params.session_id, - ) -)] -async fn delete_chat_session( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, -) -> Result { - tracing::debug!(target: TRACING_TARGET, "Deleting chat session"); - - let mut conn = pg_client.get_connection().await?; - - let session = find_chat_session(&mut conn, path_params.session_id).await?; - - auth_state - .authorize_workspace(&mut conn, session.workspace_id, Permission::DeleteDocuments) - .await?; - - conn.delete_chat_session(path_params.session_id).await?; - - tracing::info!(target: TRACING_TARGET, "Chat session deleted"); - - Ok(StatusCode::OK) -} - -fn delete_chat_session_docs(op: TransformOperation) -> TransformOperation { - op.summary("Delete chat session") - .description("Archives the chat session (soft delete).") - .response_with::<200, (), _>(|res| res.description("Chat session deleted.")) - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Sends a message to a chat session and streams the response via SSE. -/// -/// The response is streamed as Server-Sent Events with different event types: -/// - `thinking`: Agent is processing/planning -/// - `text_delta`: Incremental text from the LLM -/// - `tool_call`: Agent is calling a tool -/// - `tool_result`: Tool execution completed -/// - `proposed_edit`: Agent proposes a document edit -/// - `edit_applied`: Edit was auto-applied -/// - `done`: Response completed with final summary -/// - `error`: An error occurred -/// -/// Clients can cancel generation by closing the connection (AbortController). 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - session_id = %path_params.session_id, - ) -)] -async fn send_message( - State(pg_client): State, - State(rig_service): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result { - tracing::debug!(target: TRACING_TARGET, "Sending chat message"); - - let mut conn = pg_client.get_connection().await?; - - // Verify session exists and user has access - let session = find_chat_session(&mut conn, path_params.session_id).await?; - - auth_state - .authorize_workspace(&mut conn, session.workspace_id, Permission::UpdateDocuments) - .await?; - - // Create SSE stream - let (tx, rx) = tokio::sync::mpsc::channel::>(32); - - // Get the chat stream from rig service - let chat_stream = rig_service - .chat() - .chat(path_params.session_id, &request.content) - .await - .map_err(|e| { - tracing::error!(target: TRACING_TARGET, error = %e, "Failed to create chat stream"); - ErrorKind::InternalServerError - .with_message("Failed to start chat") - .with_context(e.to_string()) - })?; - - // Spawn task to process the chat stream and send SSE events - let session_id = path_params.session_id; - tokio::spawn(async move { - let mut stream = std::pin::pin!(chat_stream); - - while let Some(result) = stream.next().await { - let event = match result { - Ok(chat_event) => { - let stream_event = ChatStreamEvent::new(chat_event); - let event_type = stream_event.event_type(); - - match serde_json::to_string(&stream_event) { - Ok(json) => Event::default().event(event_type).data(json), - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - session_id = %session_id, - error = %e, - "Failed to serialize chat event" - ); - continue; - } - } - } - Err(e) => { - tracing::error!( - target: TRACING_TARGET, - session_id = %session_id, - error = %e, - "Chat stream error" - ); - // Send error event and break - let error_event = ChatStreamEvent::new(nvisy_rig::chat::ChatEvent::Error { - message: e.to_string(), - }); - if let Ok(json) = serde_json::to_string(&error_event) { - let _ = tx - .send(Ok(Event::default().event("error").data(json))) - .await; - } - break; - } - }; - - // Send the event; if send fails, client disconnected (cancelled) - if tx.send(Ok(event)).await.is_err() { - tracing::info!( - target: TRACING_TARGET, - session_id = %session_id, - "Client disconnected, cancelling chat stream" - ); - break; - } - } - - tracing::debug!( - target: TRACING_TARGET, - session_id = %session_id, - "Chat stream completed" - ); - }); - - tracing::info!( - target: TRACING_TARGET, - session_id = %path_params.session_id, - "Chat message stream started" - ); - - Ok(Sse::new(ReceiverStream::new(rx)).keep_alive(KeepAlive::default())) -} - -/// Finds a chat session by ID or returns NotFound error. -async fn find_chat_session( - conn: &mut nvisy_postgres::PgConn, - session_id: uuid::Uuid, -) -> Result { - conn.find_chat_session_by_id(session_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("Chat session not found.") - .with_resource("chat_session") - }) -} - -/// Returns a [`Router`] with all related routes. 
-/// -/// [`Router`]: axum::routing::Router -pub fn routes() -> ApiRouter { - use aide::axum::routing::*; - - ApiRouter::new() - .api_route( - "/workspaces/{workspaceId}/chat/sessions", - post_with(create_chat_session, create_chat_session_docs) - .get_with(get_all_chat_sessions, get_all_chat_sessions_docs), - ) - .api_route( - "/chat/sessions/{sessionId}", - get_with(get_chat_session, get_chat_session_docs) - .patch_with(update_chat_session, update_chat_session_docs) - .delete_with(delete_chat_session, delete_chat_session_docs), - ) - // SSE endpoint - uses regular axum routing as aide doesn't support SSE in OpenAPI - .route( - "/chat/sessions/{sessionId}/messages", - axum::routing::post(send_message), - ) - .with_path_items(|item| item.tag("Chat")) -} diff --git a/crates/nvisy-server/src/handler/comments.rs b/crates/nvisy-server/src/handler/comments.rs deleted file mode 100644 index eea3b9d..0000000 --- a/crates/nvisy-server/src/handler/comments.rs +++ /dev/null @@ -1,258 +0,0 @@ -//! File comment management handlers for CRUD operations. -//! -//! This module provides comment management functionality for files. -//! Supports threaded conversations and @mentions. - -use aide::axum::ApiRouter; -use aide::transform::TransformOperation; -use axum::extract::State; -use axum::http::StatusCode; -use nvisy_postgres::PgClient; -use nvisy_postgres::query::{DocumentCommentRepository, DocumentFileRepository}; - -use crate::extract::{AuthState, Json, Path, Query, ValidateJson}; -use crate::handler::request::{ - CommentPathParams, CreateComment, CursorPagination, FilePathParams, UpdateComment, -}; -use crate::handler::response::{Comment, CommentsPage, ErrorResponse}; -use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; - -/// Tracing target for file comment operations. -const TRACING_TARGET: &str = "nvisy_server::handler::comments"; - -/// Creates a new comment on a file. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - file_id = %path_params.file_id, - ) -)] -async fn post_comment( - State(pg_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Creating comment"); - - let mut conn = pg_client.get_connection().await?; - - // Verify file exists - let _ = find_file(&mut conn, path_params.file_id).await?; - - // Validate parent comment if provided - if let Some(parent_id) = request.parent_comment_id { - let parent_comment = find_comment(&mut conn, parent_id).await?; - - // Verify parent comment is on the same file - if parent_comment.file_id != path_params.file_id { - return Err(ErrorKind::BadRequest - .with_message("Parent comment must belong to the same file.") - .with_resource("comment")); - } - } - - let comment = conn - .create_document_comment(request.into_model(auth_claims.account_id, path_params.file_id)) - .await?; - - tracing::info!( - target: TRACING_TARGET, - comment_id = %comment.id, - "Comment created", - ); - - Ok((StatusCode::CREATED, Json(Comment::from_model(comment)))) -} - -fn post_comment_docs(op: TransformOperation) -> TransformOperation { - op.summary("Create comment") - .description("Creates a new comment on a file.") - .response::<201, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<404, Json>() -} - -/// Returns all comments for a file. 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - file_id = %path_params.file_id, - ) -)] -async fn list_comments( - State(pg_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, - Query(pagination): Query, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Listing comments"); - - let mut conn = pg_client.get_connection().await?; - - // Verify file exists - let _ = find_file(&mut conn, path_params.file_id).await?; - - let page = conn - .cursor_list_file_document_comments(path_params.file_id, pagination.into()) - .await?; - - let response = CommentsPage::from_cursor_page(page, Comment::from_model); - - tracing::debug!( - target: TRACING_TARGET, - comment_count = response.items.len(), - "Comments listed", - ); - - Ok((StatusCode::OK, Json(response))) -} - -fn list_comments_docs(op: TransformOperation) -> TransformOperation { - op.summary("List comments") - .description("Returns all comments for a file.") - .response::<200, Json>() - .response::<401, Json>() - .response::<404, Json>() -} - -/// Updates a comment by ID. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - comment_id = %path_params.comment_id, - ) -)] -async fn update_comment( - State(pg_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Updating comment"); - - let mut conn = pg_client.get_connection().await?; - - // Fetch comment and verify ownership - let existing_comment = find_comment(&mut conn, path_params.comment_id).await?; - - // Check ownership - if existing_comment.account_id != auth_claims.account_id { - return Err(ErrorKind::Forbidden - .with_message("You can only update your own comments.") - .with_resource("comment")); - } - - let comment = conn - .update_document_comment(path_params.comment_id, request.into_model()) - .await?; - - tracing::info!(target: TRACING_TARGET, "Comment updated"); - - Ok((StatusCode::OK, Json(Comment::from_model(comment)))) -} - -fn update_comment_docs(op: TransformOperation) -> TransformOperation { - op.summary("Update comment") - .description("Updates a comment by ID.") - .response::<200, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Deletes a comment by ID. 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - comment_id = %path_params.comment_id, - ) -)] -async fn delete_comment( - State(pg_client): State, - AuthState(auth_claims): AuthState, - Path(path_params): Path, -) -> Result { - tracing::debug!(target: TRACING_TARGET, "Deleting comment"); - - let mut conn = pg_client.get_connection().await?; - - // Fetch comment and verify ownership - let existing_comment = find_comment(&mut conn, path_params.comment_id).await?; - - // Check ownership - if existing_comment.account_id != auth_claims.account_id { - return Err(ErrorKind::Forbidden - .with_message("You can only delete your own comments.") - .with_resource("comment")); - } - - conn.delete_document_comment(path_params.comment_id).await?; - - tracing::info!(target: TRACING_TARGET, "Comment deleted"); - - Ok(StatusCode::NO_CONTENT) -} - -fn delete_comment_docs(op: TransformOperation) -> TransformOperation { - op.summary("Delete comment") - .description("Deletes a comment by ID.") - .response_with::<204, (), _>(|res| res.description("Comment deleted.")) - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Finds a file by ID or returns NotFound error. -async fn find_file( - conn: &mut nvisy_postgres::PgConn, - file_id: uuid::Uuid, -) -> Result { - conn.find_document_file_by_id(file_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("File not found.") - .with_resource("file") - }) -} - -/// Finds a comment by ID or returns NotFound error. -async fn find_comment( - conn: &mut nvisy_postgres::PgConn, - comment_id: uuid::Uuid, -) -> Result { - conn.find_document_comment_by_id(comment_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("Comment not found.") - .with_resource("comment") - }) -} - -/// Returns a [`Router`] with all comment-related routes. -/// -/// [`Router`]: axum::routing::Router -pub fn routes() -> ApiRouter { - use aide::axum::routing::*; - - ApiRouter::new() - .api_route( - "/files/{fileId}/comments", - post_with(post_comment, post_comment_docs).get_with(list_comments, list_comments_docs), - ) - .api_route( - "/comments/{commentId}", - patch_with(update_comment, update_comment_docs) - .delete_with(delete_comment, delete_comment_docs), - ) - .with_path_items(|item| item.tag("Comments")) -} diff --git a/crates/nvisy-server/src/handler/documents.rs b/crates/nvisy-server/src/handler/documents.rs deleted file mode 100644 index 5a68419..0000000 --- a/crates/nvisy-server/src/handler/documents.rs +++ /dev/null @@ -1,288 +0,0 @@ -//! Document management handlers for document CRUD operations. -//! -//! This module provides comprehensive document management functionality within workspaces, -//! including creation, reading, updating, and deletion of documents. All operations -//! are secured with proper authorization and follow workspace-based access control. 
- -use aide::axum::ApiRouter; -use aide::transform::TransformOperation; -use axum::extract::State; -use axum::http::StatusCode; -use nvisy_nats::NatsClient; -use nvisy_postgres::PgClient; -use nvisy_postgres::query::DocumentRepository; - -use crate::extract::{AuthProvider, AuthState, Json, Path, Permission, Query, ValidateJson}; -use crate::handler::request::{ - CreateDocument, CursorPagination, DocumentPathParams, UpdateDocument, WorkspacePathParams, -}; -use crate::handler::response::{Document, DocumentsPage, ErrorResponse}; -use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; - -/// Tracing target for document operations. -const TRACING_TARGET: &str = "nvisy_server::handler::documents"; - -/// Creates a new document. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn create_document( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Creating document"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_workspace( - &mut conn, - path_params.workspace_id, - Permission::CreateDocuments, - ) - .await?; - - let new_document = request.into_model(path_params.workspace_id, auth_state.account_id); - let document = conn.create_document(new_document).await?; - - tracing::info!( - target: TRACING_TARGET, - document_id = %document.id, - "Document created", - ); - - Ok((StatusCode::CREATED, Json(Document::from_model(document)))) -} - -fn create_document_docs(op: TransformOperation) -> TransformOperation { - op.summary("Create document") - .description("Creates a new document container for organizing files.") - .response::<201, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() -} - -/// Returns all documents for a workspace. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn get_all_documents( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - Query(pagination): Query, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Listing documents"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_workspace( - &mut conn, - path_params.workspace_id, - Permission::ViewDocuments, - ) - .await?; - - let page = conn - .cursor_list_workspace_documents(path_params.workspace_id, pagination.into()) - .await?; - - let response = DocumentsPage::from_cursor_page(page, Document::from_model); - - tracing::debug!( - target: TRACING_TARGET, - document_count = response.items.len(), - "Documents listed", - ); - - Ok((StatusCode::OK, Json(response))) -} - -fn get_all_documents_docs(op: TransformOperation) -> TransformOperation { - op.summary("List documents") - .description("Lists all documents in a workspace with pagination.") - .response::<200, Json>() - .response::<401, Json>() - .response::<403, Json>() -} - -/// Gets a document by its document ID. 
-#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - document_id = %path_params.document_id, - ) -)] -async fn get_document( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Reading document"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_document( - &mut conn, - path_params.document_id, - Permission::ViewDocuments, - ) - .await?; - - let document = find_document(&mut conn, path_params.document_id).await?; - - tracing::info!(target: TRACING_TARGET, "Document read"); - - Ok((StatusCode::OK, Json(Document::from_model(document)))) -} - -fn get_document_docs(op: TransformOperation) -> TransformOperation { - op.summary("Get document") - .description("Returns document details by ID.") - .response::<200, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Updates a document by its document ID. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - document_id = %path_params.document_id, - ) -)] -async fn update_document( - State(pg_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, - ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { - tracing::debug!(target: TRACING_TARGET, "Updating document"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_document( - &mut conn, - path_params.document_id, - Permission::UpdateDocuments, - ) - .await?; - - // Verify document exists - let _ = find_document(&mut conn, path_params.document_id).await?; - - let update_data = request.into_model(); - let document = conn - .update_document(path_params.document_id, update_data) - .await?; - - tracing::info!(target: TRACING_TARGET, "Document updated"); - - Ok((StatusCode::OK, Json(Document::from_model(document)))) -} - -fn update_document_docs(op: TransformOperation) -> TransformOperation { - op.summary("Update document") - .description("Updates document metadata.") - .response::<200, Json>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Deletes a document by its document ID. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_state.account_id, - document_id = %path_params.document_id, - ) -)] -async fn delete_document( - State(pg_client): State, - State(_nats_client): State, - AuthState(auth_state): AuthState, - Path(path_params): Path, -) -> Result { - tracing::debug!(target: TRACING_TARGET, "Deleting document"); - - let mut conn = pg_client.get_connection().await?; - - auth_state - .authorize_document( - &mut conn, - path_params.document_id, - Permission::DeleteDocuments, - ) - .await?; - - // Verify document exists - let _ = find_document(&mut conn, path_params.document_id).await?; - - conn.delete_document(path_params.document_id).await?; - - tracing::info!(target: TRACING_TARGET, "Document deleted"); - - Ok(StatusCode::OK) -} - -fn delete_document_docs(op: TransformOperation) -> TransformOperation { - op.summary("Delete document") - .description("Soft-deletes the document and associated files.") - .response_with::<200, (), _>(|res| res.description("Document deleted.")) - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Finds a document by ID or returns NotFound error. 
-async fn find_document( - conn: &mut nvisy_postgres::PgConn, - document_id: uuid::Uuid, -) -> Result { - conn.find_document_by_id(document_id).await?.ok_or_else(|| { - ErrorKind::NotFound - .with_message("Document not found.") - .with_resource("document") - }) -} - -/// Returns a [`Router`] with all related routes. -/// -/// [`Router`]: axum::routing::Router -pub fn routes() -> ApiRouter { - use aide::axum::routing::*; - - ApiRouter::new() - .api_route( - "/workspaces/{workspaceId}/documents", - post_with(create_document, create_document_docs) - .get_with(get_all_documents, get_all_documents_docs), - ) - .api_route( - "/documents/{documentId}", - get_with(get_document, get_document_docs) - .patch_with(update_document, update_document_docs) - .delete_with(delete_document, delete_document_docs), - ) - .with_path_items(|item| item.tag("Documents")) -} diff --git a/crates/nvisy-server/src/handler/error/archive_error.rs b/crates/nvisy-server/src/handler/error/archive_error.rs deleted file mode 100644 index cf2c346..0000000 --- a/crates/nvisy-server/src/handler/error/archive_error.rs +++ /dev/null @@ -1,31 +0,0 @@ -//! Archive error to HTTP error conversion implementation. -//! -//! This module provides conversion from nvisy-runtime archive errors to appropriate -//! HTTP errors with proper status codes and user-friendly messages. - -use nvisy_runtime::ArchiveError; - -use super::http_error::{Error as HttpError, ErrorKind}; - -/// Tracing target for archive error conversions. -const TRACING_TARGET: &str = "nvisy_server::handler::archive"; - -impl From for HttpError<'static> { - fn from(error: ArchiveError) -> Self { - tracing::error!( - target: TRACING_TARGET, - error = %error, - "Archive operation failed" - ); - - match error { - ArchiveError::Archive(e) => ErrorKind::InternalServerError - .with_message("Failed to create archive") - .with_context(e.to_string()), - - ArchiveError::Io(e) => ErrorKind::InternalServerError - .with_message("Archive I/O error") - .with_context(e.to_string()), - } - } -} diff --git a/crates/nvisy-server/src/handler/error/mod.rs b/crates/nvisy-server/src/handler/error/mod.rs index 7512732..8e75e77 100644 --- a/crates/nvisy-server/src/handler/error/mod.rs +++ b/crates/nvisy-server/src/handler/error/mod.rs @@ -1,12 +1,11 @@ //! [`Error`], [`ErrorKind`] and [`Result`]. -mod archive_error; mod http_error; mod nats_error; mod pg_account; -mod pg_chat; mod pg_document; mod pg_error; +mod pg_pipeline; mod pg_workspace; mod service_error; diff --git a/crates/nvisy-server/src/handler/error/pg_chat.rs b/crates/nvisy-server/src/handler/error/pg_chat.rs deleted file mode 100644 index 5550e72..0000000 --- a/crates/nvisy-server/src/handler/error/pg_chat.rs +++ /dev/null @@ -1,65 +0,0 @@ -//! Chat-related constraint violation error handlers. 
- -use nvisy_postgres::types::{ - ChatOperationConstraints, ChatSessionConstraints, ChatToolCallConstraints, -}; - -use crate::handler::{Error, ErrorKind}; - -impl From for Error<'static> { - fn from(c: ChatSessionConstraints) -> Self { - let error = match c { - ChatSessionConstraints::DisplayNameLength => ErrorKind::BadRequest - .with_message("Session name must be between 1 and 255 characters long"), - ChatSessionConstraints::ModelConfigSize => { - ErrorKind::BadRequest.with_message("Model configuration size is invalid") - } - ChatSessionConstraints::MessageCountMin => ErrorKind::InternalServerError.into_error(), - ChatSessionConstraints::TokenCountMin => ErrorKind::InternalServerError.into_error(), - ChatSessionConstraints::UpdatedAfterCreated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("chat_session") - } -} - -impl From for Error<'static> { - fn from(c: ChatToolCallConstraints) -> Self { - let error = match c { - ChatToolCallConstraints::ToolNameLength => ErrorKind::BadRequest - .with_message("Tool name must be between 1 and 128 characters long"), - ChatToolCallConstraints::ToolInputSize => { - ErrorKind::BadRequest.with_message("Tool input size exceeds maximum allowed") - } - ChatToolCallConstraints::ToolOutputSize => { - ErrorKind::BadRequest.with_message("Tool output size exceeds maximum allowed") - } - ChatToolCallConstraints::CompletedAfterStarted => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("chat_tool_call") - } -} - -impl From for Error<'static> { - fn from(c: ChatOperationConstraints) -> Self { - let error = match c { - ChatOperationConstraints::OperationTypeLength => ErrorKind::BadRequest - .with_message("Operation type must be between 1 and 64 characters long"), - ChatOperationConstraints::OperationDiffSize => { - ErrorKind::BadRequest.with_message("Operation diff size exceeds maximum allowed") - } - ChatOperationConstraints::RevertRequiresApplied => ErrorKind::BadRequest - .with_message("Cannot revert an operation that has not been applied"), - ChatOperationConstraints::AppliedAfterCreated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("chat_operation") - } -} diff --git a/crates/nvisy-server/src/handler/error/pg_document.rs b/crates/nvisy-server/src/handler/error/pg_document.rs index 9b7a925..9968d05 100644 --- a/crates/nvisy-server/src/handler/error/pg_document.rs +++ b/crates/nvisy-server/src/handler/error/pg_document.rs @@ -1,209 +1,95 @@ -//! Document-related constraint violation error handlers. +//! File-related constraint violation error handlers. 
-use nvisy_postgres::types::{ - DocumentAnnotationConstraints, DocumentChunkConstraints, DocumentCommentConstraints, - DocumentConstraints, DocumentFileConstraints, DocumentVersionConstraints, -}; +use nvisy_postgres::types::{FileAnnotationConstraints, FileChunkConstraints, FileConstraints}; use crate::handler::{Error, ErrorKind}; -impl From for Error<'static> { - fn from(c: DocumentConstraints) -> Self { +impl From for Error<'static> { + fn from(c: FileConstraints) -> Self { let error = match c { - DocumentConstraints::DisplayNameLength => ErrorKind::BadRequest - .with_message("Document name must be between 1 and 255 characters long"), - DocumentConstraints::DescriptionLengthMax => ErrorKind::BadRequest - .with_message("Document description cannot exceed 2048 characters"), - DocumentConstraints::TagsCountMax => { - ErrorKind::BadRequest.with_message("Cannot have more than 32 tags") - } - DocumentConstraints::MetadataSize => { - ErrorKind::BadRequest.with_message("Document metadata size is invalid") - } - DocumentConstraints::UpdatedAfterCreated - | DocumentConstraints::DeletedAfterCreated - | DocumentConstraints::DeletedAfterUpdated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("document") - } -} - -impl From for Error<'static> { - fn from(c: DocumentFileConstraints) -> Self { - let error = - match c { - DocumentFileConstraints::DisplayNameLength => ErrorKind::BadRequest - .with_message("File name must be between 1 and 255 characters long"), - DocumentFileConstraints::OriginalFilenameLength => ErrorKind::BadRequest - .with_message("Original filename must be between 1 and 255 characters long"), - DocumentFileConstraints::FileExtensionFormat => { - ErrorKind::BadRequest.with_message("Invalid file extension format") - } - DocumentFileConstraints::ProcessingPriorityRange => ErrorKind::BadRequest - .with_message("Processing priority must be between 1 and 10"), - DocumentFileConstraints::FileSizeMin => ErrorKind::BadRequest - .with_message("File size must be greater than or equal to 0"), - DocumentFileConstraints::StoragePathNotEmpty => { - ErrorKind::InternalServerError.into_error() - } - DocumentFileConstraints::StorageBucketNotEmpty => { - ErrorKind::InternalServerError.into_error() - } - DocumentFileConstraints::FileHashSha256Length => { - ErrorKind::InternalServerError.into_error() - } - DocumentFileConstraints::MetadataSize => { - ErrorKind::BadRequest.with_message("File metadata size is invalid") - } - DocumentFileConstraints::RetentionPeriod => ErrorKind::BadRequest - .with_message("File retention period must be between 1 hour and 5 years"), - DocumentFileConstraints::TagsCountMax => { - ErrorKind::BadRequest.with_message("Maximum number of tags exceeded") - } - DocumentFileConstraints::VersionNumberMin => { - ErrorKind::BadRequest.with_message("Version number must be at least 1") - } - DocumentFileConstraints::ParentSameDocument => ErrorKind::BadRequest - .with_message("Parent file must belong to the same document"), - DocumentFileConstraints::UpdatedAfterCreated - | DocumentFileConstraints::DeletedAfterCreated - | DocumentFileConstraints::DeletedAfterUpdated - | DocumentFileConstraints::AutoDeleteAfterCreated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("document_file") - } -} - -impl From for Error<'static> { - fn from(c: DocumentVersionConstraints) -> Self { - let error = match c { - DocumentVersionConstraints::VersionNumberMin => { - ErrorKind::BadRequest.with_message("Version number must be at least 1") - 
} - DocumentVersionConstraints::DisplayNameLength => ErrorKind::BadRequest - .with_message("Version name must be between 1 and 255 characters long"), - DocumentVersionConstraints::FileExtensionFormat => { + FileConstraints::DisplayNameLength => ErrorKind::BadRequest + .with_message("File name must be between 1 and 255 characters long"), + FileConstraints::OriginalFilenameLength => ErrorKind::BadRequest + .with_message("Original filename must be between 1 and 255 characters long"), + FileConstraints::FileExtensionFormat => { ErrorKind::BadRequest.with_message("Invalid file extension format") } - DocumentVersionConstraints::ProcessingCreditsMin => { - ErrorKind::InternalServerError.into_error() - } - DocumentVersionConstraints::ProcessingDurationMin => { - ErrorKind::InternalServerError.into_error() + FileConstraints::MimeTypeFormat => { + ErrorKind::BadRequest.with_message("Invalid MIME type format") } - DocumentVersionConstraints::ApiCallsMin => ErrorKind::InternalServerError.into_error(), - DocumentVersionConstraints::FileSizeMin => { + FileConstraints::FileSizeMin => { ErrorKind::BadRequest.with_message("File size must be greater than or equal to 0") } - DocumentVersionConstraints::StoragePathNotEmpty => { - ErrorKind::InternalServerError.into_error() - } - DocumentVersionConstraints::StorageBucketNotEmpty => { - ErrorKind::InternalServerError.into_error() - } - DocumentVersionConstraints::FileHashSha256Length => { - ErrorKind::InternalServerError.into_error() + FileConstraints::StoragePathNotEmpty => ErrorKind::InternalServerError.into_error(), + FileConstraints::StorageBucketNotEmpty => ErrorKind::InternalServerError.into_error(), + FileConstraints::FileHashSha256Length => ErrorKind::InternalServerError.into_error(), + FileConstraints::MetadataSize => { + ErrorKind::BadRequest.with_message("File metadata size is invalid") } - DocumentVersionConstraints::ResultsSize => { - ErrorKind::BadRequest.with_message("Processing results size is invalid") + FileConstraints::TagsCountMax => { + ErrorKind::BadRequest.with_message("Maximum number of tags exceeded") } - DocumentVersionConstraints::MetadataSize => { - ErrorKind::BadRequest.with_message("Version metadata size is invalid") - } - DocumentVersionConstraints::RetentionPeriod => ErrorKind::BadRequest - .with_message("Version retention period must be between 1 hour and 5 years"), - DocumentVersionConstraints::UpdatedAfterCreated - | DocumentVersionConstraints::DeletedAfterCreated - | DocumentVersionConstraints::DeletedAfterUpdated - | DocumentVersionConstraints::AutoDeleteAfterCreated => { - ErrorKind::InternalServerError.into_error() - } - }; - - error.with_resource("document_version") - } -} - -impl From for Error<'static> { - fn from(c: DocumentCommentConstraints) -> Self { - let error = match c { - DocumentCommentConstraints::ContentLength => ErrorKind::BadRequest - .with_message("Comment content must be between 1 and 10,000 characters"), - DocumentCommentConstraints::OneTarget => ErrorKind::BadRequest.with_message( - "Comment must be attached to exactly one target (document, file, or version)", - ), - DocumentCommentConstraints::MetadataSize => { - ErrorKind::BadRequest.with_message("Comment metadata size is invalid") - } - DocumentCommentConstraints::UpdatedAfterCreated - | DocumentCommentConstraints::DeletedAfterCreated - | DocumentCommentConstraints::DeletedAfterUpdated => { - ErrorKind::InternalServerError.into_error() + FileConstraints::VersionNumberMin => { + ErrorKind::BadRequest.with_message("Version number must be at least 1") } 
+ FileConstraints::UpdatedAfterCreated + | FileConstraints::DeletedAfterCreated + | FileConstraints::DeletedAfterUpdated => ErrorKind::InternalServerError.into_error(), }; - error.with_resource("document_comment") + error.with_resource("file") } } -impl From for Error<'static> { - fn from(c: DocumentAnnotationConstraints) -> Self { +impl From for Error<'static> { + fn from(c: FileAnnotationConstraints) -> Self { let error = match c { - DocumentAnnotationConstraints::ContentLength => { + FileAnnotationConstraints::ContentLength => { ErrorKind::BadRequest.with_message("Annotation content length is invalid") } - DocumentAnnotationConstraints::TypeFormat => { - ErrorKind::BadRequest.with_message("Annotation type format is invalid") - } - DocumentAnnotationConstraints::MetadataSize => { + FileAnnotationConstraints::MetadataSize => { ErrorKind::BadRequest.with_message("Annotation metadata size is invalid") } - DocumentAnnotationConstraints::UpdatedAfterCreated - | DocumentAnnotationConstraints::DeletedAfterCreated - | DocumentAnnotationConstraints::DeletedAfterUpdated => { + FileAnnotationConstraints::UpdatedAfterCreated + | FileAnnotationConstraints::DeletedAfterCreated + | FileAnnotationConstraints::DeletedAfterUpdated => { ErrorKind::InternalServerError.into_error() } }; - error.with_resource("document_annotation") + error.with_resource("file_annotation") } } -impl From for Error<'static> { - fn from(c: DocumentChunkConstraints) -> Self { +impl From for Error<'static> { + fn from(c: FileChunkConstraints) -> Self { let error = match c { - DocumentChunkConstraints::ChunkIndexMin => { + FileChunkConstraints::ChunkIndexMin => { ErrorKind::BadRequest.with_message("Chunk index must be at least 0") } - DocumentChunkConstraints::ContentSha256Length => { + FileChunkConstraints::ContentSha256Length => { ErrorKind::InternalServerError.into_error() } - DocumentChunkConstraints::ContentSizeMin => { + FileChunkConstraints::ContentSizeMin => { ErrorKind::BadRequest.with_message("Chunk content size must be at least 0") } - DocumentChunkConstraints::TokenCountMin => { + FileChunkConstraints::TokenCountMin => { ErrorKind::BadRequest.with_message("Token count must be at least 0") } - DocumentChunkConstraints::EmbeddingModelFormat => { + FileChunkConstraints::EmbeddingModelFormat => { ErrorKind::BadRequest.with_message("Invalid embedding model format") } - DocumentChunkConstraints::MetadataSize => { + FileChunkConstraints::MetadataSize => { ErrorKind::BadRequest.with_message("Chunk metadata size is invalid") } - DocumentChunkConstraints::UpdatedAfterCreated => { + FileChunkConstraints::UpdatedAfterCreated => { ErrorKind::InternalServerError.into_error() } - DocumentChunkConstraints::FileChunkUnique => { + FileChunkConstraints::FileChunkUnique => { ErrorKind::Conflict.with_message("Chunk with this index already exists for file") } }; - error.with_resource("document_chunk") + error.with_resource("file_chunk") } } diff --git a/crates/nvisy-server/src/handler/error/pg_error.rs b/crates/nvisy-server/src/handler/error/pg_error.rs index 6b086c7..f8aff47 100644 --- a/crates/nvisy-server/src/handler/error/pg_error.rs +++ b/crates/nvisy-server/src/handler/error/pg_error.rs @@ -27,16 +27,12 @@ impl From for Error<'static> { ConstraintViolation::WorkspaceActivityLog(c) => c.into(), ConstraintViolation::WorkspaceIntegration(c) => c.into(), ConstraintViolation::WorkspaceIntegrationRun(c) => c.into(), - ConstraintViolation::Document(c) => c.into(), - ConstraintViolation::DocumentChunk(c) => c.into(), - 
ConstraintViolation::DocumentComment(c) => c.into(), - ConstraintViolation::DocumentAnnotation(c) => c.into(), - ConstraintViolation::DocumentFile(c) => c.into(), - ConstraintViolation::DocumentVersion(c) => c.into(), ConstraintViolation::WorkspaceWebhook(c) => c.into(), - ConstraintViolation::ChatSession(c) => c.into(), - ConstraintViolation::ChatToolCall(c) => c.into(), - ConstraintViolation::ChatOperation(c) => c.into(), + ConstraintViolation::File(c) => c.into(), + ConstraintViolation::FileAnnotation(c) => c.into(), + ConstraintViolation::FileChunk(c) => c.into(), + ConstraintViolation::Pipeline(c) => c.into(), + ConstraintViolation::PipelineRun(c) => c.into(), } } } diff --git a/crates/nvisy-server/src/handler/error/pg_pipeline.rs b/crates/nvisy-server/src/handler/error/pg_pipeline.rs new file mode 100644 index 0000000..b66c9a6 --- /dev/null +++ b/crates/nvisy-server/src/handler/error/pg_pipeline.rs @@ -0,0 +1,50 @@ +//! Pipeline-related constraint violation error handlers. + +use nvisy_postgres::types::{PipelineConstraints, PipelineRunConstraints}; + +use crate::handler::{Error, ErrorKind}; + +impl From for Error<'static> { + fn from(c: PipelineConstraints) -> Self { + let error = match c { + PipelineConstraints::NameLength => ErrorKind::BadRequest + .with_message("Pipeline name must be between 1 and 255 characters long"), + PipelineConstraints::DescriptionLength => ErrorKind::BadRequest + .with_message("Pipeline description must be at most 4096 characters long"), + PipelineConstraints::DefinitionSize => { + ErrorKind::BadRequest.with_message("Pipeline definition size exceeds maximum limit") + } + PipelineConstraints::MetadataSize => { + ErrorKind::BadRequest.with_message("Pipeline metadata size exceeds maximum limit") + } + PipelineConstraints::UpdatedAfterCreated | PipelineConstraints::DeletedAfterCreated => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("pipeline") + } +} + +impl From for Error<'static> { + fn from(c: PipelineRunConstraints) -> Self { + let error = match c { + PipelineRunConstraints::InputConfigSize => ErrorKind::BadRequest + .with_message("Pipeline run input configuration size exceeds maximum limit"), + PipelineRunConstraints::OutputConfigSize => ErrorKind::BadRequest + .with_message("Pipeline run output configuration size exceeds maximum limit"), + PipelineRunConstraints::DefinitionSnapshotSize => ErrorKind::BadRequest + .with_message("Pipeline run definition snapshot size exceeds maximum limit"), + PipelineRunConstraints::ErrorSize => ErrorKind::BadRequest + .with_message("Pipeline run error details size exceeds maximum limit"), + PipelineRunConstraints::MetricsSize => ErrorKind::BadRequest + .with_message("Pipeline run metrics size exceeds maximum limit"), + PipelineRunConstraints::StartedAfterCreated + | PipelineRunConstraints::CompletedAfterStarted => { + ErrorKind::InternalServerError.into_error() + } + }; + + error.with_resource("pipeline_run") + } +} diff --git a/crates/nvisy-server/src/handler/files.rs b/crates/nvisy-server/src/handler/files.rs index b56239e..916baf2 100644 --- a/crates/nvisy-server/src/handler/files.rs +++ b/crates/nvisy-server/src/handler/files.rs @@ -17,35 +17,31 @@ use nvisy_nats::NatsClient; use nvisy_nats::object::{DocumentKey, DocumentStore, Files as FilesBucket}; use nvisy_nats::stream::{DocumentJobPublisher, PreprocessingData}; use nvisy_postgres::PgClient; -use nvisy_postgres::model::{DocumentFile, NewDocumentFile}; -use nvisy_postgres::query::DocumentFileRepository; -use 
nvisy_postgres::types::ProcessingStatus; +use nvisy_postgres::model::{File as FileModel, NewFile}; +use nvisy_postgres::query::FileRepository; use uuid::Uuid; use crate::extract::{ AuthProvider, AuthState, Json, Multipart, Path, Permission, Query, ValidateJson, }; use crate::handler::request::{ - CursorPagination, DeleteFiles, DownloadFiles, FilePathParams, ListFiles, UpdateFile, - WorkspacePathParams, + CursorPagination, FilePathParams, ListFiles, UpdateFile, WorkspacePathParams, }; use crate::handler::response::{self, ErrorResponse, File, Files, FilesPage}; use crate::handler::{ErrorKind, Result}; use crate::middleware::DEFAULT_MAX_FILE_BODY_SIZE; -use crate::service::{ArchiveFormat, ArchiveService, ServiceState}; +use crate::service::ServiceState; /// Tracing target for workspace file operations. const TRACING_TARGET: &str = "nvisy_server::handler::workspace_files"; /// Finds a file by ID or returns NotFound error. -async fn find_file(conn: &mut nvisy_postgres::PgConn, file_id: Uuid) -> Result { - conn.find_document_file_by_id(file_id) - .await? - .ok_or_else(|| { - ErrorKind::NotFound - .with_message("File not found") - .with_resource("file") - }) +async fn find_file(conn: &mut nvisy_postgres::PgConn, file_id: Uuid) -> Result { + conn.find_file_by_id(file_id).await?.ok_or_else(|| { + ErrorKind::NotFound + .with_message("File not found") + .with_resource("file") + }) } /// Lists files in a workspace with cursor-based pagination. @@ -115,7 +111,7 @@ async fn process_single_file( conn: &mut nvisy_postgres::PgConn, ctx: &FileUploadContext, field: axum::extract::multipart::Field<'_>, -) -> Result { +) -> Result { let filename = field .file_name() .map(ToString::to_string) @@ -152,7 +148,7 @@ async fn process_single_file( ); // Step 2: Create DB record with all storage info (Postgres generates its own id) - let file_record = NewDocumentFile { + let file_record = NewFile { workspace_id: ctx.workspace_id, account_id: ctx.account_id, display_name: Some(filename.clone()), @@ -162,11 +158,10 @@ async fn process_single_file( file_hash_sha256: put_result.sha256().to_vec(), storage_path: document_key.to_string(), storage_bucket: ctx.document_store.bucket().to_owned(), - processing_status: Some(ProcessingStatus::Pending), ..Default::default() }; - let created_file = conn.create_document_file(file_record).await?; + let created_file = conn.create_file(file_record).await?; // Step 3: Publish job to queue (use Postgres-generated file ID) let job = nvisy_nats::stream::DocumentJob::new( @@ -331,7 +326,7 @@ async fn update_file( let updates = request.into_model(); let updated_file = conn - .update_document_file(path_params.file_id, updates) + .update_file(path_params.file_id, updates) .await .map_err(|err| { tracing::error!(target: TRACING_TARGET, error = %err, "Failed to update file"); @@ -491,14 +486,12 @@ async fn delete_file( .authorize_workspace(&mut conn, file.workspace_id, Permission::DeleteFiles) .await?; - conn.delete_document_file(path_params.file_id) - .await - .map_err(|err| { - tracing::error!(target: TRACING_TARGET, error = %err, "Failed to soft delete file"); - ErrorKind::InternalServerError - .with_message("Failed to delete file") - .with_context(format!("Database error: {}", err)) - })?; + conn.delete_file(path_params.file_id).await.map_err(|err| { + tracing::error!(target: TRACING_TARGET, error = %err, "Failed to soft delete file"); + ErrorKind::InternalServerError + .with_message("Failed to delete file") + .with_context(format!("Database error: {}", err)) + })?; tracing::info!(target: 
TRACING_TARGET, "File deleted"); Ok(StatusCode::NO_CONTENT) @@ -513,196 +506,6 @@ fn delete_file_docs(op: TransformOperation) -> TransformOperation { .response::<404, Json>() } -/// Deletes multiple files (soft delete). -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn delete_multiple_files( - State(pg_client): State, - Path(path_params): Path, - AuthState(auth_claims): AuthState, - ValidateJson(request): ValidateJson, -) -> Result { - tracing::info!(target: TRACING_TARGET, file_count = request.file_ids.len(), "Deleting multiple files"); - - let mut conn = pg_client.get_connection().await?; - - auth_claims - .authorize_workspace(&mut conn, path_params.workspace_id, Permission::DeleteFiles) - .await?; - - // Soft delete all files in a single query - let deleted_count = conn - .delete_document_files(path_params.workspace_id, &request.file_ids) - .await?; - - // Check if all requested files were deleted - if deleted_count != request.file_ids.len() { - tracing::warn!( - target: TRACING_TARGET, - requested = request.file_ids.len(), - deleted = deleted_count, - "Some files were not found or already deleted" - ); - return Err(ErrorKind::NotFound - .with_message("One or more files not found") - .with_resource("file")); - } - - tracing::info!(target: TRACING_TARGET, file_count = deleted_count, "Files deleted"); - - Ok(StatusCode::NO_CONTENT) -} - -fn delete_multiple_files_docs(op: TransformOperation) -> TransformOperation { - op.summary("Delete multiple files") - .description("Soft deletes multiple files by setting deleted timestamps. Files can be recovered within the retention period.") - .response::<204, ()>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - -/// Downloads all or specific workspace files as an archive. -#[tracing::instrument( - skip_all, - fields( - account_id = %auth_claims.account_id, - workspace_id = %path_params.workspace_id, - ) -)] -async fn download_archived_files( - State(pg_client): State, - State(nats_client): State, - State(archive): State, - Path(path_params): Path, - AuthState(auth_claims): AuthState, - Json(request): Json, -) -> Result<(StatusCode, HeaderMap, Vec)> { - tracing::debug!(target: TRACING_TARGET, "Downloading archived files"); - - let mut conn = pg_client.get_connection().await?; - - auth_claims - .authorize_workspace( - &mut conn, - path_params.workspace_id, - Permission::DownloadFiles, - ) - .await?; - - let document_store = nats_client - .document_store::() - .await - .map_err(|err| { - tracing::error!( - target: TRACING_TARGET, - error = %err, - "Failed to create document store" - ); - ErrorKind::InternalServerError.with_message("Failed to initialize file storage") - })?; - - // Determine which files to download - let files = if let Some(specific_ids) = request.file_ids { - // Batch fetch specific files - conn.find_document_files_by_ids(&specific_ids).await? - } else { - // Get all workspace files using the workspace-scoped query - conn.cursor_list_workspace_files( - path_params.workspace_id, - Default::default(), - Default::default(), - ) - .await? 
- .items - }; - - // Filter to only files belonging to this workspace and not deleted - let valid_files: Vec<_> = files - .into_iter() - .filter(|f| f.workspace_id == path_params.workspace_id && f.deleted_at.is_none()) - .collect(); - - if valid_files.is_empty() { - return Err(ErrorKind::NotFound.with_message("No files found for archive")); - } - - // Fetch all file contents - let mut files_data = Vec::new(); - - for file in &valid_files { - let document_key = DocumentKey::from_str(&file.storage_path).map_err(|err| { - ErrorKind::InternalServerError - .with_message("Invalid file storage path") - .with_context(format!("Parse error: {}", err)) - })?; - - if let Ok(Some(mut get_result)) = document_store.get(&document_key).await { - let mut buffer = Vec::with_capacity(get_result.size()); - if tokio::io::AsyncReadExt::read_to_end(get_result.reader(), &mut buffer) - .await - .is_ok() - { - files_data.push((file.display_name.clone(), buffer)); - } - } - } - - if files_data.is_empty() { - return Err(ErrorKind::NotFound.with_message("No files found for archive")); - } - - // Create archive - let archive_bytes = archive.create_archive(files_data, request.format).await?; - - // Determine content type and file extension based on format - let (content_type, extension) = match request.format { - ArchiveFormat::Tar => ("application/x-tar", "tar.gz"), - ArchiveFormat::Zip => ("application/zip", "zip"), - }; - - // Set up response headers - let mut headers = HeaderMap::new(); - headers.insert( - "content-disposition", - format!( - "attachment; filename=\"workspace_{}_archive.{}\"", - path_params.workspace_id, extension - ) - .parse() - .unwrap(), - ); - headers.insert("content-type", content_type.parse().unwrap()); - headers.insert( - "content-length", - archive_bytes.len().to_string().parse().unwrap(), - ); - - tracing::debug!( - target: TRACING_TARGET, - file_count = valid_files.len(), - "Workspace files downloaded as archive", - ); - - Ok((StatusCode::OK, headers, archive_bytes)) -} - -fn download_archived_files_docs(op: TransformOperation) -> TransformOperation { - op.summary("Download archived files") - .description("Downloads all or specific workspace files as a compressed archive. Supports zip and tar.gz formats.") - .response::<200, ()>() - .response::<400, Json>() - .response::<401, Json>() - .response::<403, Json>() - .response::<404, Json>() -} - /// Returns a [`Router`] with all related routes. 
 ///
 /// [`Router`]: axum::routing::Router
@@ -717,11 +520,6 @@ pub fn routes() -> ApiRouter {
                 .layer(DefaultBodyLimit::max(DEFAULT_MAX_FILE_BODY_SIZE))
                 .get_with(list_files, list_files_docs),
         )
-        .api_route(
-            "/workspaces/{workspaceId}/files/batch",
-            get_with(download_archived_files, download_archived_files_docs)
-                .delete_with(delete_multiple_files, delete_multiple_files_docs),
-        )
         // File-specific routes (file ID is globally unique)
         .api_route(
             "/files/{fileId}",
diff --git a/crates/nvisy-server/src/handler/mod.rs b/crates/nvisy-server/src/handler/mod.rs
index 7ac13c1..c2f9460 100644
--- a/crates/nvisy-server/src/handler/mod.rs
+++ b/crates/nvisy-server/src/handler/mod.rs
@@ -6,15 +6,13 @@
 mod accounts;
 mod annotations;
 mod authentication;
-mod chat;
-mod comments;
-mod documents;
 mod error;
 mod files;
 mod integrations;
 mod invites;
 mod members;
 mod monitors;
+mod pipelines;
 pub mod request;
 pub mod response;
 mod runs;
@@ -52,10 +50,8 @@ fn private_routes(
         .merge(members::routes())
         .merge(webhooks::routes())
         .merge(files::routes())
-        .merge(documents::routes())
-        .merge(comments::routes())
         .merge(annotations::routes())
-        .merge(chat::routes());
+        .merge(pipelines::routes());
     if let Some(additional) = additional_routes {
         router = router.merge(additional);
 }
diff --git a/crates/nvisy-server/src/handler/pipelines.rs b/crates/nvisy-server/src/handler/pipelines.rs
new file mode 100644
index 0000000..578d3c7
--- /dev/null
+++ b/crates/nvisy-server/src/handler/pipelines.rs
@@ -0,0 +1,316 @@
+//! Pipeline management handlers for CRUD operations.
+//!
+//! This module provides pipeline management functionality, including creating,
+//! reading, updating, and deleting pipelines, as well as listing pipelines
+//! within a workspace. All operations are secured with role-based access control.
+
+use aide::axum::ApiRouter;
+use aide::transform::TransformOperation;
+use axum::extract::State;
+use axum::http::StatusCode;
+use nvisy_postgres::PgClient;
+use nvisy_postgres::query::PipelineRepository;
+
+use crate::extract::{AuthProvider, AuthState, Json, Path, Permission, Query, ValidateJson};
+use crate::handler::request::{
+    CreatePipeline, CursorPagination, PipelineFilter, PipelinePathParams, UpdatePipeline,
+    WorkspacePathParams,
+};
+use crate::handler::response::{ErrorResponse, Page, Pipeline, PipelineSummary};
+use crate::handler::{ErrorKind, Result};
+use crate::service::ServiceState;
+
+/// Tracing target for pipeline operations.
+const TRACING_TARGET: &str = "nvisy_server::handler::pipelines";
+
+/// Creates a new pipeline within a workspace.
+///
+/// The creator is automatically set as the owner of the pipeline.
+/// Requires `CreatePipelines` permission for the workspace.
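+///
+/// A minimal sketch of a request body, assuming the `CreatePipeline` fields
+/// defined in `request/pipelines.rs`; the values (and the `steps` key inside
+/// `definition`) are illustrative only:
+///
+/// ```ignore
+/// let request = CreatePipeline {
+///     name: "invoice-extraction".to_owned(),
+///     description: Some("Extracts line items from uploaded invoices".to_owned()),
+///     definition: Some(serde_json::json!({ "steps": [] })),
+///     metadata: None,
+/// };
+/// ```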
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        workspace_id = %path_params.workspace_id,
+    )
+)]
+async fn create_pipeline(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<WorkspacePathParams>,
+    ValidateJson(request): ValidateJson<CreatePipeline>,
+) -> Result<(StatusCode, Json<Pipeline>)> {
+    tracing::debug!(target: TRACING_TARGET, "Creating pipeline");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    auth_state
+        .authorize_workspace(
+            &mut conn,
+            path_params.workspace_id,
+            Permission::CreatePipelines,
+        )
+        .await?;
+
+    let new_pipeline = request.into_model(path_params.workspace_id, auth_state.account_id);
+    let pipeline = conn.create_pipeline(new_pipeline).await?;
+
+    let response = Pipeline::from_model(pipeline);
+
+    tracing::info!(
+        target: TRACING_TARGET,
+        pipeline_id = %response.pipeline_id,
+        "Pipeline created",
+    );
+
+    Ok((StatusCode::CREATED, Json(response)))
+}
+
+fn create_pipeline_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("Create pipeline")
+        .description("Creates a new pipeline in the workspace. The creator is set as the owner.")
+        .response::<201, Json<Pipeline>>()
+        .response::<400, Json<ErrorResponse>>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+}
+
+/// Lists all pipelines in a workspace with optional filtering.
+///
+/// Supports filtering by status and searching by name.
+/// Requires `ViewPipelines` permission for the workspace.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        workspace_id = %path_params.workspace_id,
+    )
+)]
+async fn list_pipelines(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<WorkspacePathParams>,
+    Query(pagination): Query<CursorPagination>,
+    Query(filter): Query<PipelineFilter>,
+) -> Result<(StatusCode, Json<Page<PipelineSummary>>)> {
+    tracing::debug!(target: TRACING_TARGET, "Listing pipelines");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    auth_state
+        .authorize_workspace(
+            &mut conn,
+            path_params.workspace_id,
+            Permission::ViewPipelines,
+        )
+        .await?;
+
+    let page = conn
+        .cursor_list_workspace_pipelines(
+            path_params.workspace_id,
+            pagination.into(),
+            filter.status,
+            filter.search.as_deref(),
+        )
+        .await?;
+
+    let response = Page::from_cursor_page(page, PipelineSummary::from_model);
+
+    tracing::debug!(
+        target: TRACING_TARGET,
+        pipeline_count = response.items.len(),
+        "Pipelines listed",
+    );
+
+    Ok((StatusCode::OK, Json(response)))
+}
+
+fn list_pipelines_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("List pipelines")
+        .description("Returns all pipelines in the workspace with optional filtering by status and name search.")
+        .response::<200, Json<Page<PipelineSummary>>>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+}
+
+/// Retrieves a pipeline by ID.
+///
+/// The workspace is derived from the pipeline record for authorization.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        pipeline_id = %path_params.pipeline_id,
+    )
+)]
+async fn get_pipeline(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<PipelinePathParams>,
+) -> Result<(StatusCode, Json<Pipeline>)> {
+    tracing::debug!(target: TRACING_TARGET, "Getting pipeline");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    let Some(pipeline) = conn.find_pipeline_by_id(path_params.pipeline_id).await? else {
+        return Err(ErrorKind::NotFound
+            .with_message("Pipeline not found")
+            .with_resource("pipeline"));
+    };
+
+    auth_state
+        .authorize_workspace(&mut conn, pipeline.workspace_id, Permission::ViewPipelines)
+        .await?;
+
+    let response = Pipeline::from_model(pipeline);
+
+    tracing::info!(target: TRACING_TARGET, "Pipeline retrieved");
+
+    Ok((StatusCode::OK, Json(response)))
+}
+
+fn get_pipeline_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("Get pipeline")
+        .description("Returns a pipeline by its unique identifier.")
+        .response::<200, Json<Pipeline>>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+        .response::<404, Json<ErrorResponse>>()
+}
+
+/// Updates an existing pipeline.
+///
+/// Requires `UpdatePipelines` permission for the pipeline's workspace.
+/// Only provided fields are updated, and the pipeline must be in an editable state.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        pipeline_id = %path_params.pipeline_id,
+    )
+)]
+async fn update_pipeline(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<PipelinePathParams>,
+    ValidateJson(request): ValidateJson<UpdatePipeline>,
+) -> Result<(StatusCode, Json<Pipeline>)> {
+    tracing::debug!(target: TRACING_TARGET, "Updating pipeline");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    let Some(existing) = conn.find_pipeline_by_id(path_params.pipeline_id).await? else {
+        return Err(ErrorKind::NotFound
+            .with_message("Pipeline not found")
+            .with_resource("pipeline"));
+    };
+
+    auth_state
+        .authorize_workspace(
+            &mut conn,
+            existing.workspace_id,
+            Permission::UpdatePipelines,
+        )
+        .await?;
+
+    // Reject changes while the pipeline is not in an editable state.
+    if !existing.is_editable() {
+        return Err(ErrorKind::BadRequest
+            .with_message("Pipeline cannot be edited in its current state")
+            .with_resource("pipeline"));
+    }
+
+    let update_data = request.into_model();
+    let pipeline = conn
+        .update_pipeline(path_params.pipeline_id, update_data)
+        .await?;
+
+    let response = Pipeline::from_model(pipeline);
+
+    tracing::info!(target: TRACING_TARGET, "Pipeline updated");
+
+    Ok((StatusCode::OK, Json(response)))
+}
+
+fn update_pipeline_docs(op: TransformOperation) -> TransformOperation {
+    op.summary("Update pipeline")
+        .description("Updates an existing pipeline. Only provided fields are updated. Pipeline must be in an editable state.")
+        .response::<200, Json<Pipeline>>()
+        .response::<400, Json<ErrorResponse>>()
+        .response::<401, Json<ErrorResponse>>()
+        .response::<403, Json<ErrorResponse>>()
+        .response::<404, Json<ErrorResponse>>()
+}
+
+/// Soft-deletes a pipeline.
+///
+/// Requires `DeletePipelines` permission. The pipeline is marked as deleted
+/// but data is retained for potential recovery.
+#[tracing::instrument(
+    skip_all,
+    fields(
+        account_id = %auth_state.account_id,
+        pipeline_id = %path_params.pipeline_id,
+    )
+)]
+async fn delete_pipeline(
+    State(pg_client): State<PgClient>,
+    AuthState(auth_state): AuthState,
+    Path(path_params): Path<PipelinePathParams>,
+) -> Result<StatusCode> {
+    tracing::debug!(target: TRACING_TARGET, "Deleting pipeline");
+
+    let mut conn = pg_client.get_connection().await?;
+
+    let Some(pipeline) = conn.find_pipeline_by_id(path_params.pipeline_id).await?
else { + return Err(ErrorKind::NotFound + .with_message("Pipeline not found") + .with_resource("pipeline")); + }; + + auth_state + .authorize_workspace( + &mut conn, + pipeline.workspace_id, + Permission::DeletePipelines, + ) + .await?; + + conn.delete_pipeline(path_params.pipeline_id).await?; + + tracing::info!(target: TRACING_TARGET, "Pipeline deleted"); + + Ok(StatusCode::OK) +} + +fn delete_pipeline_docs(op: TransformOperation) -> TransformOperation { + op.summary("Delete pipeline") + .description("Soft-deletes a pipeline. Data is retained for potential recovery.") + .response::<200, ()>() + .response::<401, Json>() + .response::<403, Json>() + .response::<404, Json>() +} + +/// Returns a [`Router`] with all pipeline-related routes. +/// +/// [`Router`]: axum::routing::Router +pub fn routes() -> ApiRouter { + use aide::axum::routing::*; + + ApiRouter::new() + // Workspace-scoped routes for listing and creating + .api_route( + "/workspaces/{workspaceId}/pipelines/", + post_with(create_pipeline, create_pipeline_docs) + .get_with(list_pipelines, list_pipelines_docs), + ) + // Pipeline operations by ID + .api_route( + "/pipelines/{pipelineId}/", + get_with(get_pipeline, get_pipeline_docs) + .patch_with(update_pipeline, update_pipeline_docs) + .delete_with(delete_pipeline, delete_pipeline_docs), + ) + .with_path_items(|item| item.tag("Pipelines")) +} diff --git a/crates/nvisy-server/src/handler/request/annotations.rs b/crates/nvisy-server/src/handler/request/annotations.rs index b0270a8..cc735eb 100644 --- a/crates/nvisy-server/src/handler/request/annotations.rs +++ b/crates/nvisy-server/src/handler/request/annotations.rs @@ -1,6 +1,6 @@ //! Annotation request types. -use nvisy_postgres::model::{NewDocumentAnnotation, UpdateDocumentAnnotation}; +use nvisy_postgres::model::{NewFileAnnotation, UpdateFileAnnotation}; use nvisy_postgres::types::AnnotationType; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -24,9 +24,9 @@ pub struct CreateAnnotation { impl CreateAnnotation { /// Converts to database model. - pub fn into_model(self, file_id: Uuid, account_id: Uuid) -> NewDocumentAnnotation { - NewDocumentAnnotation { - document_file_id: file_id, + pub fn into_model(self, file_id: Uuid, account_id: Uuid) -> NewFileAnnotation { + NewFileAnnotation { + file_id, account_id, content: self.content, annotation_type: Some(self.annotation_type), @@ -52,11 +52,12 @@ pub struct UpdateAnnotation { } impl UpdateAnnotation { - pub fn into_model(self) -> UpdateDocumentAnnotation { - UpdateDocumentAnnotation { + pub fn into_model(self) -> UpdateFileAnnotation { + UpdateFileAnnotation { content: self.content, annotation_type: self.annotation_type, metadata: self.metadata, + deleted_at: None, } } } diff --git a/crates/nvisy-server/src/handler/request/chat.rs b/crates/nvisy-server/src/handler/request/chat.rs deleted file mode 100644 index 2ea181d..0000000 --- a/crates/nvisy-server/src/handler/request/chat.rs +++ /dev/null @@ -1,74 +0,0 @@ -//! Chat session request types. - -use nvisy_postgres::model::{NewChatSession, UpdateChatSession as UpdateChatSessionModel}; -use nvisy_postgres::types::ChatSessionStatus; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; -use validator::Validate; - -/// Request payload for creating a new chat session. -#[must_use] -#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct CreateChatSession { - /// ID of the primary file being edited in this session. 
- pub primary_file_id: Uuid, - /// Display name of the session. - #[validate(length(min = 1, max = 255))] - pub display_name: Option, - /// LLM configuration (model, temperature, max tokens, etc.). - pub model_config: Option, -} - -impl CreateChatSession { - /// Converts this request into a database model. - pub fn into_model(self, workspace_id: Uuid, account_id: Uuid) -> NewChatSession { - NewChatSession { - workspace_id, - account_id, - primary_file_id: self.primary_file_id, - display_name: self.display_name, - model_config: self.model_config, - session_status: None, - } - } -} - -/// Request payload for updating a chat session. -#[must_use] -#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct UpdateChatSession { - /// Updated display name. - #[validate(length(min = 1, max = 255))] - pub display_name: Option, - /// Updated session status. - pub session_status: Option, - /// Updated LLM configuration. - pub model_config: Option, -} - -impl UpdateChatSession { - /// Converts this request into a database model. - pub fn into_model(self) -> UpdateChatSessionModel { - UpdateChatSessionModel { - display_name: self.display_name, - session_status: self.session_status, - model_config: self.model_config, - ..Default::default() - } - } -} - -/// Request payload for sending a chat message. -#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct SendChatMessage { - /// The message content to send. - #[validate(length(min = 1, max = 32000))] - pub content: String, - /// Optional model override for this message. - pub model: Option, -} diff --git a/crates/nvisy-server/src/handler/request/comments.rs b/crates/nvisy-server/src/handler/request/comments.rs deleted file mode 100644 index f62d430..0000000 --- a/crates/nvisy-server/src/handler/request/comments.rs +++ /dev/null @@ -1,57 +0,0 @@ -//! Document comment request types. - -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; -use validator::Validate; - -/// Request payload for creating a new document comment. -#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct CreateComment { - /// Comment text content. - #[validate(length(min = 1, max = 10000))] - pub content: String, - /// Parent comment ID for threaded replies. - pub parent_comment_id: Option, - /// Account being replied to (@mention). - pub reply_to_account_id: Option, -} - -impl CreateComment { - /// Converts to database model. - pub fn into_model( - self, - account_id: Uuid, - file_id: Uuid, - ) -> nvisy_postgres::model::NewDocumentComment { - nvisy_postgres::model::NewDocumentComment { - file_id, - account_id, - parent_comment_id: self.parent_comment_id, - reply_to_account_id: self.reply_to_account_id, - content: self.content, - ..Default::default() - } - } -} - -/// Request payload to update a document comment. -#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct UpdateComment { - /// Updated comment content. 
- #[validate(length(min = 1, max = 10000))] - pub content: Option, -} - -impl UpdateComment { - pub fn into_model(self) -> nvisy_postgres::model::UpdateDocumentComment { - nvisy_postgres::model::UpdateDocumentComment { - content: self.content, - ..Default::default() - } - } -} diff --git a/crates/nvisy-server/src/handler/request/documents.rs b/crates/nvisy-server/src/handler/request/documents.rs deleted file mode 100644 index 1eb9d93..0000000 --- a/crates/nvisy-server/src/handler/request/documents.rs +++ /dev/null @@ -1,67 +0,0 @@ -//! Document request types. - -use nvisy_postgres::model::{NewDocument, UpdateDocument as UpdateDocumentModel}; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; -use validator::Validate; - -use super::validations::is_alphanumeric; - -/// Request payload for creating a new document. -#[must_use] -#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct CreateDocument { - /// Display name of the document. - #[validate(length(min = 1, max = 255))] - pub display_name: String, - /// Description of the document. - #[validate(length(max = 200))] - pub description: Option, - /// Tags for document classification. - #[validate(length(max = 20))] - pub tags: Option>, -} - -impl CreateDocument { - /// Converts this request into a database model. - pub fn into_model(self, workspace_id: Uuid, account_id: Uuid) -> NewDocument { - NewDocument { - workspace_id, - account_id, - display_name: Some(self.display_name), - description: self.description, - tags: self.tags.map(|t| t.into_iter().map(Some).collect()), - ..Default::default() - } - } -} - -/// Request payload for updating a document. -#[must_use] -#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] -#[serde(rename_all = "camelCase")] -pub struct UpdateDocument { - /// Updated display name. - #[validate(length(min = 1, max = 255))] - pub display_name: Option, - /// Updated description. - #[validate(length(max = 2000))] - pub description: Option, - /// Updated tags (must be alphanumeric). - #[validate(length(min = 1, max = 20))] - #[validate(custom(function = "is_alphanumeric"))] - pub tags: Option>, -} - -impl UpdateDocument { - pub fn into_model(self) -> UpdateDocumentModel { - UpdateDocumentModel { - display_name: self.display_name, - description: self.description.map(Some), - tags: self.tags.map(|t| t.into_iter().map(Some).collect()), - ..Default::default() - } - } -} diff --git a/crates/nvisy-server/src/handler/request/files.rs b/crates/nvisy-server/src/handler/request/files.rs index e1e6947..5b4a75a 100644 --- a/crates/nvisy-server/src/handler/request/files.rs +++ b/crates/nvisy-server/src/handler/request/files.rs @@ -1,14 +1,11 @@ -//! Document file request types. +//! File request types. -use nvisy_postgres::model::UpdateDocumentFile; -use nvisy_postgres::types::{ContentSegmentation, FileFilter, FileFormat}; +use nvisy_postgres::model::UpdateFile as UpdateFileModel; +use nvisy_postgres::types::{FileFilter, FileFormat}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use uuid::Uuid; use validator::Validate; -use crate::service::ArchiveFormat; - /// Request to update file metadata. #[must_use] #[derive(Debug, Default, Serialize, Deserialize, Validate, JsonSchema)] @@ -17,63 +14,23 @@ pub struct UpdateFile { /// New display name for the file. #[validate(length(min = 1, max = 255))] pub display_name: Option, - /// New processing priority (1-10, higher = more priority). 
- #[validate(range(min = 1, max = 10))] - pub processing_priority: Option, - /// Document ID to assign the file to. - pub document_id: Option, - /// Knowledge extraction settings update. - #[serde(flatten)] - pub knowledge: Option, + /// Updated tags. + pub tags: Option>, + /// Updated metadata. + pub metadata: Option, } impl UpdateFile { - pub fn into_model(self) -> UpdateDocumentFile { - UpdateDocumentFile { + pub fn into_model(self) -> UpdateFileModel { + UpdateFileModel { display_name: self.display_name, - processing_priority: self.processing_priority, - document_id: self.document_id.map(Some), - is_indexed: self.knowledge.as_ref().and_then(|k| k.is_indexed), - content_segmentation: self.knowledge.as_ref().and_then(|k| k.content_segmentation), - visual_support: self.knowledge.as_ref().and_then(|k| k.visual_support), + tags: self.tags.map(|t| t.into_iter().map(Some).collect()), + metadata: self.metadata, ..Default::default() } } } -/// Request to update file knowledge extraction settings. -#[must_use] -#[derive(Debug, Default, Serialize, Deserialize, Validate, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct UpdateFileKnowledge { - /// Whether the file is indexed for knowledge extraction. - pub is_indexed: Option, - /// Content segmentation strategy for knowledge extraction. - pub content_segmentation: Option, - /// Whether visual elements are supported for knowledge extraction. - pub visual_support: Option, -} - -/// Request to delete multiple files. -#[derive(Debug, Deserialize, Validate, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct DeleteFiles { - /// File IDs to delete (1-100 files). - #[validate(length(min = 1, max = 100))] - pub file_ids: Vec, -} - -/// Request to download files as an archive. -#[derive(Debug, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct DownloadFiles { - /// Archive format. - pub format: ArchiveFormat, - /// Optional specific file IDs (if None, downloads all workspace files). - #[serde(skip_serializing_if = "Option::is_none")] - pub file_ids: Option>, -} - /// Query parameters for listing files. #[must_use] #[derive(Debug, Default, Serialize, Deserialize, JsonSchema)] diff --git a/crates/nvisy-server/src/handler/request/mod.rs b/crates/nvisy-server/src/handler/request/mod.rs index d6f24a7..972df00 100644 --- a/crates/nvisy-server/src/handler/request/mod.rs +++ b/crates/nvisy-server/src/handler/request/mod.rs @@ -3,9 +3,6 @@ mod accounts; mod annotations; mod authentications; -mod chat; -mod comments; -mod documents; mod files; mod integrations; mod invites; @@ -13,6 +10,7 @@ mod members; mod monitors; mod paginations; mod paths; +mod pipelines; mod tokens; mod validations; mod webhooks; @@ -21,9 +19,6 @@ mod workspaces; pub use accounts::*; pub use annotations::*; pub use authentications::*; -pub use chat::*; -pub use comments::*; -pub use documents::*; pub use files::*; pub use integrations::*; pub use invites::*; @@ -31,6 +26,7 @@ pub use members::*; pub use monitors::*; pub use paginations::*; pub use paths::*; +pub use pipelines::*; pub use tokens::*; pub use validations::*; pub use webhooks::*; diff --git a/crates/nvisy-server/src/handler/request/paths.rs b/crates/nvisy-server/src/handler/request/paths.rs index b59c919..a7a9275 100644 --- a/crates/nvisy-server/src/handler/request/paths.rs +++ b/crates/nvisy-server/src/handler/request/paths.rs @@ -13,15 +13,6 @@ pub struct WorkspacePathParams { pub workspace_id: Uuid, } -/// Path parameters for document operations. 
-#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct DocumentPathParams { - /// Unique identifier of the document. - pub document_id: Uuid, -} - /// Path parameters for workspace member operations. #[must_use] #[derive(Debug, Serialize, Deserialize, JsonSchema)] @@ -98,18 +89,6 @@ pub struct VersionPathParams { pub version_id: Uuid, } -/// Path parameters for comment operations (comment ID only). -/// -/// Since comment IDs are globally unique UUIDs, file/workspace context can be -/// derived from the comment record itself for authorization purposes. -#[must_use] -#[derive(Debug, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct CommentPathParams { - /// Unique identifier of the comment. - pub comment_id: Uuid, -} - /// Path parameters for webhook operations (webhook ID only). /// /// Since webhook IDs are globally unique UUIDs, workspace context can be @@ -170,14 +149,14 @@ pub struct AccountPathParams { pub account_id: Uuid, } -/// Path parameters for chat session operations (session ID only). +/// Path parameters for pipeline operations. /// -/// Since session IDs are globally unique UUIDs, workspace context can be -/// derived from the session record itself for authorization purposes. +/// Since pipeline IDs are globally unique UUIDs, workspace context can be +/// derived from the pipeline record itself for authorization purposes. #[must_use] #[derive(Debug, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] -pub struct ChatSessionPathParams { - /// Unique identifier of the chat session. - pub session_id: Uuid, +pub struct PipelinePathParams { + /// Unique identifier of the pipeline. + pub pipeline_id: Uuid, } diff --git a/crates/nvisy-server/src/handler/request/pipelines.rs b/crates/nvisy-server/src/handler/request/pipelines.rs new file mode 100644 index 0000000..acb5a19 --- /dev/null +++ b/crates/nvisy-server/src/handler/request/pipelines.rs @@ -0,0 +1,100 @@ +//! Pipeline request types. +//! +//! This module provides request DTOs for pipeline management operations including +//! creation, updates, and filtering. All request types support JSON serialization +//! and validation. + +use nvisy_postgres::model::{NewPipeline, UpdatePipeline as UpdatePipelineModel}; +use nvisy_postgres::types::PipelineStatus; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; +use validator::Validate; + +/// Request payload for creating a new pipeline. +/// +/// Creates a new pipeline with the specified configuration. The creator is +/// automatically set as the owner of the pipeline. +#[must_use] +#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(rename_all = "camelCase")] +pub struct CreatePipeline { + /// Pipeline name (3-100 characters). + #[validate(length(min = 3, max = 100))] + pub name: String, + /// Optional description of the pipeline (max 500 characters). + #[validate(length(max = 500))] + pub description: Option, + /// Pipeline definition containing steps and configuration. + pub definition: Option, + /// Extended metadata for the pipeline. + pub metadata: Option, +} + +impl CreatePipeline { + /// Converts this request into a [`NewPipeline`] model for database insertion. + /// + /// # Arguments + /// + /// * `workspace_id` - The ID of the workspace this pipeline belongs to. + /// * `account_id` - The ID of the account creating the pipeline. 
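+    ///
+    /// # Example
+    ///
+    /// A sketch of the conversion, assuming hypothetical `workspace_id` and
+    /// `account_id` values taken from the request path and auth claims; fields
+    /// not set on the request fall back to the model's `Default` values:
+    ///
+    /// ```ignore
+    /// let new_pipeline: NewPipeline = CreatePipeline {
+    ///     name: "invoice-extraction".to_owned(),
+    ///     ..Default::default()
+    /// }
+    /// .into_model(workspace_id, account_id);
+    /// ```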
+ #[inline] + pub fn into_model(self, workspace_id: Uuid, account_id: Uuid) -> NewPipeline { + NewPipeline { + workspace_id, + account_id, + name: self.name, + description: self.description, + definition: self.definition, + metadata: self.metadata, + ..Default::default() + } + } +} + +/// Request payload to update an existing pipeline. +/// +/// All fields are optional; only provided fields will be updated. +#[must_use] +#[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(rename_all = "camelCase")] +pub struct UpdatePipeline { + /// New name for the pipeline (3-100 characters). + #[validate(length(min = 3, max = 100))] + pub name: Option, + /// New description for the pipeline (max 500 characters). + #[validate(length(max = 500))] + pub description: Option, + /// New status for the pipeline. + pub status: Option, + /// New definition for the pipeline. + pub definition: Option, + /// New metadata for the pipeline. + pub metadata: Option, +} + +impl UpdatePipeline { + /// Converts this request into an [`UpdatePipelineModel`] for database update. + pub fn into_model(self) -> UpdatePipelineModel { + UpdatePipelineModel { + name: self.name, + description: self.description.map(Some), + status: self.status, + definition: self.definition, + metadata: self.metadata, + ..Default::default() + } + } +} + +/// Query parameters for filtering pipelines. +#[must_use] +#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema, Validate)] +#[serde(rename_all = "camelCase")] +pub struct PipelineFilter { + /// Filter by pipeline status. + pub status: Option, + /// Search by pipeline name (trigram similarity). + #[validate(length(max = 100))] + pub search: Option, +} diff --git a/crates/nvisy-server/src/handler/response/annotations.rs b/crates/nvisy-server/src/handler/response/annotations.rs index a77d0bd..f56077e 100644 --- a/crates/nvisy-server/src/handler/response/annotations.rs +++ b/crates/nvisy-server/src/handler/response/annotations.rs @@ -1,7 +1,7 @@ //! Document annotation response types. use jiff::Timestamp; -use nvisy_postgres::model::DocumentAnnotation; +use nvisy_postgres::model::FileAnnotation; use nvisy_postgres::types::AnnotationType; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -33,10 +33,10 @@ pub struct Annotation { pub type AnnotationsPage = Page; impl Annotation { - pub fn from_model(annotation: DocumentAnnotation) -> Self { + pub fn from_model(annotation: FileAnnotation) -> Self { Self { id: annotation.id, - file_id: annotation.document_file_id, + file_id: annotation.file_id, account_id: annotation.account_id, content: annotation.content, annotation_type: annotation.annotation_type, diff --git a/crates/nvisy-server/src/handler/response/chat.rs b/crates/nvisy-server/src/handler/response/chat.rs deleted file mode 100644 index 31f64c1..0000000 --- a/crates/nvisy-server/src/handler/response/chat.rs +++ /dev/null @@ -1,94 +0,0 @@ -//! Chat session response types. - -use jiff::Timestamp; -use nvisy_postgres::model; -use nvisy_postgres::types::ChatSessionStatus; -use nvisy_rig::chat::ChatEvent; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::Page; - -/// Represents a chat session with full details. -#[must_use] -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct ChatSession { - /// Unique session identifier. - pub session_id: Uuid, - /// ID of the workspace this session belongs to. 
- pub workspace_id: Uuid, - /// ID of the account that owns this session. - pub account_id: Uuid, - /// ID of the primary file being edited. - pub primary_file_id: Uuid, - /// Display name of the session. - pub display_name: String, - /// Current session status. - pub session_status: ChatSessionStatus, - /// LLM configuration. - pub model_config: serde_json::Value, - /// Total number of messages in this session. - pub message_count: i32, - /// Total tokens used in this session. - pub token_count: i32, - /// Timestamp when the session was created. - pub created_at: Timestamp, - /// Timestamp when the session was last updated. - pub updated_at: Timestamp, -} - -impl ChatSession { - /// Creates a response from a database model. - pub fn from_model(session: model::ChatSession) -> Self { - Self { - session_id: session.id, - workspace_id: session.workspace_id, - account_id: session.account_id, - primary_file_id: session.primary_file_id, - display_name: session.display_name, - session_status: session.session_status, - model_config: session.model_config, - message_count: session.message_count, - token_count: session.token_count, - created_at: session.created_at.into(), - updated_at: session.updated_at.into(), - } - } -} - -/// Paginated list of chat sessions. -pub type ChatSessionsPage = Page; - -/// SSE event wrapper for chat streaming. -/// -/// This wraps `ChatEvent` from nvisy-rig and provides SSE-compatible serialization. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct ChatStreamEvent { - /// The underlying chat event. - #[serde(flatten)] - pub event: ChatEvent, -} - -impl ChatStreamEvent { - /// Creates a new stream event from a chat event. - pub fn new(event: ChatEvent) -> Self { - Self { event } - } - - /// Returns the SSE event type name. - pub fn event_type(&self) -> &'static str { - match &self.event { - ChatEvent::Thinking { .. } => "thinking", - ChatEvent::TextDelta { .. } => "text_delta", - ChatEvent::ToolCall { .. } => "tool_call", - ChatEvent::ToolResult { .. } => "tool_result", - ChatEvent::ProposedEdit { .. } => "proposed_edit", - ChatEvent::EditApplied { .. } => "edit_applied", - ChatEvent::Done { .. } => "done", - ChatEvent::Error { .. } => "error", - } - } -} diff --git a/crates/nvisy-server/src/handler/response/comments.rs b/crates/nvisy-server/src/handler/response/comments.rs deleted file mode 100644 index cfc84ba..0000000 --- a/crates/nvisy-server/src/handler/response/comments.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! Document comment response types. - -use jiff::Timestamp; -use nvisy_postgres::model; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::Page; - -/// Represents a document comment. -#[must_use] -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct Comment { - /// ID of the comment. - pub comment_id: Uuid, - /// ID of the file this comment belongs to. - pub file_id: Uuid, - /// ID of the account that created the comment. - pub account_id: Uuid, - /// Parent comment ID for threaded replies. - pub parent_comment_id: Option, - /// Account being replied to (@mention). - pub reply_to_account_id: Option, - /// Comment text content. - pub content: Option, - /// Timestamp when the comment was created. - pub created_at: Timestamp, - /// Timestamp when the comment was last updated. - pub updated_at: Timestamp, -} - -/// Paginated list of comments. 
-pub type CommentsPage = Page; - -impl Comment { - pub fn from_model(comment: model::DocumentComment) -> Self { - Self { - comment_id: comment.id, - file_id: comment.file_id, - account_id: comment.account_id, - parent_comment_id: comment.parent_comment_id, - reply_to_account_id: comment.reply_to_account_id, - content: comment.get_content(), - created_at: comment.created_at.into(), - updated_at: comment.updated_at.into(), - } - } -} diff --git a/crates/nvisy-server/src/handler/response/documents.rs b/crates/nvisy-server/src/handler/response/documents.rs deleted file mode 100644 index 0973ad9..0000000 --- a/crates/nvisy-server/src/handler/response/documents.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! Document response types. - -use jiff::Timestamp; -use nvisy_postgres::model; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::Page; - -/// Represents a document with full details. -#[must_use] -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct Document { - /// ID of the document. - pub document_id: Uuid, - /// ID of the workspace that the document belongs to. - pub workspace_id: Uuid, - /// ID of the account that owns the document. - pub account_id: Uuid, - /// Display name of the document. - pub display_name: String, - /// Description of the document. - pub description: Option, - /// Tags associated with the document. - pub tags: Vec, - /// Timestamp when the document was created. - pub created_at: Timestamp, - /// Timestamp when the document was last updated. - pub updated_at: Timestamp, -} - -/// Paginated list of documents. -pub type DocumentsPage = Page; - -impl Document { - pub fn from_model(document: model::Document) -> Self { - Self { - tags: document.tags(), - document_id: document.id, - workspace_id: document.workspace_id, - account_id: document.account_id, - display_name: document.display_name, - description: document.description, - created_at: document.created_at.into(), - updated_at: document.updated_at.into(), - } - } -} diff --git a/crates/nvisy-server/src/handler/response/files.rs b/crates/nvisy-server/src/handler/response/files.rs index bd103a4..89e8587 100644 --- a/crates/nvisy-server/src/handler/response/files.rs +++ b/crates/nvisy-server/src/handler/response/files.rs @@ -1,51 +1,45 @@ -//! Document file response types. +//! File response types. use jiff::Timestamp; -use nvisy_postgres::model::DocumentFile; -use nvisy_postgres::types::{ContentSegmentation, FileSource, ProcessingStatus}; +use nvisy_postgres::model::File as FileModel; +use nvisy_postgres::types::FileSource; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::Page; -/// Knowledge-related fields for file responses. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct FileKnowledge { - /// Whether the file is indexed for knowledge extraction. - pub is_indexed: bool, - - /// Content segmentation strategy. - pub content_segmentation: ContentSegmentation, - - /// Whether visual elements are supported. - pub visual_support: bool, -} - -/// Represents an uploaded file. +/// Represents a file in responses. #[must_use] #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct File { /// Unique file identifier. - pub file_id: Uuid, + pub id: Uuid, + /// Workspace this file belongs to. + pub workspace_id: Uuid, /// Display name. 
pub display_name: String, + /// Original filename when uploaded. + pub original_filename: String, + /// File extension (without dot). + pub file_extension: String, + /// MIME type. + #[serde(skip_serializing_if = "Option::is_none")] + pub mime_type: Option, /// File size in bytes. pub file_size: i64, - /// Processing status. - pub status: ProcessingStatus, - /// Processing priority (1-10). - pub processing_priority: i32, /// Classification tags. pub tags: Vec, /// How the file was created (uploaded, imported, generated). pub source: FileSource, /// Account ID of the user who uploaded/created the file. pub uploaded_by: Uuid, - /// Knowledge extraction settings. - pub file_knowledge: FileKnowledge, + /// Version number (1 for original, higher for newer versions). + pub version_number: i32, + /// Parent file ID if this is a newer version. + #[serde(skip_serializing_if = "Option::is_none")] + pub parent_id: Option, /// Creation timestamp. pub created_at: Timestamp, /// Last update timestamp. @@ -53,21 +47,20 @@ pub struct File { } impl File { - pub fn from_model(file: DocumentFile) -> Self { + pub fn from_model(file: FileModel) -> Self { Self { - file_id: file.id, + id: file.id, + workspace_id: file.workspace_id, display_name: file.display_name, + original_filename: file.original_filename, + file_extension: file.file_extension, + mime_type: file.mime_type, file_size: file.file_size_bytes, - status: file.processing_status, - processing_priority: file.processing_priority, tags: file.tags.into_iter().flatten().collect(), source: file.source, uploaded_by: file.account_id, - file_knowledge: FileKnowledge { - is_indexed: file.is_indexed, - content_segmentation: file.content_segmentation, - visual_support: file.visual_support, - }, + version_number: file.version_number, + parent_id: file.parent_id, created_at: file.created_at.into(), updated_at: file.updated_at.into(), } diff --git a/crates/nvisy-server/src/handler/response/mod.rs b/crates/nvisy-server/src/handler/response/mod.rs index eb2bcd8..377db11 100644 --- a/crates/nvisy-server/src/handler/response/mod.rs +++ b/crates/nvisy-server/src/handler/response/mod.rs @@ -8,9 +8,6 @@ mod accounts; mod activities; mod annotations; mod authentications; -mod chat; -mod comments; -mod documents; mod errors; mod files; mod integrations; @@ -18,6 +15,7 @@ mod invites; mod members; mod monitors; mod notifications; +mod pipelines; mod runs; mod tokens; mod webhooks; @@ -27,9 +25,6 @@ pub use accounts::*; pub use activities::*; pub use annotations::*; pub use authentications::*; -pub use chat::*; -pub use comments::*; -pub use documents::*; pub use errors::*; pub use files::*; pub use integrations::*; @@ -37,6 +32,7 @@ pub use invites::*; pub use members::*; pub use monitors::*; pub use notifications::*; +pub use pipelines::*; pub use runs::*; pub use tokens::*; pub use webhooks::*; diff --git a/crates/nvisy-server/src/handler/response/pipelines.rs b/crates/nvisy-server/src/handler/response/pipelines.rs new file mode 100644 index 0000000..7a1fa6a --- /dev/null +++ b/crates/nvisy-server/src/handler/response/pipelines.rs @@ -0,0 +1,111 @@ +//! Pipeline response types. + +use jiff::Timestamp; +use nvisy_postgres::model; +use nvisy_postgres::types::PipelineStatus; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::Page; + +/// Pipeline response. +#[must_use] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct Pipeline { + /// Unique pipeline identifier. 
+ pub pipeline_id: Uuid, + /// Workspace this pipeline belongs to. + pub workspace_id: Uuid, + /// Account that created this pipeline. + pub account_id: Uuid, + /// Pipeline name. + pub name: String, + /// Pipeline description. + pub description: Option, + /// Pipeline lifecycle status. + pub status: PipelineStatus, + /// Pipeline definition (steps, configuration). + pub definition: serde_json::Value, + /// Extended metadata. + pub metadata: serde_json::Value, + /// Number of steps in the pipeline. + pub step_count: usize, + /// Whether the pipeline can be executed. + pub is_runnable: bool, + /// Whether the pipeline can be edited. + pub is_editable: bool, + /// Timestamp when the pipeline was created. + pub created_at: Timestamp, + /// Timestamp when the pipeline was last updated. + pub updated_at: Timestamp, +} + +impl Pipeline { + /// Creates a new instance of [`Pipeline`] from the database model. + pub fn from_model(pipeline: model::Pipeline) -> Self { + Self { + pipeline_id: pipeline.id, + workspace_id: pipeline.workspace_id, + account_id: pipeline.account_id, + name: pipeline.name.clone(), + description: pipeline.description.clone(), + status: pipeline.status, + step_count: pipeline.step_count(), + is_runnable: pipeline.is_runnable(), + is_editable: pipeline.is_editable(), + definition: pipeline.definition.clone(), + metadata: pipeline.metadata.clone(), + created_at: pipeline.created_at.into(), + updated_at: pipeline.updated_at.into(), + } + } +} + +/// Paginated list of pipelines. +pub type PipelinesPage = Page; + +/// Summary response for pipeline (used in lists). +#[must_use] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct PipelineSummary { + /// Unique pipeline identifier. + pub pipeline_id: Uuid, + /// Pipeline name. + pub name: String, + /// Pipeline description. + pub description: Option, + /// Pipeline lifecycle status. + pub status: PipelineStatus, + /// Number of steps in the pipeline. + pub step_count: usize, + /// Whether the pipeline can be executed. + pub is_runnable: bool, + /// Timestamp when the pipeline was created. + pub created_at: Timestamp, + /// Timestamp when the pipeline was last updated. + pub updated_at: Timestamp, +} + +impl PipelineSummary { + /// Creates a new instance of [`PipelineSummary`] from the database model. + pub fn from_model(pipeline: model::Pipeline) -> Self { + let step_count = pipeline.step_count(); + let is_runnable = pipeline.is_runnable(); + Self { + pipeline_id: pipeline.id, + name: pipeline.name, + description: pipeline.description, + status: pipeline.status, + step_count, + is_runnable, + created_at: pipeline.created_at.into(), + updated_at: pipeline.updated_at.into(), + } + } +} + +/// Paginated list of pipeline summaries. 
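+///
+/// A sketch of how a list handler is expected to build this page, mirroring the
+/// `Page::from_cursor_page` call in `list_pipelines`; `cursor_page` stands in for
+/// whatever the cursor query returns and is a placeholder name:
+///
+/// ```ignore
+/// let page: PipelineSummariesPage =
+///     Page::from_cursor_page(cursor_page, PipelineSummary::from_model);
+/// ```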
+pub type PipelineSummariesPage = Page; diff --git a/crates/nvisy-server/src/middleware/specification.rs b/crates/nvisy-server/src/middleware/specification.rs index fa49681..0a0d58c 100644 --- a/crates/nvisy-server/src/middleware/specification.rs +++ b/crates/nvisy-server/src/middleware/specification.rs @@ -161,19 +161,14 @@ fn api_docs(api: TransformOpenApi) -> TransformOpenApi { description: Some("Workspace creation and management".into()), ..Default::default() }) - .tag(Tag { - name: "Documents".into(), - description: Some("Document upload, processing, and retrieval".into()), - ..Default::default() - }) .tag(Tag { name: "Files".into(), description: Some("File upload, download, and management".into()), ..Default::default() }) .tag(Tag { - name: "Comments".into(), - description: Some("Document and file annotations".into()), + name: "Annotations".into(), + description: Some("File annotations".into()), ..Default::default() }) .tag(Tag { @@ -201,9 +196,4 @@ fn api_docs(api: TransformOpenApi) -> TransformOpenApi { description: Some("Webhook configuration".into()), ..Default::default() }) - .tag(Tag { - name: "Chat".into(), - description: Some("AI chat and document interaction".into()), - ..Default::default() - }) } diff --git a/crates/nvisy-server/src/service/mod.rs b/crates/nvisy-server/src/service/mod.rs index 29af2b7..6394045 100644 --- a/crates/nvisy-server/src/service/mod.rs +++ b/crates/nvisy-server/src/service/mod.rs @@ -8,12 +8,8 @@ mod security; use nvisy_nats::NatsClient; use nvisy_postgres::PgClient; use nvisy_rig::RigService; -use nvisy_runtime::RuntimeService; use nvisy_webhook::WebhookService; -// Re-export archive types for handler use -pub use nvisy_runtime::{ArchiveFormat, ArchiveResult, ArchiveService}; - use crate::Result; pub use crate::service::cache::HealthCache; pub use crate::service::config::ServiceConfig; @@ -35,10 +31,8 @@ pub struct ServiceState { pub nats: NatsClient, pub webhook: WebhookService, - // AI & document services: + // AI services: pub rig: RigService, - pub runtime: RuntimeService, - pub archive: ArchiveService, // Internal services: pub health_cache: HealthCache, @@ -77,8 +71,6 @@ impl ServiceState { webhook: webhook_service, rig, - runtime: RuntimeService::new(), - archive: ArchiveService::new(), health_cache: HealthCache::new(), integration_provider: IntegrationProvider::new(), @@ -107,10 +99,8 @@ impl_di!(postgres: PgClient); impl_di!(nats: NatsClient); impl_di!(webhook: WebhookService); -// AI and document services: +// AI services: impl_di!(rig: RigService); -impl_di!(runtime: RuntimeService); -impl_di!(archive: ArchiveService); // Internal services: impl_di!(health_cache: HealthCache); diff --git a/migrations/2025-05-27-011852_documents/down.sql b/migrations/2025-05-27-011852_documents/down.sql index 1af64ba..b9ff2d3 100644 --- a/migrations/2025-05-27-011852_documents/down.sql +++ b/migrations/2025-05-27-011852_documents/down.sql @@ -1,24 +1,20 @@ -- Drop all objects created in the documents migration -- Drop in reverse order of creation to avoid dependency issues --- Drop functions -DROP FUNCTION IF EXISTS find_duplicate_files(_document_id UUID); +-- Drop tables (indexes dropped automatically with tables) +DROP TABLE IF EXISTS file_annotations; +DROP TABLE IF EXISTS file_chunks; --- Drop views -DROP VIEW IF EXISTS processing_queue; -DROP VIEW IF EXISTS document_processing_summary; +-- Drop trigger before the function it depends on +DROP TRIGGER IF EXISTS files_set_version_trigger ON files; --- Drop tables (indexes and remaining triggers dropped 
automatically with tables) -DROP TABLE IF EXISTS document_annotations; -DROP TABLE IF EXISTS document_comments; -DROP TABLE IF EXISTS document_chunks; -DROP TABLE IF EXISTS document_files; -DROP TABLE IF EXISTS documents; +-- Drop files table +DROP TABLE IF EXISTS files; + +-- Drop functions (after triggers that depend on them) +DROP FUNCTION IF EXISTS find_duplicate_files(UUID); +DROP FUNCTION IF EXISTS set_file_version_number(); -- Drop enum types DROP TYPE IF EXISTS ANNOTATION_TYPE; DROP TYPE IF EXISTS FILE_SOURCE; -DROP TYPE IF EXISTS CONTENT_SEGMENTATION; -DROP TYPE IF EXISTS REQUIRE_MODE; -DROP TYPE IF EXISTS PROCESSING_STATUS; -DROP TYPE IF EXISTS DOCUMENT_STATUS; diff --git a/migrations/2025-05-27-011852_documents/up.sql b/migrations/2025-05-27-011852_documents/up.sql index 4b64708..37da9d5 100644 --- a/migrations/2025-05-27-011852_documents/up.sql +++ b/migrations/2025-05-27-011852_documents/up.sql @@ -1,158 +1,43 @@ --- This migration creates tables for documents, files, processing pipeline, and security features - --- Create documents table - Document containers/folders -CREATE TABLE documents ( - -- Primary identifiers - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, - account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - - -- Core attributes - display_name TEXT NOT NULL DEFAULT 'Untitled', - description TEXT DEFAULT NULL, - tags TEXT[] NOT NULL DEFAULT '{}', - - CONSTRAINT documents_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), - CONSTRAINT documents_description_length_max CHECK (length(description) <= 2048), - CONSTRAINT documents_tags_count_max CHECK (array_length(tags, 1) IS NULL OR array_length(tags, 1) <= 32), - - -- Configuration - metadata JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT documents_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 16384), - - -- Lifecycle timestamps - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - deleted_at TIMESTAMPTZ DEFAULT NULL, - - CONSTRAINT documents_updated_after_created CHECK (updated_at >= created_at), - CONSTRAINT documents_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), - CONSTRAINT documents_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) -); - --- Set up automatic updated_at trigger -SELECT setup_updated_at('documents'); - --- Create indexes for documents -CREATE INDEX documents_workspace_idx - ON documents (workspace_id, created_at DESC) - WHERE deleted_at IS NULL; - -CREATE INDEX documents_account_recent_idx - ON documents (account_id, updated_at DESC) - WHERE deleted_at IS NULL; - -CREATE INDEX documents_tags_search_idx - ON documents USING gin (tags) - WHERE array_length(tags, 1) > 0 AND deleted_at IS NULL; - -CREATE INDEX documents_metadata_search_idx - ON documents USING gin (metadata) - WHERE deleted_at IS NULL; - -CREATE INDEX documents_display_name_trgm_idx - ON documents USING gin (display_name gin_trgm_ops) - WHERE deleted_at IS NULL; - --- Add table and column comments -COMMENT ON TABLE documents IS - 'Document containers for organizing and managing file collections with metadata.'; - -COMMENT ON COLUMN documents.id IS 'Unique document identifier'; -COMMENT ON COLUMN documents.workspace_id IS 'Parent workspace reference'; -COMMENT ON COLUMN documents.account_id IS 'Creating account reference'; -COMMENT ON COLUMN documents.display_name IS 
'Human-readable document name (1-255 chars)'; -COMMENT ON COLUMN documents.description IS 'Document description (up to 2048 chars)'; -COMMENT ON COLUMN documents.tags IS 'Classification tags (max 32)'; -COMMENT ON COLUMN documents.metadata IS 'Extended metadata (JSON, 2B-16KB)'; -COMMENT ON COLUMN documents.created_at IS 'Creation timestamp'; -COMMENT ON COLUMN documents.updated_at IS 'Last modification timestamp'; -COMMENT ON COLUMN documents.deleted_at IS 'Soft deletion timestamp'; - --- Create file processing status enum -CREATE TYPE PROCESSING_STATUS AS ENUM ( - 'pending', -- File is queued for processing - 'processing', -- File is currently being processed - 'ready', -- Processing completed, file is ready for use - 'canceled' -- Processing was canceled -); - -COMMENT ON TYPE PROCESSING_STATUS IS - 'File processing pipeline status for tracking processing workflows.'; - --- Create processing requirements enum -CREATE TYPE REQUIRE_MODE AS ENUM ( - 'none', -- No special processing required - 'optical', -- Requires OCR to extract text from images - 'language', -- Requires VLM for advanced content understanding - 'both' -- Requires both OCR and VLM processing -); - -COMMENT ON TYPE REQUIRE_MODE IS - 'Processing requirements for input files based on content type.'; - --- Create content segmentation enum -CREATE TYPE CONTENT_SEGMENTATION AS ENUM ( - 'none', -- No segmentation applied - 'semantic', -- Semantic-based segmentation - 'fixed' -- Fixed-size segmentation -); - -COMMENT ON TYPE CONTENT_SEGMENTATION IS - 'Content segmentation strategy for document processing.'; +-- This migration creates tables for files, chunks, and annotations -- Create file source enum CREATE TYPE FILE_SOURCE AS ENUM ( 'uploaded', -- File was manually uploaded by a user 'imported', -- File was imported from an external source - 'generated' -- File was generated by the system + 'generated' -- File was generated by the system (pipeline output) ); COMMENT ON TYPE FILE_SOURCE IS 'Indicates how a file was created in the system.'; --- Create document files table - Source files for processing -CREATE TABLE document_files ( +-- Create files table (renamed from document_files, standalone without documents container) +CREATE TABLE files ( -- Primary identifiers id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- References - workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, - document_id UUID DEFAULT NULL REFERENCES documents (id) ON DELETE CASCADE, + workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - parent_id UUID DEFAULT NULL REFERENCES document_files (id) ON DELETE SET NULL, + parent_id UUID DEFAULT NULL REFERENCES files (id) ON DELETE SET NULL, -- Version tracking (parent_id links to previous version, version_number tracks sequence) version_number INTEGER NOT NULL DEFAULT 1, - CONSTRAINT document_files_version_number_min CHECK (version_number >= 1), + CONSTRAINT files_version_number_min CHECK (version_number >= 1), -- File metadata display_name TEXT NOT NULL DEFAULT 'Untitled', original_filename TEXT NOT NULL DEFAULT 'Untitled', file_extension TEXT NOT NULL DEFAULT 'txt', + mime_type TEXT DEFAULT NULL, tags TEXT[] NOT NULL DEFAULT '{}', source FILE_SOURCE NOT NULL DEFAULT 'uploaded', - CONSTRAINT document_files_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), - CONSTRAINT document_files_original_filename_length CHECK (length(original_filename) BETWEEN 1 AND 255), - CONSTRAINT 
document_files_file_extension_format CHECK (file_extension ~ '^[a-zA-Z0-9]{1,20}$'), - CONSTRAINT document_files_tags_count_max CHECK (array_length(tags, 1) IS NULL OR array_length(tags, 1) <= 32), - - -- Processing configuration - require_mode REQUIRE_MODE NOT NULL DEFAULT 'none', - processing_priority INTEGER NOT NULL DEFAULT 5, - processing_status PROCESSING_STATUS NOT NULL DEFAULT 'pending', - - CONSTRAINT document_files_processing_priority_range CHECK (processing_priority BETWEEN 1 AND 10), - - -- Knowledge extraction configuration - is_indexed BOOLEAN NOT NULL DEFAULT FALSE, - content_segmentation CONTENT_SEGMENTATION NOT NULL DEFAULT 'semantic', - visual_support BOOLEAN NOT NULL DEFAULT FALSE, + CONSTRAINT files_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), + CONSTRAINT files_original_filename_length CHECK (length(original_filename) BETWEEN 1 AND 255), + CONSTRAINT files_file_extension_format CHECK (file_extension ~ '^[a-zA-Z0-9]{1,20}$'), + CONSTRAINT files_mime_type_format CHECK (mime_type IS NULL OR mime_type ~ '^[a-zA-Z0-9\-]+/[a-zA-Z0-9\-\.\+]+$'), + CONSTRAINT files_tags_count_max CHECK (array_length(tags, 1) IS NULL OR array_length(tags, 1) <= 32), -- Storage and integrity file_size_bytes BIGINT NOT NULL, @@ -160,84 +45,57 @@ CREATE TABLE document_files ( storage_path TEXT NOT NULL, storage_bucket TEXT NOT NULL, - CONSTRAINT document_files_file_size_min CHECK (file_size_bytes >= 0), - CONSTRAINT document_files_file_hash_sha256_length CHECK (octet_length(file_hash_sha256) = 32), - CONSTRAINT document_files_storage_path_not_empty CHECK (trim(storage_path) <> ''), - CONSTRAINT document_files_storage_bucket_not_empty CHECK (trim(storage_bucket) <> ''), + CONSTRAINT files_file_size_min CHECK (file_size_bytes >= 0), + CONSTRAINT files_file_hash_sha256_length CHECK (octet_length(file_hash_sha256) = 32), + CONSTRAINT files_storage_path_not_empty CHECK (trim(storage_path) <> ''), + CONSTRAINT files_storage_bucket_not_empty CHECK (trim(storage_bucket) <> ''), -- Configuration metadata JSONB NOT NULL DEFAULT '{}', - CONSTRAINT document_files_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 8192), + CONSTRAINT files_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 65536), -- Lifecycle timestamps created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, deleted_at TIMESTAMPTZ DEFAULT NULL, - CONSTRAINT document_files_updated_after_created CHECK (updated_at >= created_at), - CONSTRAINT document_files_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), - CONSTRAINT document_files_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) + CONSTRAINT files_updated_after_created CHECK (updated_at >= created_at), + CONSTRAINT files_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), + CONSTRAINT files_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) ); -- Set up automatic updated_at trigger -SELECT setup_updated_at('document_files'); +SELECT setup_updated_at('files'); --- Create indexes for document files -CREATE INDEX document_files_processing_status_idx - ON document_files (document_id, processing_status, created_at DESC) +-- Create indexes for files +CREATE INDEX files_workspace_idx + ON files (workspace_id, created_at DESC) WHERE deleted_at IS NULL; -CREATE INDEX document_files_processing_queue_idx - ON document_files (processing_status, processing_priority DESC, created_at ASC) - WHERE 
processing_status = 'pending' AND deleted_at IS NULL; +CREATE INDEX files_account_idx + ON files (account_id, created_at DESC) + WHERE deleted_at IS NULL; -CREATE INDEX document_files_hash_dedup_idx - ON document_files (file_hash_sha256, file_size_bytes) +CREATE INDEX files_hash_dedup_idx + ON files (file_hash_sha256, file_size_bytes) WHERE deleted_at IS NULL; -CREATE INDEX document_files_tags_search_idx - ON document_files USING gin (tags) +CREATE INDEX files_tags_search_idx + ON files USING gin (tags) WHERE array_length(tags, 1) > 0 AND deleted_at IS NULL; -CREATE INDEX document_files_indexed_idx - ON document_files (is_indexed, content_segmentation) - WHERE is_indexed = TRUE AND deleted_at IS NULL; - -CREATE INDEX document_files_display_name_trgm_idx - ON document_files USING gin (display_name gin_trgm_ops) +CREATE INDEX files_display_name_trgm_idx + ON files USING gin (display_name gin_trgm_ops) WHERE deleted_at IS NULL; -CREATE INDEX document_files_version_chain_idx - ON document_files (parent_id, version_number DESC) +CREATE INDEX files_version_chain_idx + ON files (parent_id, version_number DESC) WHERE parent_id IS NOT NULL AND deleted_at IS NULL; --- Trigger function to ensure parent file is from the same document -CREATE OR REPLACE FUNCTION check_parent_same_document() -RETURNS TRIGGER AS $$ -BEGIN - IF NEW.parent_id IS NOT NULL THEN - IF NOT EXISTS ( - SELECT 1 FROM document_files - WHERE id = NEW.parent_id - AND (document_id IS NOT DISTINCT FROM NEW.document_id) - ) THEN - RAISE EXCEPTION 'Parent file must belong to the same document' - USING ERRCODE = 'check_violation', - CONSTRAINT = 'document_files_parent_same_document'; - END IF; - END IF; - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - -CREATE TRIGGER document_files_parent_same_document_trigger - BEFORE INSERT OR UPDATE OF parent_id, document_id ON document_files - FOR EACH ROW - EXECUTE FUNCTION check_parent_same_document(); - -COMMENT ON FUNCTION check_parent_same_document() IS - 'Ensures parent_id references a file in the same document.'; +CREATE INDEX files_source_idx + ON files (source, workspace_id) + WHERE deleted_at IS NULL; -- Trigger function to auto-set version_number based on parent CREATE OR REPLACE FUNCTION set_file_version_number() @@ -246,7 +104,7 @@ BEGIN -- If parent_id is set, calculate version as parent's version + 1 IF NEW.parent_id IS NOT NULL THEN SELECT version_number + 1 INTO NEW.version_number - FROM document_files + FROM files WHERE id = NEW.parent_id; ELSE -- No parent means version 1 @@ -256,8 +114,8 @@ BEGIN END; $$ LANGUAGE plpgsql; -CREATE TRIGGER document_files_set_version_trigger - BEFORE INSERT ON document_files +CREATE TRIGGER files_set_version_trigger + BEFORE INSERT ON files FOR EACH ROW EXECUTE FUNCTION set_file_version_number(); @@ -265,42 +123,36 @@ COMMENT ON FUNCTION set_file_version_number() IS 'Automatically sets version_number based on parent file version.'; -- Add table and column comments -COMMENT ON TABLE document_files IS - 'Source files for document processing with pipeline management and version tracking.'; - -COMMENT ON COLUMN document_files.id IS 'Unique file identifier'; -COMMENT ON COLUMN document_files.workspace_id IS 'Parent workspace reference (required)'; -COMMENT ON COLUMN document_files.document_id IS 'Parent document reference (optional)'; -COMMENT ON COLUMN document_files.account_id IS 'Uploading account reference'; -COMMENT ON COLUMN document_files.version_number IS 'Version number (1 for original, increments for new versions via parent_id chain)'; -COMMENT ON 
COLUMN document_files.display_name IS 'Display name (1-255 chars)'; -COMMENT ON COLUMN document_files.original_filename IS 'Original upload filename (1-255 chars)'; -COMMENT ON COLUMN document_files.file_extension IS 'File extension (1-20 alphanumeric)'; -COMMENT ON COLUMN document_files.tags IS 'Classification tags (max 32)'; -COMMENT ON COLUMN document_files.source IS 'How the file was created (uploaded, imported, generated)'; -COMMENT ON COLUMN document_files.require_mode IS 'Processing mode required'; -COMMENT ON COLUMN document_files.processing_priority IS 'Priority 1-10 (10=highest)'; -COMMENT ON COLUMN document_files.processing_status IS 'Current processing status'; -COMMENT ON COLUMN document_files.is_indexed IS 'Whether file content has been indexed for search'; -COMMENT ON COLUMN document_files.content_segmentation IS 'Content segmentation strategy'; -COMMENT ON COLUMN document_files.visual_support IS 'Whether to enable visual content processing'; -COMMENT ON COLUMN document_files.file_size_bytes IS 'File size in bytes'; -COMMENT ON COLUMN document_files.file_hash_sha256 IS 'SHA256 content hash'; -COMMENT ON COLUMN document_files.storage_path IS 'Storage system path'; -COMMENT ON COLUMN document_files.storage_bucket IS 'Storage bucket/container'; -COMMENT ON COLUMN document_files.metadata IS 'Extended metadata (JSON, 2B-8KB)'; -COMMENT ON COLUMN document_files.parent_id IS 'Parent file reference for hierarchical relationships'; -COMMENT ON COLUMN document_files.created_at IS 'Upload timestamp'; -COMMENT ON COLUMN document_files.updated_at IS 'Last modification timestamp'; -COMMENT ON COLUMN document_files.deleted_at IS 'Soft deletion timestamp'; - --- Create document chunks table - Text chunks with vector embeddings for semantic search -CREATE TABLE document_chunks ( +COMMENT ON TABLE files IS + 'Files stored in the system with version tracking and deduplication.'; + +COMMENT ON COLUMN files.id IS 'Unique file identifier'; +COMMENT ON COLUMN files.workspace_id IS 'Parent workspace reference'; +COMMENT ON COLUMN files.account_id IS 'Uploading/creating account reference'; +COMMENT ON COLUMN files.parent_id IS 'Parent file reference for version chains'; +COMMENT ON COLUMN files.version_number IS 'Version number (1 for original, increments via parent_id chain)'; +COMMENT ON COLUMN files.display_name IS 'Display name (1-255 chars)'; +COMMENT ON COLUMN files.original_filename IS 'Original upload filename (1-255 chars)'; +COMMENT ON COLUMN files.file_extension IS 'File extension (1-20 alphanumeric)'; +COMMENT ON COLUMN files.mime_type IS 'MIME type of the file'; +COMMENT ON COLUMN files.tags IS 'Classification tags (max 32)'; +COMMENT ON COLUMN files.source IS 'How the file was created (uploaded, imported, generated)'; +COMMENT ON COLUMN files.file_size_bytes IS 'File size in bytes'; +COMMENT ON COLUMN files.file_hash_sha256 IS 'SHA256 content hash'; +COMMENT ON COLUMN files.storage_path IS 'Storage system path'; +COMMENT ON COLUMN files.storage_bucket IS 'Storage bucket/container'; +COMMENT ON COLUMN files.metadata IS 'Extended metadata (JSON)'; +COMMENT ON COLUMN files.created_at IS 'Upload timestamp'; +COMMENT ON COLUMN files.updated_at IS 'Last modification timestamp'; +COMMENT ON COLUMN files.deleted_at IS 'Soft deletion timestamp'; + +-- Create file chunks table - Text chunks with vector embeddings for semantic search +CREATE TABLE file_chunks ( -- Primary identifiers id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- References - file_id UUID NOT NULL REFERENCES document_files 
(id) ON DELETE CASCADE, + file_id UUID NOT NULL REFERENCES files (id) ON DELETE CASCADE, -- Chunk position and content info chunk_index INTEGER NOT NULL DEFAULT 0, @@ -308,135 +160,63 @@ CREATE TABLE document_chunks ( content_size INTEGER NOT NULL DEFAULT 0, token_count INTEGER NOT NULL DEFAULT 0, - CONSTRAINT document_chunks_chunk_index_min CHECK (chunk_index >= 0), - CONSTRAINT document_chunks_content_sha256_length CHECK (octet_length(content_sha256) = 32), - CONSTRAINT document_chunks_content_size_min CHECK (content_size >= 0), - CONSTRAINT document_chunks_token_count_min CHECK (token_count >= 0), + CONSTRAINT file_chunks_chunk_index_min CHECK (chunk_index >= 0), + CONSTRAINT file_chunks_content_sha256_length CHECK (octet_length(content_sha256) = 32), + CONSTRAINT file_chunks_content_size_min CHECK (content_size >= 0), + CONSTRAINT file_chunks_token_count_min CHECK (token_count >= 0), -- Vector embedding (1536 dimensions for OpenAI ada-002, adjust as needed) embedding VECTOR(1536) NOT NULL, embedding_model TEXT NOT NULL, - CONSTRAINT document_chunks_embedding_model_format CHECK (embedding_model ~ '^[a-zA-Z0-9_\-:/\.]+$'), + CONSTRAINT file_chunks_embedding_model_format CHECK (embedding_model ~ '^[a-zA-Z0-9_\-:/\.]+$'), -- Chunk metadata (positions, page numbers, etc.) metadata JSONB NOT NULL DEFAULT '{}', - CONSTRAINT document_chunks_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), + CONSTRAINT file_chunks_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), -- Lifecycle timestamps created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - CONSTRAINT document_chunks_updated_after_created CHECK (updated_at >= created_at), + CONSTRAINT file_chunks_updated_after_created CHECK (updated_at >= created_at), -- Unique constraint on file + chunk index - CONSTRAINT document_chunks_file_chunk_unique UNIQUE (file_id, chunk_index) + CONSTRAINT file_chunks_file_chunk_unique UNIQUE (file_id, chunk_index) ); -- Set up automatic updated_at trigger -SELECT setup_updated_at('document_chunks'); +SELECT setup_updated_at('file_chunks'); --- Create indexes for document chunks -CREATE INDEX document_chunks_file_idx - ON document_chunks (file_id, chunk_index ASC); +-- Create indexes for file chunks +CREATE INDEX file_chunks_file_idx + ON file_chunks (file_id, chunk_index ASC); -CREATE INDEX document_chunks_embedded_idx - ON document_chunks (file_id) +CREATE INDEX file_chunks_embedded_idx + ON file_chunks (file_id) WHERE embedding IS NOT NULL; --- Create HNSW index for vector similarity search (L2 distance) -CREATE INDEX document_chunks_embedding_idx - ON document_chunks USING hnsw (embedding vector_cosine_ops) +-- Create HNSW index for vector similarity search (cosine distance) +CREATE INDEX file_chunks_embedding_idx + ON file_chunks USING hnsw (embedding vector_cosine_ops) WHERE embedding IS NOT NULL; -- Add table and column comments -COMMENT ON TABLE document_chunks IS - 'Text chunks extracted from document files with vector embeddings for semantic search.'; - -COMMENT ON COLUMN document_chunks.id IS 'Unique chunk identifier'; -COMMENT ON COLUMN document_chunks.file_id IS 'Parent document file reference'; -COMMENT ON COLUMN document_chunks.chunk_index IS 'Sequential index of chunk within file (0-based)'; -COMMENT ON COLUMN document_chunks.content_sha256 IS 'SHA-256 hash of chunk content'; -COMMENT ON COLUMN document_chunks.content_size IS 'Size of chunk content in bytes'; -COMMENT ON COLUMN document_chunks.token_count IS 
'Approximate token count for the chunk'; -COMMENT ON COLUMN document_chunks.embedding IS 'Vector embedding (1536 dimensions)'; -COMMENT ON COLUMN document_chunks.embedding_model IS 'Model used to generate the embedding'; -COMMENT ON COLUMN document_chunks.metadata IS 'Extended metadata (positions, page numbers, etc.)'; -COMMENT ON COLUMN document_chunks.created_at IS 'Chunk creation timestamp'; -COMMENT ON COLUMN document_chunks.updated_at IS 'Last modification timestamp'; - --- Create document comments table - User discussions and annotations -CREATE TABLE document_comments ( - -- Primary identifiers - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - - -- Thread references - parent_comment_id UUID DEFAULT NULL REFERENCES document_comments (id) ON DELETE CASCADE, - reply_to_account_id UUID DEFAULT NULL REFERENCES accounts (id) ON DELETE SET NULL, - - -- Comment content - content TEXT NOT NULL, - - CONSTRAINT document_comments_content_length CHECK (length(trim(content)) BETWEEN 1 AND 10000), - - -- Metadata - metadata JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT document_comments_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), - - -- Lifecycle timestamps - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - deleted_at TIMESTAMPTZ DEFAULT NULL, - - CONSTRAINT document_comments_updated_after_created CHECK (updated_at >= created_at), - CONSTRAINT document_comments_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), - CONSTRAINT document_comments_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) -); - --- Set up automatic updated_at trigger -SELECT setup_updated_at('document_comments'); - --- Create indexes for document comments -CREATE INDEX document_comments_file_idx - ON document_comments (file_id, created_at DESC) - WHERE deleted_at IS NULL; - -CREATE INDEX document_comments_account_idx - ON document_comments (account_id, created_at DESC) - WHERE deleted_at IS NULL; - -CREATE INDEX document_comments_thread_idx - ON document_comments (parent_comment_id, created_at ASC) - WHERE parent_comment_id IS NOT NULL AND deleted_at IS NULL; - -CREATE INDEX document_comments_reply_to_idx - ON document_comments (reply_to_account_id, created_at DESC) - WHERE reply_to_account_id IS NOT NULL AND deleted_at IS NULL; - -CREATE INDEX document_comments_metadata_idx - ON document_comments USING gin (metadata) - WHERE deleted_at IS NULL; - --- Add table and column comments -COMMENT ON TABLE document_comments IS - 'User comments and discussions on files, supporting threaded conversations and @mentions.'; - -COMMENT ON COLUMN document_comments.id IS 'Unique comment identifier'; -COMMENT ON COLUMN document_comments.file_id IS 'Parent file reference'; -COMMENT ON COLUMN document_comments.account_id IS 'Comment author reference'; -COMMENT ON COLUMN document_comments.parent_comment_id IS 'Parent comment for threaded replies (NULL for top-level)'; -COMMENT ON COLUMN document_comments.reply_to_account_id IS 'Account being replied to (@mention)'; -COMMENT ON COLUMN document_comments.content IS 'Comment text content (1-10000 chars)'; -COMMENT ON COLUMN document_comments.metadata IS 'Extended metadata (JSON, 2B-4KB)'; -COMMENT ON COLUMN document_comments.created_at IS 'Comment creation timestamp'; -COMMENT ON COLUMN 
document_comments.updated_at IS 'Last edit timestamp'; -COMMENT ON COLUMN document_comments.deleted_at IS 'Soft deletion timestamp'; +COMMENT ON TABLE file_chunks IS + 'Text chunks extracted from files with vector embeddings for semantic search.'; + +COMMENT ON COLUMN file_chunks.id IS 'Unique chunk identifier'; +COMMENT ON COLUMN file_chunks.file_id IS 'Parent file reference'; +COMMENT ON COLUMN file_chunks.chunk_index IS 'Sequential index of chunk within file (0-based)'; +COMMENT ON COLUMN file_chunks.content_sha256 IS 'SHA-256 hash of chunk content'; +COMMENT ON COLUMN file_chunks.content_size IS 'Size of chunk content in bytes'; +COMMENT ON COLUMN file_chunks.token_count IS 'Approximate token count for the chunk'; +COMMENT ON COLUMN file_chunks.embedding IS 'Vector embedding (1536 dimensions)'; +COMMENT ON COLUMN file_chunks.embedding_model IS 'Model used to generate the embedding'; +COMMENT ON COLUMN file_chunks.metadata IS 'Extended metadata (positions, page numbers, etc.)'; +COMMENT ON COLUMN file_chunks.created_at IS 'Chunk creation timestamp'; +COMMENT ON COLUMN file_chunks.updated_at IS 'Last modification timestamp'; -- Create annotation type enum CREATE TYPE ANNOTATION_TYPE AS ENUM ( @@ -445,111 +225,70 @@ CREATE TYPE ANNOTATION_TYPE AS ENUM ( ); COMMENT ON TYPE ANNOTATION_TYPE IS - 'Type classification for document annotations.'; + 'Type classification for file annotations.'; --- Create document annotations table - Annotations for document content -CREATE TABLE document_annotations ( +-- Create file annotations table +CREATE TABLE file_annotations ( -- Primary identifiers id UUID PRIMARY KEY DEFAULT gen_random_uuid(), -- References - document_file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, + file_id UUID NOT NULL REFERENCES files (id) ON DELETE CASCADE, account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, -- Annotation content content TEXT NOT NULL, annotation_type ANNOTATION_TYPE NOT NULL DEFAULT 'annotation', - CONSTRAINT document_annotations_content_length CHECK (length(trim(content)) BETWEEN 1 AND 10000), + CONSTRAINT file_annotations_content_length CHECK (length(trim(content)) BETWEEN 1 AND 10000), - -- Metadata + -- Metadata (position, page, bounds, etc.) 
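+    -- Illustrative only (not part of the enforced schema): a highlight annotation
+    -- could, for example, store its location in metadata as
+    --   '{"page": 3, "bounds": {"x": 120, "y": 408, "w": 210, "h": 18}}'
+    -- The keys are application-defined; only the overall size is constrained below.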
metadata JSONB NOT NULL DEFAULT '{}', - CONSTRAINT document_annotations_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), + CONSTRAINT file_annotations_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 4096), -- Lifecycle timestamps created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, deleted_at TIMESTAMPTZ DEFAULT NULL, - CONSTRAINT document_annotations_updated_after_created CHECK (updated_at >= created_at), - CONSTRAINT document_annotations_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), - CONSTRAINT document_annotations_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) + CONSTRAINT file_annotations_updated_after_created CHECK (updated_at >= created_at), + CONSTRAINT file_annotations_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at), + CONSTRAINT file_annotations_deleted_after_updated CHECK (deleted_at IS NULL OR deleted_at >= updated_at) ); -- Set up automatic updated_at trigger -SELECT setup_updated_at('document_annotations'); +SELECT setup_updated_at('file_annotations'); --- Create indexes for document annotations -CREATE INDEX document_annotations_file_idx - ON document_annotations (document_file_id, created_at DESC) +-- Create indexes for file annotations +CREATE INDEX file_annotations_file_idx + ON file_annotations (file_id, created_at DESC) WHERE deleted_at IS NULL; -CREATE INDEX document_annotations_account_idx - ON document_annotations (account_id, created_at DESC) +CREATE INDEX file_annotations_account_idx + ON file_annotations (account_id, created_at DESC) WHERE deleted_at IS NULL; -CREATE INDEX document_annotations_type_idx - ON document_annotations (annotation_type, document_file_id) +CREATE INDEX file_annotations_type_idx + ON file_annotations (annotation_type, file_id) WHERE deleted_at IS NULL; -- Add table and column comments -COMMENT ON TABLE document_annotations IS - 'User annotations and highlights on document content.'; - -COMMENT ON COLUMN document_annotations.id IS 'Unique annotation identifier'; -COMMENT ON COLUMN document_annotations.document_file_id IS 'Parent document file reference'; -COMMENT ON COLUMN document_annotations.account_id IS 'Annotation author reference'; -COMMENT ON COLUMN document_annotations.content IS 'Annotation text content (1-10000 chars)'; -COMMENT ON COLUMN document_annotations.annotation_type IS 'Type of annotation (note, highlight, etc.)'; -COMMENT ON COLUMN document_annotations.metadata IS 'Extended metadata including position/location (JSON, 2B-4KB)'; -COMMENT ON COLUMN document_annotations.created_at IS 'Annotation creation timestamp'; -COMMENT ON COLUMN document_annotations.updated_at IS 'Last edit timestamp'; -COMMENT ON COLUMN document_annotations.deleted_at IS 'Soft deletion timestamp'; - --- Create document processing summary view -CREATE VIEW document_processing_summary AS -SELECT - d.id, - d.display_name, - d.workspace_id, - COUNT(df.id) FILTER (WHERE df.deleted_at IS NULL) AS input_files_count, - d.created_at, - d.updated_at -FROM documents d - LEFT JOIN document_files df ON d.id = df.document_id -WHERE d.deleted_at IS NULL -GROUP BY d.id, d.display_name, d.workspace_id, d.created_at, d.updated_at; - -COMMENT ON VIEW document_processing_summary IS - 'Overview of document processing status, metrics, and costs.'; - --- Create processing queue view -CREATE VIEW processing_queue AS -SELECT - df.id, - df.document_id, - d.display_name AS document_name, - 
d.workspace_id, - df.display_name AS file_name, - df.require_mode, - df.processing_priority, - df.processing_status, - df.file_size_bytes, - df.created_at, - EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - df.created_at)) AS queue_time_seconds -FROM document_files df - JOIN documents d ON df.document_id = d.id -WHERE df.processing_status IN ('pending', 'processing') - AND df.deleted_at IS NULL - AND d.deleted_at IS NULL -ORDER BY df.processing_priority DESC, df.created_at ASC; - -COMMENT ON VIEW processing_queue IS - 'Files queued for processing, ordered by priority and age.'; +COMMENT ON TABLE file_annotations IS + 'User annotations and highlights on file content.'; + +COMMENT ON COLUMN file_annotations.id IS 'Unique annotation identifier'; +COMMENT ON COLUMN file_annotations.file_id IS 'Parent file reference'; +COMMENT ON COLUMN file_annotations.account_id IS 'Annotation author reference'; +COMMENT ON COLUMN file_annotations.content IS 'Annotation text content (1-10000 chars)'; +COMMENT ON COLUMN file_annotations.annotation_type IS 'Type of annotation (annotation, highlight)'; +COMMENT ON COLUMN file_annotations.metadata IS 'Extended metadata including position/location (JSON)'; +COMMENT ON COLUMN file_annotations.created_at IS 'Annotation creation timestamp'; +COMMENT ON COLUMN file_annotations.updated_at IS 'Last edit timestamp'; +COMMENT ON COLUMN file_annotations.deleted_at IS 'Soft deletion timestamp'; -- Create duplicate detection function -CREATE OR REPLACE FUNCTION find_duplicate_files(_document_id UUID DEFAULT NULL) +CREATE OR REPLACE FUNCTION find_duplicate_files(_workspace_id UUID DEFAULT NULL) RETURNS TABLE ( file_hash TEXT, file_size BIGINT, @@ -560,18 +299,18 @@ LANGUAGE plpgsql AS $$ BEGIN RETURN QUERY SELECT - ENCODE(df.file_hash_sha256, 'hex'), - df.file_size_bytes, + ENCODE(f.file_hash_sha256, 'hex'), + f.file_size_bytes, COUNT(*), - ARRAY_AGG(df.id) - FROM document_files df - WHERE (_document_id IS NULL OR df.document_id = _document_id) - AND df.deleted_at IS NULL - GROUP BY df.file_hash_sha256, df.file_size_bytes + ARRAY_AGG(f.id) + FROM files f + WHERE (_workspace_id IS NULL OR f.workspace_id = _workspace_id) + AND f.deleted_at IS NULL + GROUP BY f.file_hash_sha256, f.file_size_bytes HAVING COUNT(*) > 1 ORDER BY COUNT(*) DESC; END; $$; COMMENT ON FUNCTION find_duplicate_files(UUID) IS - 'Finds duplicate files by hash and size. Optionally scoped to a specific document.'; + 'Finds duplicate files by hash and size. 
Optionally scoped to a specific workspace.'; diff --git a/migrations/2026-01-09-002114_chat/down.sql b/migrations/2026-01-09-002114_chat/down.sql deleted file mode 100644 index 26897ab..0000000 --- a/migrations/2026-01-09-002114_chat/down.sql +++ /dev/null @@ -1,10 +0,0 @@ --- Revert chat migration - --- Drop tables in reverse order (respecting foreign key dependencies) -DROP TABLE IF EXISTS chat_operations; -DROP TABLE IF EXISTS chat_tool_calls; -DROP TABLE IF EXISTS chat_sessions; - --- Drop enums -DROP TYPE IF EXISTS CHAT_TOOL_STATUS; -DROP TYPE IF EXISTS CHAT_SESSION_STATUS; diff --git a/migrations/2026-01-09-002114_chat/up.sql b/migrations/2026-01-09-002114_chat/up.sql deleted file mode 100644 index cf5dae8..0000000 --- a/migrations/2026-01-09-002114_chat/up.sql +++ /dev/null @@ -1,202 +0,0 @@ --- Chat: LLM-powered document editing sessions and operations tracking - --- Chat session lifecycle status -CREATE TYPE CHAT_SESSION_STATUS AS ENUM ( - 'active', - 'paused', - 'archived' -); - -COMMENT ON TYPE CHAT_SESSION_STATUS IS - 'Lifecycle status for chat editing sessions.'; - --- Chat sessions table definition -CREATE TABLE chat_sessions ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, - account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, - primary_file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - - -- Session attributes - display_name TEXT NOT NULL DEFAULT 'Untitled Session', - session_status CHAT_SESSION_STATUS NOT NULL DEFAULT 'active', - - CONSTRAINT chat_sessions_display_name_length CHECK (length(trim(display_name)) BETWEEN 1 AND 255), - - -- Model configuration (model name, temperature, max tokens, etc.) 
- model_config JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT chat_sessions_model_config_size CHECK (length(model_config::TEXT) BETWEEN 2 AND 8192), - - -- Usage statistics - message_count INTEGER NOT NULL DEFAULT 0, - token_count INTEGER NOT NULL DEFAULT 0, - - CONSTRAINT chat_sessions_message_count_min CHECK (message_count >= 0), - CONSTRAINT chat_sessions_token_count_min CHECK (token_count >= 0), - - -- Lifecycle timestamps - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - - CONSTRAINT chat_sessions_updated_after_created CHECK (updated_at >= created_at) -); - --- Triggers for chat_sessions table -SELECT setup_updated_at('chat_sessions'); - --- Indexes for chat_sessions table -CREATE INDEX chat_sessions_workspace_idx - ON chat_sessions (workspace_id, created_at DESC); - -CREATE INDEX chat_sessions_account_idx - ON chat_sessions (account_id, created_at DESC); - -CREATE INDEX chat_sessions_file_idx - ON chat_sessions (primary_file_id); - -CREATE INDEX chat_sessions_status_idx - ON chat_sessions (session_status, workspace_id) - WHERE session_status = 'active'; - --- Comments for chat_sessions table -COMMENT ON TABLE chat_sessions IS - 'LLM-assisted document editing sessions.'; - -COMMENT ON COLUMN chat_sessions.id IS 'Unique session identifier'; -COMMENT ON COLUMN chat_sessions.workspace_id IS 'Reference to the workspace'; -COMMENT ON COLUMN chat_sessions.account_id IS 'Account that created the session'; -COMMENT ON COLUMN chat_sessions.primary_file_id IS 'Primary file being edited in this session'; -COMMENT ON COLUMN chat_sessions.display_name IS 'User-friendly session name (1-255 chars)'; -COMMENT ON COLUMN chat_sessions.session_status IS 'Session lifecycle status (active, paused, archived)'; -COMMENT ON COLUMN chat_sessions.model_config IS 'LLM configuration (model, temperature, etc.)'; -COMMENT ON COLUMN chat_sessions.message_count IS 'Total number of messages exchanged in this session'; -COMMENT ON COLUMN chat_sessions.token_count IS 'Total tokens used in this session'; -COMMENT ON COLUMN chat_sessions.created_at IS 'Timestamp when session was created'; -COMMENT ON COLUMN chat_sessions.updated_at IS 'Timestamp when session was last modified'; - --- Tool execution status -CREATE TYPE CHAT_TOOL_STATUS AS ENUM ( - 'pending', - 'running', - 'completed', - 'cancelled' -); - -COMMENT ON TYPE CHAT_TOOL_STATUS IS - 'Execution status for chat tool calls.'; - --- Chat tool calls table definition -CREATE TABLE chat_tool_calls ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - session_id UUID NOT NULL REFERENCES chat_sessions (id) ON DELETE CASCADE, - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - chunk_id UUID DEFAULT NULL REFERENCES document_chunks (id) ON DELETE SET NULL, - - -- Tool attributes - tool_name TEXT NOT NULL, - tool_input JSONB NOT NULL DEFAULT '{}', - tool_output JSONB NOT NULL DEFAULT '{}', - tool_status CHAT_TOOL_STATUS NOT NULL DEFAULT 'pending', - - CONSTRAINT chat_tool_calls_tool_name_length CHECK (length(trim(tool_name)) BETWEEN 1 AND 128), - CONSTRAINT chat_tool_calls_tool_input_size CHECK (length(tool_input::TEXT) BETWEEN 2 AND 65536), - CONSTRAINT chat_tool_calls_tool_output_size CHECK (length(tool_output::TEXT) BETWEEN 2 AND 65536), - - -- Timing - started_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - completed_at TIMESTAMPTZ DEFAULT NULL, - - CONSTRAINT chat_tool_calls_completed_after_started CHECK (completed_at IS NULL OR 
completed_at >= started_at) -); - --- Indexes for chat_tool_calls table -CREATE INDEX chat_tool_calls_session_idx - ON chat_tool_calls (session_id, started_at DESC); - -CREATE INDEX chat_tool_calls_file_idx - ON chat_tool_calls (file_id, started_at DESC); - -CREATE INDEX chat_tool_calls_status_idx - ON chat_tool_calls (tool_status, started_at DESC) - WHERE tool_status IN ('pending', 'running'); - -CREATE INDEX chat_tool_calls_tool_name_idx - ON chat_tool_calls (tool_name); - --- Comments for chat_tool_calls table -COMMENT ON TABLE chat_tool_calls IS - 'Tool invocations for debugging and usage tracking. Input/output contain references, not document content.'; - -COMMENT ON COLUMN chat_tool_calls.id IS 'Unique tool call identifier'; -COMMENT ON COLUMN chat_tool_calls.session_id IS 'Reference to the chat session'; -COMMENT ON COLUMN chat_tool_calls.file_id IS 'Reference to the file being operated on'; -COMMENT ON COLUMN chat_tool_calls.chunk_id IS 'Optional reference to a specific chunk'; -COMMENT ON COLUMN chat_tool_calls.tool_name IS 'Name of the tool (merge, split, redact, translate, etc.)'; -COMMENT ON COLUMN chat_tool_calls.tool_input IS 'Tool parameters as JSON (references, not content)'; -COMMENT ON COLUMN chat_tool_calls.tool_output IS 'Tool result as JSON (references, not content)'; -COMMENT ON COLUMN chat_tool_calls.tool_status IS 'Execution status (pending, running, completed, cancelled)'; -COMMENT ON COLUMN chat_tool_calls.started_at IS 'Timestamp when tool call was created/started'; -COMMENT ON COLUMN chat_tool_calls.completed_at IS 'Timestamp when tool execution completed'; - --- Chat operations table definition -CREATE TABLE chat_operations ( - -- Primary identifier - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- References - tool_call_id UUID NOT NULL REFERENCES chat_tool_calls (id) ON DELETE CASCADE, - file_id UUID NOT NULL REFERENCES document_files (id) ON DELETE CASCADE, - chunk_id UUID DEFAULT NULL REFERENCES document_chunks (id) ON DELETE SET NULL, - - -- Operation attributes - operation_type TEXT NOT NULL, - operation_diff JSONB NOT NULL DEFAULT '{}', - - CONSTRAINT chat_operations_operation_type_length CHECK (length(trim(operation_type)) BETWEEN 1 AND 64), - CONSTRAINT chat_operations_operation_diff_size CHECK (length(operation_diff::TEXT) BETWEEN 2 AND 131072), - - -- Application state - applied BOOLEAN NOT NULL DEFAULT FALSE, - reverted BOOLEAN NOT NULL DEFAULT FALSE, - - CONSTRAINT chat_operations_revert_requires_applied CHECK (NOT reverted OR applied), - - -- Timing - created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, - applied_at TIMESTAMPTZ DEFAULT NULL, - - CONSTRAINT chat_operations_applied_after_created CHECK (applied_at IS NULL OR applied_at >= created_at) -); - --- Indexes for chat_operations table -CREATE INDEX chat_operations_tool_call_idx - ON chat_operations (tool_call_id); - -CREATE INDEX chat_operations_file_idx - ON chat_operations (file_id, created_at DESC); - -CREATE INDEX chat_operations_pending_idx - ON chat_operations (file_id, applied) - WHERE NOT applied; - --- Comments for chat_operations table -COMMENT ON TABLE chat_operations IS - 'Document operations (diffs) produced by tool calls. 
Stores positions, not content.'; - -COMMENT ON COLUMN chat_operations.id IS 'Unique operation identifier'; -COMMENT ON COLUMN chat_operations.tool_call_id IS 'Reference to the tool call that produced this operation'; -COMMENT ON COLUMN chat_operations.file_id IS 'Reference to the file being modified'; -COMMENT ON COLUMN chat_operations.chunk_id IS 'Optional reference to a specific chunk'; -COMMENT ON COLUMN chat_operations.operation_type IS 'Type of operation (insert, replace, delete, format, merge, split, etc.)'; -COMMENT ON COLUMN chat_operations.operation_diff IS 'The diff specification as JSON (positions, not content)'; -COMMENT ON COLUMN chat_operations.applied IS 'Whether this operation has been applied to the document'; -COMMENT ON COLUMN chat_operations.reverted IS 'Whether this operation was reverted by the user'; -COMMENT ON COLUMN chat_operations.created_at IS 'Timestamp when operation was created'; -COMMENT ON COLUMN chat_operations.applied_at IS 'Timestamp when operation was applied'; diff --git a/migrations/2026-01-19-045012_pipelines/down.sql b/migrations/2026-01-19-045012_pipelines/down.sql new file mode 100644 index 0000000..fdd315c --- /dev/null +++ b/migrations/2026-01-19-045012_pipelines/down.sql @@ -0,0 +1,11 @@ +-- Revert pipeline tables + +DROP VIEW IF EXISTS pipeline_run_history; +DROP VIEW IF EXISTS active_pipeline_runs; + +DROP TABLE IF EXISTS pipeline_runs; +DROP TABLE IF EXISTS pipelines; + +DROP TYPE IF EXISTS PIPELINE_TRIGGER_TYPE; +DROP TYPE IF EXISTS PIPELINE_RUN_STATUS; +DROP TYPE IF EXISTS PIPELINE_STATUS; diff --git a/migrations/2026-01-19-045012_pipelines/up.sql b/migrations/2026-01-19-045012_pipelines/up.sql new file mode 100644 index 0000000..b0329f8 --- /dev/null +++ b/migrations/2026-01-19-045012_pipelines/up.sql @@ -0,0 +1,231 @@ +-- Pipeline: Workflow definitions and execution tracking +-- This migration creates tables for user-defined processing pipelines + +-- Pipeline status enum +CREATE TYPE PIPELINE_STATUS AS ENUM ( + 'draft', -- Pipeline is being configured + 'active', -- Pipeline is ready to run + 'disabled' -- Pipeline is disabled +); + +COMMENT ON TYPE PIPELINE_STATUS IS + 'Lifecycle status for pipeline definitions.'; + +-- Pipeline run status enum +CREATE TYPE PIPELINE_RUN_STATUS AS ENUM ( + 'queued', -- Run is waiting to start + 'running', -- Run is in progress + 'completed', -- Run finished successfully + 'failed', -- Run failed with error + 'cancelled' -- Run was cancelled by user +); + +COMMENT ON TYPE PIPELINE_RUN_STATUS IS + 'Execution status for pipeline runs.'; + +-- Pipeline run trigger type enum +CREATE TYPE PIPELINE_TRIGGER_TYPE AS ENUM ( + 'manual', -- Manually triggered by user + 'source', -- Triggered by source connector (upload, webhook, etc.) 
+ 'scheduled' -- Triggered by schedule (future) +); + +COMMENT ON TYPE PIPELINE_TRIGGER_TYPE IS + 'How a pipeline run was initiated.'; + +-- Pipeline definitions table +CREATE TABLE pipelines ( + -- Primary identifier + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + + -- References + workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, + account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, + + -- Core attributes + name TEXT NOT NULL, + description TEXT DEFAULT NULL, + status PIPELINE_STATUS NOT NULL DEFAULT 'draft', + + CONSTRAINT pipelines_name_length CHECK (length(trim(name)) BETWEEN 1 AND 255), + CONSTRAINT pipelines_description_length CHECK (description IS NULL OR length(description) <= 4096), + + -- Pipeline definition (flexible JSONB structure) + -- Contains: steps[], input_schema, output_schema, variables, etc. + definition JSONB NOT NULL DEFAULT '{"steps": []}', + + CONSTRAINT pipelines_definition_size CHECK (length(definition::TEXT) BETWEEN 2 AND 1048576), + + -- Configuration + metadata JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT pipelines_metadata_size CHECK (length(metadata::TEXT) BETWEEN 2 AND 65536), + + -- Lifecycle timestamps + created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + updated_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + deleted_at TIMESTAMPTZ DEFAULT NULL, + + CONSTRAINT pipelines_updated_after_created CHECK (updated_at >= created_at), + CONSTRAINT pipelines_deleted_after_created CHECK (deleted_at IS NULL OR deleted_at >= created_at) +); + +-- Triggers +SELECT setup_updated_at('pipelines'); + +-- Indexes +CREATE INDEX pipelines_workspace_idx + ON pipelines (workspace_id, created_at DESC) + WHERE deleted_at IS NULL; + +CREATE INDEX pipelines_account_idx + ON pipelines (account_id, created_at DESC) + WHERE deleted_at IS NULL; + +CREATE INDEX pipelines_status_idx + ON pipelines (status, workspace_id) + WHERE deleted_at IS NULL; + +CREATE INDEX pipelines_name_trgm_idx + ON pipelines USING gin (name gin_trgm_ops) + WHERE deleted_at IS NULL; + +-- Comments +COMMENT ON TABLE pipelines IS + 'User-defined processing pipeline definitions with step configurations.'; + +COMMENT ON COLUMN pipelines.id IS 'Unique pipeline identifier'; +COMMENT ON COLUMN pipelines.workspace_id IS 'Parent workspace reference'; +COMMENT ON COLUMN pipelines.account_id IS 'Creator account reference'; +COMMENT ON COLUMN pipelines.name IS 'Pipeline name (1-255 chars)'; +COMMENT ON COLUMN pipelines.description IS 'Pipeline description (up to 4096 chars)'; +COMMENT ON COLUMN pipelines.status IS 'Pipeline lifecycle status'; +COMMENT ON COLUMN pipelines.definition IS 'Pipeline definition JSON (steps, input/output schemas, etc.)'; +COMMENT ON COLUMN pipelines.metadata IS 'Extended metadata'; +COMMENT ON COLUMN pipelines.created_at IS 'Creation timestamp'; +COMMENT ON COLUMN pipelines.updated_at IS 'Last modification timestamp'; +COMMENT ON COLUMN pipelines.deleted_at IS 'Soft deletion timestamp'; + +-- Pipeline runs table (execution instances) +CREATE TABLE pipeline_runs ( + -- Primary identifier + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + + -- References + pipeline_id UUID NOT NULL REFERENCES pipelines (id) ON DELETE CASCADE, + workspace_id UUID NOT NULL REFERENCES workspaces (id) ON DELETE CASCADE, + account_id UUID NOT NULL REFERENCES accounts (id) ON DELETE CASCADE, + + -- Run attributes + trigger_type PIPELINE_TRIGGER_TYPE NOT NULL DEFAULT 'manual', + status PIPELINE_RUN_STATUS NOT NULL DEFAULT 'queued', + + -- Input/output configuration 
for this run + input_config JSONB NOT NULL DEFAULT '{}', + output_config JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT pipeline_runs_input_config_size CHECK (length(input_config::TEXT) BETWEEN 2 AND 262144), + CONSTRAINT pipeline_runs_output_config_size CHECK (length(output_config::TEXT) BETWEEN 2 AND 262144), + + -- Snapshot of pipeline definition at run time (for reproducibility) + definition_snapshot JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT pipeline_runs_definition_snapshot_size CHECK (length(definition_snapshot::TEXT) BETWEEN 2 AND 1048576), + + -- Error details (if failed) + error JSONB DEFAULT NULL, + + CONSTRAINT pipeline_runs_error_size CHECK (error IS NULL OR length(error::TEXT) <= 65536), + + -- Metrics + metrics JSONB NOT NULL DEFAULT '{}', + + CONSTRAINT pipeline_runs_metrics_size CHECK (length(metrics::TEXT) BETWEEN 2 AND 65536), + + -- Timing + started_at TIMESTAMPTZ DEFAULT NULL, + completed_at TIMESTAMPTZ DEFAULT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT current_timestamp, + + CONSTRAINT pipeline_runs_started_after_created CHECK (started_at IS NULL OR started_at >= created_at), + CONSTRAINT pipeline_runs_completed_after_started CHECK (completed_at IS NULL OR (started_at IS NOT NULL AND completed_at >= started_at)) +); + +-- Indexes +CREATE INDEX pipeline_runs_pipeline_idx + ON pipeline_runs (pipeline_id, created_at DESC); + +CREATE INDEX pipeline_runs_workspace_idx + ON pipeline_runs (workspace_id, created_at DESC); + +CREATE INDEX pipeline_runs_account_idx + ON pipeline_runs (account_id, created_at DESC); + +CREATE INDEX pipeline_runs_status_idx + ON pipeline_runs (status, created_at DESC) + WHERE status IN ('queued', 'running'); + +CREATE INDEX pipeline_runs_trigger_idx + ON pipeline_runs (trigger_type, workspace_id); + +-- Comments +COMMENT ON TABLE pipeline_runs IS + 'Pipeline execution instances with status tracking and metrics.'; + +COMMENT ON COLUMN pipeline_runs.id IS 'Unique run identifier'; +COMMENT ON COLUMN pipeline_runs.pipeline_id IS 'Reference to pipeline definition'; +COMMENT ON COLUMN pipeline_runs.workspace_id IS 'Parent workspace reference'; +COMMENT ON COLUMN pipeline_runs.account_id IS 'Account that triggered the run'; +COMMENT ON COLUMN pipeline_runs.trigger_type IS 'How the run was initiated'; +COMMENT ON COLUMN pipeline_runs.status IS 'Current execution status'; +COMMENT ON COLUMN pipeline_runs.input_config IS 'Runtime input configuration'; +COMMENT ON COLUMN pipeline_runs.output_config IS 'Runtime output configuration'; +COMMENT ON COLUMN pipeline_runs.definition_snapshot IS 'Pipeline definition snapshot at run time'; +COMMENT ON COLUMN pipeline_runs.error IS 'Error details if run failed'; +COMMENT ON COLUMN pipeline_runs.metrics IS 'Run metrics (duration, resources, etc.)'; +COMMENT ON COLUMN pipeline_runs.started_at IS 'When execution started'; +COMMENT ON COLUMN pipeline_runs.completed_at IS 'When execution completed'; +COMMENT ON COLUMN pipeline_runs.created_at IS 'When run was created/queued'; + +-- View for active pipeline runs +CREATE VIEW active_pipeline_runs AS +SELECT + pr.id, + pr.pipeline_id, + p.name AS pipeline_name, + pr.workspace_id, + pr.account_id, + pr.trigger_type, + pr.status, + pr.started_at, + pr.created_at, + EXTRACT(EPOCH FROM (COALESCE(pr.completed_at, current_timestamp) - pr.started_at)) AS duration_seconds +FROM pipeline_runs pr + JOIN pipelines p ON pr.pipeline_id = p.id +WHERE pr.status IN ('queued', 'running') +ORDER BY pr.created_at DESC; + +COMMENT ON VIEW active_pipeline_runs IS + 'Currently active pipeline 
runs with progress information.'; + +-- View for pipeline run history +CREATE VIEW pipeline_run_history AS +SELECT + pr.id, + pr.pipeline_id, + p.name AS pipeline_name, + pr.workspace_id, + pr.trigger_type, + pr.status, + pr.started_at, + pr.completed_at, + EXTRACT(EPOCH FROM (pr.completed_at - pr.started_at)) AS duration_seconds, + pr.error IS NOT NULL AS has_error, + pr.created_at +FROM pipeline_runs pr + JOIN pipelines p ON pr.pipeline_id = p.id +WHERE pr.status IN ('completed', 'failed', 'cancelled') +ORDER BY pr.completed_at DESC; + +COMMENT ON VIEW pipeline_run_history IS + 'Completed pipeline runs for history and analytics.'; From a632410b1f76b84de2fd552bd5e6bd774e038cf5 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 19 Jan 2026 11:18:00 +0100 Subject: [PATCH 05/28] feat(opendal, vector): add nvisy-vector crate, reorganize backend configs - Create nvisy-vector crate with qdrant, milvus, pinecone, pgvector backends - Reorganize nvisy-opendal with folder-per-backend structure (s3, gcs, azblob, gdrive, dropbox, onedrive) - Remove all feature gates from nvisy-opendal and nvisy-vector - Remove prelude modules from nvisy-core, nvisy-opendal, nvisy-vector, nvisy-runtime - Milvus and Pinecone backends are stub implementations pending SDK API alignment --- .gitignore | 1 - CONTRIBUTING.md | 17 - Cargo.lock | 381 ++++++++++++-- Cargo.toml | 16 +- README.md | 3 - crates/nvisy-core/src/lib.rs | 2 - crates/nvisy-core/src/prelude.rs | 4 - crates/nvisy-opendal/Cargo.toml | 34 +- crates/nvisy-opendal/src/azblob/config.rs | 42 ++ crates/nvisy-opendal/src/azblob/mod.rs | 5 + crates/nvisy-opendal/src/backend.rs | 249 ++++----- crates/nvisy-opendal/src/config.rs | 256 +-------- crates/nvisy-opendal/src/dropbox/config.rs | 58 ++ crates/nvisy-opendal/src/dropbox/mod.rs | 5 + crates/nvisy-opendal/src/gcs/config.rs | 39 ++ crates/nvisy-opendal/src/gcs/mod.rs | 5 + crates/nvisy-opendal/src/gdrive/config.rs | 58 ++ crates/nvisy-opendal/src/gdrive/mod.rs | 5 + crates/nvisy-opendal/src/lib.rs | 15 +- crates/nvisy-opendal/src/onedrive/config.rs | 58 ++ crates/nvisy-opendal/src/onedrive/mod.rs | 5 + crates/nvisy-opendal/src/prelude.rs | 5 - crates/nvisy-opendal/src/s3/config.rs | 61 +++ crates/nvisy-opendal/src/s3/mod.rs | 5 + .../src/model/account_api_token.rs | 6 +- .../src/model/account_notification.rs | 3 +- crates/nvisy-postgres/src/model/mod.rs | 3 - .../src/model/workspace_invite.rs | 3 +- crates/nvisy-postgres/src/query/mod.rs | 3 - crates/nvisy-postgres/src/types/enums/mod.rs | 7 +- crates/nvisy-rig/src/rag/indexer/mod.rs | 1 - crates/nvisy-rig/src/rag/searcher/mod.rs | 1 - crates/nvisy-runtime/Cargo.toml | 10 +- crates/nvisy-runtime/src/engine/config.rs | 8 +- crates/nvisy-runtime/src/engine/executor.rs | 3 +- crates/nvisy-runtime/src/graph/mod.rs | 4 +- crates/nvisy-runtime/src/graph/workflow.rs | 497 ++++++++++++------ crates/nvisy-runtime/src/lib.rs | 3 - crates/nvisy-runtime/src/node/data.rs | 237 ++------- crates/nvisy-runtime/src/node/input/config.rs | 3 + crates/nvisy-runtime/src/node/input/mod.rs | 48 ++ crates/nvisy-runtime/src/node/mod.rs | 12 +- .../nvisy-runtime/src/node/output/config.rs | 30 ++ crates/nvisy-runtime/src/node/output/mod.rs | 48 ++ .../src/node/transformer/chunking.rs | 288 ++++++++++ .../src/node/transformer/config.rs | 93 ++++ .../src/node/transformer/document.rs | 104 ++++ .../src/node/transformer/embedding.rs | 270 ++++++++++ .../src/node/transformer/extraction.rs | 136 +++++ .../nvisy-runtime/src/node/transformer/mod.rs | 88 ++++ 
.../src/node/transformer/processing.rs | 131 +++++ .../src/node/transformer/quality.rs | 147 ++++++ .../src/node/transformer/routing.rs | 134 +++++ crates/nvisy-runtime/src/prelude.rs | 14 - crates/nvisy-runtime/src/runtime/mod.rs | 4 +- crates/nvisy-vector/Cargo.toml | 48 ++ crates/nvisy-vector/README.md | 42 ++ crates/nvisy-vector/src/config.rs | 36 ++ crates/nvisy-vector/src/error.rs | 99 ++++ crates/nvisy-vector/src/lib.rs | 22 + crates/nvisy-vector/src/milvus/backend.rs | 124 +++++ crates/nvisy-vector/src/milvus/config.rs | 82 +++ crates/nvisy-vector/src/milvus/mod.rs | 7 + crates/nvisy-vector/src/pgvector/backend.rs | 278 ++++++++++ crates/nvisy-vector/src/pgvector/config.rs | 91 ++++ crates/nvisy-vector/src/pgvector/mod.rs | 7 + crates/nvisy-vector/src/pinecone/backend.rs | 124 +++++ crates/nvisy-vector/src/pinecone/config.rs | 49 ++ crates/nvisy-vector/src/pinecone/mod.rs | 7 + crates/nvisy-vector/src/qdrant/backend.rs | 331 ++++++++++++ crates/nvisy-vector/src/qdrant/config.rs | 59 +++ crates/nvisy-vector/src/qdrant/mod.rs | 7 + crates/nvisy-vector/src/store.rs | 265 ++++++++++ 73 files changed, 4482 insertions(+), 864 deletions(-) delete mode 100644 crates/nvisy-core/src/prelude.rs create mode 100644 crates/nvisy-opendal/src/azblob/config.rs create mode 100644 crates/nvisy-opendal/src/azblob/mod.rs create mode 100644 crates/nvisy-opendal/src/dropbox/config.rs create mode 100644 crates/nvisy-opendal/src/dropbox/mod.rs create mode 100644 crates/nvisy-opendal/src/gcs/config.rs create mode 100644 crates/nvisy-opendal/src/gcs/mod.rs create mode 100644 crates/nvisy-opendal/src/gdrive/config.rs create mode 100644 crates/nvisy-opendal/src/gdrive/mod.rs create mode 100644 crates/nvisy-opendal/src/onedrive/config.rs create mode 100644 crates/nvisy-opendal/src/onedrive/mod.rs delete mode 100644 crates/nvisy-opendal/src/prelude.rs create mode 100644 crates/nvisy-opendal/src/s3/config.rs create mode 100644 crates/nvisy-opendal/src/s3/mod.rs create mode 100644 crates/nvisy-runtime/src/node/input/config.rs create mode 100644 crates/nvisy-runtime/src/node/input/mod.rs create mode 100644 crates/nvisy-runtime/src/node/output/config.rs create mode 100644 crates/nvisy-runtime/src/node/output/mod.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/chunking.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/config.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/document.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/embedding.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/extraction.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/mod.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/processing.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/quality.rs create mode 100644 crates/nvisy-runtime/src/node/transformer/routing.rs delete mode 100644 crates/nvisy-runtime/src/prelude.rs create mode 100644 crates/nvisy-vector/Cargo.toml create mode 100644 crates/nvisy-vector/README.md create mode 100644 crates/nvisy-vector/src/config.rs create mode 100644 crates/nvisy-vector/src/error.rs create mode 100644 crates/nvisy-vector/src/lib.rs create mode 100644 crates/nvisy-vector/src/milvus/backend.rs create mode 100644 crates/nvisy-vector/src/milvus/config.rs create mode 100644 crates/nvisy-vector/src/milvus/mod.rs create mode 100644 crates/nvisy-vector/src/pgvector/backend.rs create mode 100644 crates/nvisy-vector/src/pgvector/config.rs create mode 100644 crates/nvisy-vector/src/pgvector/mod.rs 
create mode 100644 crates/nvisy-vector/src/pinecone/backend.rs create mode 100644 crates/nvisy-vector/src/pinecone/config.rs create mode 100644 crates/nvisy-vector/src/pinecone/mod.rs create mode 100644 crates/nvisy-vector/src/qdrant/backend.rs create mode 100644 crates/nvisy-vector/src/qdrant/config.rs create mode 100644 crates/nvisy-vector/src/qdrant/mod.rs create mode 100644 crates/nvisy-vector/src/store.rs diff --git a/.gitignore b/.gitignore index f8a71f2..412cca2 100644 --- a/.gitignore +++ b/.gitignore @@ -28,7 +28,6 @@ crates/nvisy-postgres/src/schema.rs.bak # Build output dist/ build/ -output/ # Environment files .env* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3f45705..1de2919 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,23 +17,6 @@ make install-all make generate-keys ``` -### SSH Access - -Some dependencies are fetched from private GitHub repositories via SSH. Ensure -your SSH key is added to your GitHub account and ssh-agent is running: - -```bash -eval "$(ssh-agent -s)" -ssh-add ~/.ssh/id_ed25519 -ssh -T git@github.com # verify access -``` - -If cargo fails to fetch git dependencies, enable CLI-based git fetching: - -```bash -export CARGO_NET_GIT_FETCH_WITH_CLI=true -``` - ## Development Run all CI checks locally before submitting a pull request: diff --git a/Cargo.lock b/Cargo.lock index 8ae0e9b..1027c19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -57,12 +57,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6966317188cdfe54c58c0900a195d021294afb3ece9b7073d09e4018dbb1e3a2" dependencies = [ "aide-macros", - "axum", + "axum 0.8.8", "axum-extra 0.10.3", "bytes", "cfg-if", "http", - "indexmap", + "indexmap 2.12.1", "schemars 0.9.0", "serde", "serde_json", @@ -237,7 +237,7 @@ dependencies = [ "rand 0.8.5", "regex", "ring", - "rustls-native-certs", + "rustls-native-certs 0.7.3", "rustls-pemfile", "rustls-webpki 0.102.8", "serde", @@ -336,13 +336,40 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core 0.4.5", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "itoa", + "matchit 0.7.3", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + [[package]] name = "axum" version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ - "axum-core", + "axum-core 0.5.5", "axum-macros", "bytes", "form_urlencoded", @@ -353,7 +380,7 @@ dependencies = [ "hyper", "hyper-util", "itoa", - "matchit", + "matchit 0.8.4", "memchr", "mime", "multer", @@ -365,7 +392,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tower", + "tower 0.5.2", "tower-layer", "tower-service", "tracing", @@ -377,11 +404,31 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f08a543641554404b42acd0d2494df12ca2be034d7b8ee4dbbf7446f940a2ef" dependencies = [ - "axum", + "axum 0.8.8", "client-ip", "serde", ] +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + 
"bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", +] + [[package]] name = "axum-core" version = "0.5.5" @@ -407,8 +454,8 @@ version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9963ff19f40c6102c76756ef0a46004c0d58957d87259fc9208ff8441c12ab96" dependencies = [ - "axum", - "axum-core", + "axum 0.8.8", + "axum-core 0.5.5", "bytes", "futures-util", "http", @@ -429,8 +476,8 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fef252edff26ddba56bbcdf2ee3307b8129acb86f5749b68990c168a6fcc9c76" dependencies = [ - "axum", - "axum-core", + "axum 0.8.8", + "axum-core 0.5.5", "bytes", "form_urlencoded", "futures-core", @@ -489,7 +536,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf48df8b4be768081e11b7bb6d50e7dd96a3616b0b728f9e8d49bfbd8116f3c6" dependencies = [ "anyhow", - "axum", + "axum 0.8.8", "bytes", "bytesize", "cookie", @@ -507,7 +554,7 @@ dependencies = [ "serde_urlencoded", "smallvec", "tokio", - "tower", + "tower 0.5.2", "url", ] @@ -898,6 +945,16 @@ dependencies = [ "libc", ] +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1590,6 +1647,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flate2" version = "1.1.5" @@ -1607,6 +1670,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -1828,19 +1897,34 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap", + "indexmap 2.12.1", "slab", "tokio", "tokio-util", "tracing", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + [[package]] name = "hashbrown" version = "0.16.1" @@ -2033,6 +2117,19 @@ dependencies = [ "webpki-roots 1.0.4", ] +[[package]] +name = "hyper-timeout" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" +dependencies = [ + 
"hyper", + "hyper-util", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.19" @@ -2051,7 +2148,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.1", "system-configuration", "tokio", "tower-service", @@ -2237,6 +2334,16 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" version = "2.12.1" @@ -2552,6 +2659,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "matchit" version = "0.8.4" @@ -2831,7 +2944,7 @@ name = "nvisy-cli" version = "0.1.0" dependencies = [ "anyhow", - "axum", + "axum 0.8.8", "axum-server", "clap", "dotenvy", @@ -2955,7 +3068,7 @@ dependencies = [ [[package]] name = "nvisy-rt-archive" version = "0.1.0" -source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "bytes", "bzip2", @@ -2974,7 +3087,7 @@ dependencies = [ [[package]] name = "nvisy-rt-core" version = "0.1.0" -source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "bytes", "derive_more", @@ -2992,7 +3105,7 @@ dependencies = [ [[package]] name = "nvisy-rt-document" version = "0.1.0" -source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "async-trait", "base64", @@ -3011,7 +3124,7 @@ dependencies = [ [[package]] name = "nvisy-rt-docx" version = "0.1.0" -source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "async-trait", "bytes", @@ -3022,7 +3135,7 @@ dependencies = [ [[package]] name = "nvisy-rt-engine" version = "0.1.0" -source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "bytes", "jiff", @@ -3040,7 +3153,7 @@ dependencies = [ [[package]] name = "nvisy-rt-image" version = "0.1.0" -source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "async-trait", "bytes", @@ -3051,7 +3164,7 @@ dependencies = [ [[package]] name = "nvisy-rt-pdf" version = "0.1.0" -source = 
"git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "async-trait", "bytes", @@ -3062,7 +3175,7 @@ dependencies = [ [[package]] name = "nvisy-rt-text" version = "0.1.0" -source = "git+ssh://git@github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" +source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "async-trait", "bytes", @@ -3084,6 +3197,8 @@ dependencies = [ "nvisy-opendal", "nvisy-rt-core", "nvisy-rt-engine", + "petgraph", + "semver", "serde", "serde_json", "thiserror 2.0.17", @@ -3100,7 +3215,7 @@ dependencies = [ "anyhow", "argon2", "async-trait", - "axum", + "axum 0.8.8", "axum-client-ip", "axum-extra 0.12.5", "axum-test", @@ -3129,7 +3244,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tower", + "tower 0.5.2", "tower-http", "tracing", "tracing-subscriber", @@ -3140,6 +3255,23 @@ dependencies = [ "zxcvbn", ] +[[package]] +name = "nvisy-vector" +version = "0.1.0" +dependencies = [ + "async-trait", + "derive_more", + "futures", + "nvisy-core", + "qdrant-client", + "reqwest", + "serde", + "serde_json", + "thiserror 2.0.17", + "tokio", + "tracing", +] + [[package]] name = "nvisy-webhook" version = "0.1.0" @@ -3220,6 +3352,12 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" +[[package]] +name = "openssl-probe" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" + [[package]] name = "ordered-float" version = "5.1.0" @@ -3314,6 +3452,19 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap 2.12.1", + "serde", + "serde_derive", +] + [[package]] name = "pgtrgm" version = "0.3.0" @@ -3567,6 +3718,60 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools 0.14.0", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +dependencies = [ + "prost", +] + +[[package]] +name = "qdrant-client" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76499f3e8385dae785d65a0216e0dfa8fadaddd18038adf04f438631683b26a" +dependencies = [ + "anyhow", + "derive_builder", + 
"futures", + "futures-util", + "parking_lot", + "prost", + "prost-types", + "reqwest", + "semver", + "serde", + "serde_json", + "thiserror 1.0.69", + "tokio", + "tonic", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -3590,7 +3795,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2", + "socket2 0.6.1", "thiserror 2.0.17", "tokio", "tracing", @@ -3627,7 +3832,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2", + "socket2 0.6.1", "tracing", "windows-sys 0.60.2", ] @@ -3856,7 +4061,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-util", - "tower", + "tower 0.5.2", "tower-http", "tower-service", "url", @@ -4008,6 +4213,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", + "log", "once_cell", "ring", "rustls-pki-types", @@ -4022,11 +4228,23 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" dependencies = [ - "openssl-probe", + "openssl-probe 0.1.6", "rustls-pemfile", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 2.11.1", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe 0.2.0", + "rustls-pki-types", + "schannel", + "security-framework 3.5.1", ] [[package]] @@ -4107,7 +4325,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" dependencies = [ "dyn-clone", - "indexmap", + "indexmap 2.12.1", "jiff", "ref-cast", "schemars_derive 0.9.0", @@ -4192,7 +4410,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ "bitflags", - "core-foundation", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework" +version = "3.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +dependencies = [ + "bitflags", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -4213,6 +4444,10 @@ name = "semver" version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +dependencies = [ + "serde", + "serde_core", +] [[package]] name = "serde" @@ -4262,7 +4497,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2f2d7ff8a2140333718bb329f5c40fc5f0865b84c426183ce14c97d2ab8154f" dependencies = [ "form_urlencoded", - "indexmap", + "indexmap 2.12.1", "itoa", "ryu", "serde_core", @@ -4307,7 +4542,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b417bedc008acbdf6d6b4bc482d29859924114bbe2650b7921fb68a261d0aa6" dependencies = [ - "axum", + "axum 0.8.8", "futures", "percent-encoding", "serde", @@ -4450,6 +4685,16 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.5.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.1" @@ -4564,7 +4809,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ "bitflags", - "core-foundation", + "core-foundation 0.9.4", "system-configuration-sys", ] @@ -4751,7 +4996,7 @@ dependencies = [ "mio", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.1", "tokio-macros", "windows-sys 0.61.2", ] @@ -4787,7 +5032,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.9.2", - "socket2", + "socket2 0.6.1", "tokio", "tokio-util", "whoami", @@ -4879,6 +5124,60 @@ dependencies = [ "winnow", ] +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-stream", + "async-trait", + "axum 0.7.9", + "base64", + "bytes", + "flate2", + "h2", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", + "rustls-native-certs 0.8.3", + "rustls-pemfile", + "socket2 0.5.10", + "tokio", + "tokio-rustls", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower" version = "0.5.2" @@ -4888,7 +5187,7 @@ dependencies = [ "futures-core", "futures-util", "hdrhistogram", - "indexmap", + "indexmap 2.12.1", "pin-project-lite", "slab", "sync_wrapper", @@ -4923,7 +5222,7 @@ dependencies = [ "pin-project-lite", "tokio", "tokio-util", - "tower", + "tower 0.5.2", "tower-layer", "tower-service", "tracing", @@ -5791,7 +6090,7 @@ dependencies = [ "generic-array", "getrandom 0.3.4", "hmac", - "indexmap", + "indexmap 2.12.1", "lzma-rust2", "memchr", "pbkdf2", diff --git a/Cargo.toml b/Cargo.toml index e34bb3f..92cc6da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "./crates/nvisy-rig", "./crates/nvisy-runtime", "./crates/nvisy-server", + "./crates/nvisy-vector", "./crates/nvisy-webhook", ] @@ -40,13 +41,14 @@ nvisy-postgres = { path = "./crates/nvisy-postgres", version = "0.1.0" } nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } nvisy-runtime = { path = "./crates/nvisy-runtime", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } +nvisy-vector = { path = "./crates/nvisy-vector", version = "0.1.0" } nvisy-webhook = { path = "./crates/nvisy-webhook", version = "0.1.0" } # Runtime crates (from github.com/nvisycom/runtime) -nvisy-rt-archive = { git = "ssh://git@github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } -nvisy-rt-core = { git = "ssh://git@github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } -nvisy-rt-document = { git = "ssh://git@github.com/nvisycom/runtime.git", branch = "feature/prerelease", version 
= "0.1.0" } -nvisy-rt-engine = { git = "ssh://git@github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-archive = { git = "https://github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-core = { git = "https://github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-document = { git = "https://github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } +nvisy-rt-engine = { git = "https://github.com/nvisycom/runtime.git", branch = "feature/prerelease", version = "0.1.0" } # CLI clap = { version = "4.5", features = [] } @@ -139,3 +141,9 @@ rig-core = { version = "0.28", default-features = false, features = ["reqwest-ru # Storage abstraction opendal = { version = "0.53", features = [] } chrono = { version = "0.4", features = ["serde"] } + +# Graph data structures +petgraph = { version = "0.8", features = ["serde-1"] } + +# Versioning +semver = { version = "1.0", features = ["serde"] } diff --git a/README.md b/README.md index 6c39e39..d3b48f7 100644 --- a/README.md +++ b/README.md @@ -43,9 +43,6 @@ make generate-all cargo run --features dotenv ``` -> **Note**: Some dependencies require SSH access to private GitHub repositories. -> See [CONTRIBUTING.md](CONTRIBUTING.md#ssh-access) for setup instructions. - ## Configuration See [.env.example](.env.example) for all available environment variables. diff --git a/crates/nvisy-core/src/lib.rs b/crates/nvisy-core/src/lib.rs index 9ba7bf3..98d8fcd 100644 --- a/crates/nvisy-core/src/lib.rs +++ b/crates/nvisy-core/src/lib.rs @@ -3,8 +3,6 @@ #![doc = include_str!("../README.md")] mod error; -#[doc(hidden)] -pub mod prelude; pub mod types; pub use error::{BoxedError, Error, ErrorKind, Result}; diff --git a/crates/nvisy-core/src/prelude.rs b/crates/nvisy-core/src/prelude.rs deleted file mode 100644 index fb04549..0000000 --- a/crates/nvisy-core/src/prelude.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Convenient re-exports for common use. 
-
-pub use crate::error::{BoxedError, Error, ErrorKind, Result};
-pub use crate::types::{ServiceHealth, ServiceStatus, Timing};
diff --git a/crates/nvisy-opendal/Cargo.toml b/crates/nvisy-opendal/Cargo.toml
index 55ed6a2..17b9ff4 100644
--- a/crates/nvisy-opendal/Cargo.toml
+++ b/crates/nvisy-opendal/Cargo.toml
@@ -18,31 +18,6 @@ documentation = { workspace = true }
 all-features = true
 rustdoc-args = ["--cfg", "docsrs"]
 
-[features]
-# Default feature set (none for minimal dependencies)
-default = []
-
-# Amazon S3 storage backend
-s3 = ["opendal/services-s3"]
-
-# Google Cloud Storage backend
-gcs = ["opendal/services-gcs"]
-
-# Azure Blob Storage backend
-azblob = ["opendal/services-azblob"]
-
-# Google Drive backend
-gdrive = ["opendal/services-gdrive"]
-
-# Dropbox backend
-dropbox = ["opendal/services-dropbox"]
-
-# OneDrive backend
-onedrive = ["opendal/services-onedrive"]
-
-# All storage backends
-all-backends = ["s3", "gcs", "azblob", "gdrive", "dropbox", "onedrive"]
-
 [dependencies]
 # Internal crates
 nvisy-core = { workspace = true }
@@ -52,7 +27,14 @@ tokio = { workspace = true, features = ["rt", "sync", "io-util"] }
 futures = { workspace = true, features = [] }
 
 # Storage
-opendal = { workspace = true, features = [] }
+opendal = { workspace = true, features = [
+    "services-s3",
+    "services-gcs",
+    "services-azblob",
+    "services-gdrive",
+    "services-dropbox",
+    "services-onedrive",
+] }
 
 # Observability
 tracing = { workspace = true, features = [] }
diff --git a/crates/nvisy-opendal/src/azblob/config.rs b/crates/nvisy-opendal/src/azblob/config.rs
new file mode 100644
index 0000000..f2ef5d4
--- /dev/null
+++ b/crates/nvisy-opendal/src/azblob/config.rs
@@ -0,0 +1,42 @@
+//! Azure Blob Storage configuration.
+
+use serde::{Deserialize, Serialize};
+
+/// Azure Blob Storage configuration.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct AzureBlobConfig {
+    /// Container name.
+    pub container: String,
+    /// Storage account name.
+    pub account_name: String,
+    /// Storage account key.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub account_key: Option<String>,
+    /// Path prefix within the container.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prefix: Option<String>,
+}
+
+impl AzureBlobConfig {
+    /// Creates a new Azure Blob configuration.
+    pub fn new(container: impl Into<String>, account_name: impl Into<String>) -> Self {
+        Self {
+            container: container.into(),
+            account_name: account_name.into(),
+            account_key: None,
+            prefix: None,
+        }
+    }
+
+    /// Sets the account key.
+    pub fn with_account_key(mut self, account_key: impl Into<String>) -> Self {
+        self.account_key = Some(account_key.into());
+        self
+    }
+
+    /// Sets the path prefix.
+    pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
+        self.prefix = Some(prefix.into());
+        self
+    }
+}
diff --git a/crates/nvisy-opendal/src/azblob/mod.rs b/crates/nvisy-opendal/src/azblob/mod.rs
new file mode 100644
index 0000000..e13cd1e
--- /dev/null
+++ b/crates/nvisy-opendal/src/azblob/mod.rs
@@ -0,0 +1,5 @@
+//! Azure Blob Storage backend.
+
+mod config;
+
+pub use config::AzureBlobConfig;
diff --git a/crates/nvisy-opendal/src/backend.rs b/crates/nvisy-opendal/src/backend.rs
index b4294c8..0a810e6 100644
--- a/crates/nvisy-opendal/src/backend.rs
+++ b/crates/nvisy-opendal/src/backend.rs
@@ -3,8 +3,14 @@ use opendal::{Operator, services};
 
 use crate::TRACING_TARGET;
-use crate::config::{BackendType, StorageConfig};
+use crate::azblob::AzureBlobConfig;
+use crate::config::StorageConfig;
+use crate::dropbox::DropboxConfig;
 use crate::error::{StorageError, StorageResult};
+use crate::gcs::GcsConfig;
+use crate::gdrive::GoogleDriveConfig;
+use crate::onedrive::OneDriveConfig;
+use crate::s3::S3Config;
 
 /// Unified storage backend that wraps OpenDAL operators.
 #[derive(Clone)]
@@ -20,8 +26,7 @@ impl StorageBackend {
 
         tracing::info!(
             target: TRACING_TARGET,
-            backend = ?config.backend_type,
-            root = %config.root,
+            backend = %config.backend_name(),
             "Storage backend initialized"
         );
 
@@ -33,9 +38,9 @@ impl StorageBackend {
         &self.config
     }
 
-    /// Returns the backend type.
-    pub fn backend_type(&self) -> &BackendType {
-        &self.config.backend_type
+    /// Returns the backend name.
+    pub fn backend_name(&self) -> &'static str {
+        self.config.backend_name()
     }
 
     /// Reads a file from storage.
@@ -156,120 +161,127 @@ impl StorageBackend {
     }
 
     /// Creates an OpenDAL operator based on configuration.
-    #[allow(unreachable_patterns)]
     fn create_operator(config: &StorageConfig) -> StorageResult<Operator> {
-        match config.backend_type {
-            #[cfg(feature = "s3")]
-            BackendType::S3 => {
-                let mut builder = services::S3::default().bucket(&config.root);
-
-                if let Some(ref region) = config.region {
-                    builder = builder.region(region);
-                }
-
-                if let Some(ref endpoint) = config.endpoint {
-                    builder = builder.endpoint(endpoint);
-                }
-
-                if let Some(ref access_key_id) = config.access_key_id {
-                    builder = builder.access_key_id(access_key_id);
-                }
-
-                if let Some(ref secret_access_key) = config.secret_access_key {
-                    builder = builder.secret_access_key(secret_access_key);
-                }
-
-                Operator::new(builder)
-                    .map(|op| op.finish())
-                    .map_err(|e| StorageError::init(e.to_string()))
-            }
-
-            #[cfg(feature = "gcs")]
-            BackendType::Gcs => {
-                let builder = services::Gcs::default().bucket(&config.root);
-
-                Operator::new(builder)
-                    .map(|op| op.finish())
-                    .map_err(|e| StorageError::init(e.to_string()))
-            }
-
-            #[cfg(feature = "azblob")]
-            BackendType::AzureBlob => {
-                let mut builder = services::Azblob::default().container(&config.root);
-
-                if let Some(ref account_name) = config.account_name {
-                    builder = builder.account_name(account_name);
-                }
-
-                if let Some(ref account_key) = config.account_key {
-                    builder = builder.account_key(account_key);
-                }
-
-                Operator::new(builder)
-                    .map(|op| op.finish())
-                    .map_err(|e| StorageError::init(e.to_string()))
-            }
-
-            #[cfg(feature = "gdrive")]
-            BackendType::GoogleDrive => {
-                let mut builder = services::Gdrive::default().root(&config.root);
-
-                if let Some(ref access_token) = config.access_token {
-                    builder = builder.access_token(access_token);
-                }
-
-                Operator::new(builder)
-                    .map(|op| op.finish())
-                    .map_err(|e| StorageError::init(e.to_string()))
-            }
-
-            #[cfg(feature = "dropbox")]
-            BackendType::Dropbox => {
-                let mut builder = services::Dropbox::default().root(&config.root);
-
-                if let Some(ref access_token) = config.access_token {
-                    builder = builder.access_token(access_token);
-                }
-
-                if let Some(ref refresh_token) = config.refresh_token {
-                    builder = builder.refresh_token(refresh_token);
-                }
-
-                if let Some(ref client_id) = config.access_key_id {
-                    builder = builder.client_id(client_id);
-                }
-
-                if let Some(ref client_secret) = config.secret_access_key {
-                    builder = builder.client_secret(client_secret);
-                }
-
-                Operator::new(builder)
-                    .map(|op| op.finish())
-                    .map_err(|e| StorageError::init(e.to_string()))
-            }
-
-            #[cfg(feature = "onedrive")]
-            BackendType::OneDrive => {
-                let mut builder = services::Onedrive::default().root(&config.root);
-
-                if let Some(ref access_token) = config.access_token {
-                    builder = builder.access_token(access_token);
-                }
-
-                Operator::new(builder)
-                    .map(|op| op.finish())
-                    .map_err(|e| StorageError::init(e.to_string()))
-            }
-
-            // This should never be reached if the config was properly created
-            // with the same features enabled
-            #[allow(unreachable_patterns)]
-            _ => Err(StorageError::init(format!(
-                "Backend type {:?} is not supported with current features",
-                config.backend_type
-            ))),
+        match config {
+            StorageConfig::S3(cfg) => Self::create_s3_operator(cfg),
+            StorageConfig::Gcs(cfg) => Self::create_gcs_operator(cfg),
+            StorageConfig::AzureBlob(cfg) => Self::create_azblob_operator(cfg),
+            StorageConfig::GoogleDrive(cfg) => Self::create_gdrive_operator(cfg),
+            StorageConfig::Dropbox(cfg) => Self::create_dropbox_operator(cfg),
+            StorageConfig::OneDrive(cfg) => Self::create_onedrive_operator(cfg),
         }
     }
+
+    fn create_s3_operator(cfg: &S3Config) -> StorageResult<Operator> {
+        let mut builder = services::S3::default()
+            .bucket(&cfg.bucket)
+            .region(&cfg.region);
+
+        if let Some(ref endpoint) = cfg.endpoint {
+            builder = builder.endpoint(endpoint);
+        }
+
+        if let Some(ref access_key_id) = cfg.access_key_id {
+            builder = builder.access_key_id(access_key_id);
+        }
+
+        if let Some(ref secret_access_key) = cfg.secret_access_key {
+            builder = builder.secret_access_key(secret_access_key);
+        }
+
+        // Apply prefix as root path
+        if let Some(ref prefix) = cfg.prefix {
+            builder = builder.root(prefix);
+        }
+
+        Operator::new(builder)
+            .map(|op| op.finish())
+            .map_err(|e| StorageError::init(e.to_string()))
+    }
+
+    fn create_gcs_operator(cfg: &GcsConfig) -> StorageResult<Operator> {
+        let mut builder = services::Gcs::default().bucket(&cfg.bucket);
+
+        if let Some(ref credentials) = cfg.credentials {
+            builder = builder.credential(credentials);
+        }
+
+        // Apply prefix as root path
+        if let Some(ref prefix) = cfg.prefix {
+            builder = builder.root(prefix);
+        }
+
+        Operator::new(builder)
+            .map(|op| op.finish())
+            .map_err(|e| StorageError::init(e.to_string()))
+    }
+
+    fn create_azblob_operator(cfg: &AzureBlobConfig) -> StorageResult<Operator> {
+        let mut builder = services::Azblob::default()
+            .container(&cfg.container)
+            .account_name(&cfg.account_name);
+
+        if let Some(ref account_key) = cfg.account_key {
+            builder = builder.account_key(account_key);
+        }
+
+        // Apply prefix as root path
+        if let Some(ref prefix) = cfg.prefix {
+            builder = builder.root(prefix);
+        }
+
+        Operator::new(builder)
+            .map(|op| op.finish())
+            .map_err(|e| StorageError::init(e.to_string()))
+    }
+
+    fn create_gdrive_operator(cfg: &GoogleDriveConfig) -> StorageResult<Operator> {
+        let mut builder = services::Gdrive::default().root(&cfg.root);
+
+        if let Some(ref access_token) = cfg.access_token {
+            builder = builder.access_token(access_token);
+        }
+
+        Operator::new(builder)
+            .map(|op| op.finish())
+            .map_err(|e| StorageError::init(e.to_string()))
+    }
+
+    fn create_dropbox_operator(cfg: &DropboxConfig) -> StorageResult<Operator> {
+        let mut builder = services::Dropbox::default().root(&cfg.root);
+
+        if let Some(ref access_token) = cfg.access_token {
+            builder = builder.access_token(access_token);
+        }
+
+        if let Some(ref refresh_token) = cfg.refresh_token {
+            builder = builder.refresh_token(refresh_token);
+        }
+
+        if let Some(ref client_id) = cfg.client_id {
+            builder = builder.client_id(client_id);
+        }
+
+        if let Some(ref client_secret) = cfg.client_secret {
+            builder = builder.client_secret(client_secret);
+        }
+
+        Operator::new(builder)
+            .map(|op| op.finish())
+            .map_err(|e| StorageError::init(e.to_string()))
+    }
+
+    fn create_onedrive_operator(cfg: &OneDriveConfig) -> StorageResult<Operator> {
+        let mut builder = services::Onedrive::default().root(&cfg.root);
+
+        if let Some(ref access_token) = cfg.access_token {
+            builder = builder.access_token(access_token);
+        }
+
+        Operator::new(builder)
+            .map(|op| op.finish())
+            .map_err(|e| StorageError::init(e.to_string()))
+    }
 }
 
 /// File metadata.
@@ -286,8 +298,7 @@ pub struct FileMetadata {
 impl std::fmt::Debug for StorageBackend {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.debug_struct("StorageBackend")
-            .field("backend_type", &self.config.backend_type)
-            .field("root", &self.config.root)
+            .field("backend", &self.config.backend_name())
             .finish()
     }
 }
diff --git a/crates/nvisy-opendal/src/config.rs b/crates/nvisy-opendal/src/config.rs
index 6b4dc6f..5c75eaa 100644
--- a/crates/nvisy-opendal/src/config.rs
+++ b/crates/nvisy-opendal/src/config.rs
@@ -2,243 +2,43 @@
 use serde::{Deserialize, Serialize};
 
-/// Storage backend type.
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub enum BackendType {
+// Re-export configs from backend modules
+pub use crate::azblob::AzureBlobConfig;
+pub use crate::dropbox::DropboxConfig;
+pub use crate::gcs::GcsConfig;
+pub use crate::gdrive::GoogleDriveConfig;
+pub use crate::onedrive::OneDriveConfig;
+pub use crate::s3::S3Config;
+
+/// Storage backend configuration.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+#[non_exhaustive]
+pub enum StorageConfig {
     /// Amazon S3 compatible storage.
-    #[cfg(feature = "s3")]
-    S3,
-
+    S3(S3Config),
     /// Google Cloud Storage.
-    #[cfg(feature = "gcs")]
-    Gcs,
-
+    Gcs(GcsConfig),
     /// Azure Blob Storage.
-    #[cfg(feature = "azblob")]
-    AzureBlob,
-
+    AzureBlob(AzureBlobConfig),
    /// Google Drive.
-    #[cfg(feature = "gdrive")]
-    GoogleDrive,
-
+    GoogleDrive(GoogleDriveConfig),
     /// Dropbox.
-    #[cfg(feature = "dropbox")]
-    Dropbox,
-
+    Dropbox(DropboxConfig),
     /// OneDrive.
-    #[cfg(feature = "onedrive")]
-    OneDrive,
-}
-
-/// Configuration for a storage backend.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct StorageConfig {
-    /// Type of storage backend.
-    pub backend_type: BackendType,
-
-    /// Root path or bucket/container name.
-    pub root: String,
-
-    /// Region (for cloud storage).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub region: Option<String>,
-
-    /// Endpoint URL (for S3-compatible storage).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub endpoint: Option<String>,
-
-    /// Access key ID / Client ID (for cloud storage).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub access_key_id: Option<String>,
-
-    /// Secret access key / Client secret (for cloud storage).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub secret_access_key: Option<String>,
-
-    /// Account name (for Azure Blob Storage).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub account_name: Option<String>,
-
-    /// Account key (for Azure Blob Storage).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub account_key: Option<String>,
-
-    /// OAuth access token (for Google Drive, Dropbox, OneDrive).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub access_token: Option<String>,
-
-    /// OAuth refresh token (for Google Drive, Dropbox, OneDrive).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub refresh_token: Option<String>,
+    OneDrive(OneDriveConfig),
 }
 
 impl StorageConfig {
-    /// Creates an S3 storage configuration.
-    #[cfg(feature = "s3")]
-    pub fn s3(bucket: impl Into<String>, region: impl Into<String>) -> Self {
-        Self {
-            backend_type: BackendType::S3,
-            root: bucket.into(),
-            region: Some(region.into()),
-            endpoint: None,
-            access_key_id: None,
-            secret_access_key: None,
-            account_name: None,
-            account_key: None,
-            access_token: None,
-            refresh_token: None,
-        }
-    }
-
-    /// Creates an S3-compatible storage configuration with custom endpoint.
-    #[cfg(feature = "s3")]
-    pub fn s3_compatible(
-        bucket: impl Into<String>,
-        endpoint: impl Into<String>,
-        region: impl Into<String>,
-    ) -> Self {
-        Self {
-            backend_type: BackendType::S3,
-            root: bucket.into(),
-            region: Some(region.into()),
-            endpoint: Some(endpoint.into()),
-            access_key_id: None,
-            secret_access_key: None,
-            account_name: None,
-            account_key: None,
-            access_token: None,
-            refresh_token: None,
-        }
-    }
-
-    /// Creates a GCS storage configuration.
-    #[cfg(feature = "gcs")]
-    pub fn gcs(bucket: impl Into<String>) -> Self {
-        Self {
-            backend_type: BackendType::Gcs,
-            root: bucket.into(),
-            region: None,
-            endpoint: None,
-            access_key_id: None,
-            secret_access_key: None,
-            account_name: None,
-            account_key: None,
-            access_token: None,
-            refresh_token: None,
+    /// Returns the backend name as a static string.
+    pub fn backend_name(&self) -> &'static str {
+        match self {
+            Self::S3(_) => "s3",
+            Self::Gcs(_) => "gcs",
+            Self::AzureBlob(_) => "azblob",
+            Self::GoogleDrive(_) => "gdrive",
+            Self::Dropbox(_) => "dropbox",
+            Self::OneDrive(_) => "onedrive",
         }
     }
-
-    /// Creates an Azure Blob Storage configuration.
-    #[cfg(feature = "azblob")]
-    pub fn azure_blob(container: impl Into<String>, account_name: impl Into<String>) -> Self {
-        Self {
-            backend_type: BackendType::AzureBlob,
-            root: container.into(),
-            region: None,
-            endpoint: None,
-            access_key_id: None,
-            secret_access_key: None,
-            account_name: Some(account_name.into()),
-            account_key: None,
-            access_token: None,
-            refresh_token: None,
-        }
-    }
-
-    /// Creates a Google Drive storage configuration.
-    #[cfg(feature = "gdrive")]
-    pub fn google_drive(root: impl Into<String>) -> Self {
-        Self {
-            backend_type: BackendType::GoogleDrive,
-            root: root.into(),
-            region: None,
-            endpoint: None,
-            access_key_id: None,
-            secret_access_key: None,
-            account_name: None,
-            account_key: None,
-            access_token: None,
-            refresh_token: None,
-        }
-    }
-
-    /// Creates a Dropbox storage configuration.
-    #[cfg(feature = "dropbox")]
-    pub fn dropbox(root: impl Into<String>) -> Self {
-        Self {
-            backend_type: BackendType::Dropbox,
-            root: root.into(),
-            region: None,
-            endpoint: None,
-            access_key_id: None,
-            secret_access_key: None,
-            account_name: None,
-            account_key: None,
-            access_token: None,
-            refresh_token: None,
-        }
-    }
-
-    /// Creates a OneDrive storage configuration.
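For illustration only, a minimal sketch of how the new tagged `StorageConfig` enum might be constructed and inspected, assuming the `S3Config` builder methods introduced later in this patch and a `serde_json` dependency for serialization; the `{"type": "s3", ...}` shape follows from the `#[serde(tag = "type", rename_all = "snake_case")]` attribute above, not from any code shown here:

```rust
use nvisy_opendal::{S3Config, StorageConfig};

fn example() -> serde_json::Result<()> {
    // Each backend now carries its own strongly typed config instead of the
    // old flat struct with `backend_type` plus many optional fields.
    let config = StorageConfig::S3(
        S3Config::new("my-bucket", "us-east-1")
            .with_credentials("ACCESS_KEY_ID", "SECRET_ACCESS_KEY") // placeholders
            .with_prefix("uploads/"),
    );

    assert_eq!(config.backend_name(), "s3");

    // Internally tagged: serializes as {"type":"s3","bucket":"my-bucket",...}.
    println!("{}", serde_json::to_string_pretty(&config)?);
    Ok(())
}
```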
-    #[cfg(feature = "onedrive")]
-    pub fn onedrive(root: impl Into<String>) -> Self {
-        Self {
-            backend_type: BackendType::OneDrive,
-            root: root.into(),
-            region: None,
-            endpoint: None,
-            access_key_id: None,
-            secret_access_key: None,
-            account_name: None,
-            account_key: None,
-            access_token: None,
-            refresh_token: None,
-        }
-    }
-
-    /// Sets the access credentials for S3/GCS.
-    #[cfg(any(feature = "s3", feature = "gcs"))]
-    pub fn with_credentials(
-        mut self,
-        access_key_id: impl Into<String>,
-        secret_access_key: impl Into<String>,
-    ) -> Self {
-        self.access_key_id = Some(access_key_id.into());
-        self.secret_access_key = Some(secret_access_key.into());
-        self
-    }
-
-    /// Sets the Azure account key.
-    #[cfg(feature = "azblob")]
-    pub fn with_account_key(mut self, account_key: impl Into<String>) -> Self {
-        self.account_key = Some(account_key.into());
-        self
-    }
-
-    /// Sets the OAuth access token for OAuth-based backends.
-    #[cfg(any(feature = "gdrive", feature = "dropbox", feature = "onedrive"))]
-    pub fn with_access_token(mut self, access_token: impl Into<String>) -> Self {
-        self.access_token = Some(access_token.into());
-        self
-    }
-
-    /// Sets the OAuth refresh token for OAuth-based backends.
-    #[cfg(any(feature = "gdrive", feature = "dropbox", feature = "onedrive"))]
-    pub fn with_refresh_token(mut self, refresh_token: impl Into<String>) -> Self {
-        self.refresh_token = Some(refresh_token.into());
-        self
-    }
-
-    /// Sets the client credentials for OAuth-based backends.
-    #[cfg(any(feature = "gdrive", feature = "dropbox", feature = "onedrive"))]
-    pub fn with_client_credentials(
-        mut self,
-        client_id: impl Into<String>,
-        client_secret: impl Into<String>,
-    ) -> Self {
-        self.access_key_id = Some(client_id.into());
-        self.secret_access_key = Some(client_secret.into());
-        self
-    }
 }
diff --git a/crates/nvisy-opendal/src/dropbox/config.rs b/crates/nvisy-opendal/src/dropbox/config.rs
new file mode 100644
index 0000000..2f435b1
--- /dev/null
+++ b/crates/nvisy-opendal/src/dropbox/config.rs
@@ -0,0 +1,58 @@
+//! Dropbox configuration.
+
+use serde::{Deserialize, Serialize};
+
+/// Dropbox configuration.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DropboxConfig {
+    /// Root folder path.
+    pub root: String,
+    /// OAuth client ID.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub client_id: Option<String>,
+    /// OAuth client secret.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub client_secret: Option<String>,
+    /// OAuth access token.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub access_token: Option<String>,
+    /// OAuth refresh token.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub refresh_token: Option<String>,
+}
+
+impl DropboxConfig {
+    /// Creates a new Dropbox configuration.
+    pub fn new(root: impl Into<String>) -> Self {
+        Self {
+            root: root.into(),
+            client_id: None,
+            client_secret: None,
+            access_token: None,
+            refresh_token: None,
+        }
+    }
+
+    /// Sets the OAuth client credentials.
+    pub fn with_client_credentials(
+        mut self,
+        client_id: impl Into<String>,
+        client_secret: impl Into<String>,
+    ) -> Self {
+        self.client_id = Some(client_id.into());
+        self.client_secret = Some(client_secret.into());
+        self
+    }
+
+    /// Sets the access token.
+    pub fn with_access_token(mut self, access_token: impl Into<String>) -> Self {
+        self.access_token = Some(access_token.into());
+        self
+    }
+
+    /// Sets the refresh token.
+    pub fn with_refresh_token(mut self, refresh_token: impl Into<String>) -> Self {
+        self.refresh_token = Some(refresh_token.into());
+        self
+    }
+}
diff --git a/crates/nvisy-opendal/src/dropbox/mod.rs b/crates/nvisy-opendal/src/dropbox/mod.rs
new file mode 100644
index 0000000..9389461
--- /dev/null
+++ b/crates/nvisy-opendal/src/dropbox/mod.rs
@@ -0,0 +1,5 @@
+//! Dropbox storage backend.
+
+mod config;
+
+pub use config::DropboxConfig;
diff --git a/crates/nvisy-opendal/src/gcs/config.rs b/crates/nvisy-opendal/src/gcs/config.rs
new file mode 100644
index 0000000..6eacef3
--- /dev/null
+++ b/crates/nvisy-opendal/src/gcs/config.rs
@@ -0,0 +1,39 @@
+//! Google Cloud Storage configuration.
+
+use serde::{Deserialize, Serialize};
+
+/// Google Cloud Storage configuration.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct GcsConfig {
+    /// Bucket name.
+    pub bucket: String,
+    /// Service account credentials JSON.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub credentials: Option<String>,
+    /// Path prefix within the bucket.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prefix: Option<String>,
+}
+
+impl GcsConfig {
+    /// Creates a new GCS configuration.
+    pub fn new(bucket: impl Into<String>) -> Self {
+        Self {
+            bucket: bucket.into(),
+            credentials: None,
+            prefix: None,
+        }
+    }
+
+    /// Sets the service account credentials.
+    pub fn with_credentials(mut self, credentials: impl Into<String>) -> Self {
+        self.credentials = Some(credentials.into());
+        self
+    }
+
+    /// Sets the path prefix.
+    pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
+        self.prefix = Some(prefix.into());
+        self
+    }
+}
diff --git a/crates/nvisy-opendal/src/gcs/mod.rs b/crates/nvisy-opendal/src/gcs/mod.rs
new file mode 100644
index 0000000..0279e42
--- /dev/null
+++ b/crates/nvisy-opendal/src/gcs/mod.rs
@@ -0,0 +1,5 @@
+//! Google Cloud Storage backend.
+
+mod config;
+
+pub use config::GcsConfig;
diff --git a/crates/nvisy-opendal/src/gdrive/config.rs b/crates/nvisy-opendal/src/gdrive/config.rs
new file mode 100644
index 0000000..f03449d
--- /dev/null
+++ b/crates/nvisy-opendal/src/gdrive/config.rs
@@ -0,0 +1,58 @@
+//! Google Drive configuration.
+
+use serde::{Deserialize, Serialize};
+
+/// Google Drive configuration.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct GoogleDriveConfig {
+    /// Root folder path or ID.
+    pub root: String,
+    /// OAuth client ID.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub client_id: Option<String>,
+    /// OAuth client secret.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub client_secret: Option<String>,
+    /// OAuth access token.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub access_token: Option<String>,
+    /// OAuth refresh token.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub refresh_token: Option<String>,
+}
+
+impl GoogleDriveConfig {
+    /// Creates a new Google Drive configuration.
+    pub fn new(root: impl Into<String>) -> Self {
+        Self {
+            root: root.into(),
+            client_id: None,
+            client_secret: None,
+            access_token: None,
+            refresh_token: None,
+        }
+    }
+
+    /// Sets the OAuth client credentials.
+    pub fn with_client_credentials(
+        mut self,
+        client_id: impl Into<String>,
+        client_secret: impl Into<String>,
+    ) -> Self {
+        self.client_id = Some(client_id.into());
+        self.client_secret = Some(client_secret.into());
+        self
+    }
+
+    /// Sets the access token.
+    pub fn with_access_token(mut self, access_token: impl Into<String>) -> Self {
+        self.access_token = Some(access_token.into());
+        self
+    }
+
+    /// Sets the refresh token.
+    pub fn with_refresh_token(mut self, refresh_token: impl Into<String>) -> Self {
+        self.refresh_token = Some(refresh_token.into());
+        self
+    }
+}
diff --git a/crates/nvisy-opendal/src/gdrive/mod.rs b/crates/nvisy-opendal/src/gdrive/mod.rs
new file mode 100644
index 0000000..9f4259f
--- /dev/null
+++ b/crates/nvisy-opendal/src/gdrive/mod.rs
@@ -0,0 +1,5 @@
+//! Google Drive storage backend.
+
+mod config;
+
+pub use config::GoogleDriveConfig;
diff --git a/crates/nvisy-opendal/src/lib.rs b/crates/nvisy-opendal/src/lib.rs
index f937a09..808c9c2 100644
--- a/crates/nvisy-opendal/src/lib.rs
+++ b/crates/nvisy-opendal/src/lib.rs
@@ -2,15 +2,22 @@
 #![cfg_attr(docsrs, feature(doc_cfg))]
 #![doc = include_str!("../README.md")]
 
+pub mod azblob;
+pub mod dropbox;
+pub mod gcs;
+pub mod gdrive;
+pub mod onedrive;
+pub mod s3;
+
 mod backend;
 mod config;
 mod error;
 
-#[doc(hidden)]
-pub mod prelude;
-
 pub use backend::{FileMetadata, StorageBackend};
-pub use config::{BackendType, StorageConfig};
+pub use config::{
+    AzureBlobConfig, DropboxConfig, GcsConfig, GoogleDriveConfig, OneDriveConfig, S3Config,
+    StorageConfig,
+};
 pub use error::{StorageError, StorageResult};
 
 /// Tracing target for storage operations.
diff --git a/crates/nvisy-opendal/src/onedrive/config.rs b/crates/nvisy-opendal/src/onedrive/config.rs
new file mode 100644
index 0000000..a34c79c
--- /dev/null
+++ b/crates/nvisy-opendal/src/onedrive/config.rs
@@ -0,0 +1,58 @@
+//! OneDrive configuration.
+
+use serde::{Deserialize, Serialize};
+
+/// OneDrive configuration.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct OneDriveConfig {
+    /// Root folder path.
+    pub root: String,
+    /// OAuth client ID.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub client_id: Option<String>,
+    /// OAuth client secret.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub client_secret: Option<String>,
+    /// OAuth access token.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub access_token: Option<String>,
+    /// OAuth refresh token.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub refresh_token: Option<String>,
+}
+
+impl OneDriveConfig {
+    /// Creates a new OneDrive configuration.
+    pub fn new(root: impl Into<String>) -> Self {
+        Self {
+            root: root.into(),
+            client_id: None,
+            client_secret: None,
+            access_token: None,
+            refresh_token: None,
+        }
+    }
+
+    /// Sets the OAuth client credentials.
+    pub fn with_client_credentials(
+        mut self,
+        client_id: impl Into<String>,
+        client_secret: impl Into<String>,
+    ) -> Self {
+        self.client_id = Some(client_id.into());
+        self.client_secret = Some(client_secret.into());
+        self
+    }
+
+    /// Sets the access token.
+    pub fn with_access_token(mut self, access_token: impl Into<String>) -> Self {
+        self.access_token = Some(access_token.into());
+        self
+    }
+
+    /// Sets the refresh token.
+    pub fn with_refresh_token(mut self, refresh_token: impl Into<String>) -> Self {
+        self.refresh_token = Some(refresh_token.into());
+        self
+    }
+}
diff --git a/crates/nvisy-opendal/src/onedrive/mod.rs b/crates/nvisy-opendal/src/onedrive/mod.rs
new file mode 100644
index 0000000..98b0365
--- /dev/null
+++ b/crates/nvisy-opendal/src/onedrive/mod.rs
@@ -0,0 +1,5 @@
+//! OneDrive storage backend.
+
+mod config;
+
+pub use config::OneDriveConfig;
diff --git a/crates/nvisy-opendal/src/prelude.rs b/crates/nvisy-opendal/src/prelude.rs
deleted file mode 100644
index b7068c7..0000000
--- a/crates/nvisy-opendal/src/prelude.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-//! Prelude module for convenient imports.
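The Dropbox, Google Drive, and OneDrive configs above all share the same OAuth-flavoured builder shape (client credentials plus access/refresh tokens). A hedged sketch of wiring one up; the values are placeholders, not real credentials:

```rust
use nvisy_opendal::{DropboxConfig, StorageConfig};

fn dropbox_config() -> StorageConfig {
    // Same pattern applies to GoogleDriveConfig and OneDriveConfig.
    let dropbox = DropboxConfig::new("/nvisy")
        .with_client_credentials("client-id", "client-secret") // placeholders
        .with_refresh_token("refresh-token");

    let config = StorageConfig::Dropbox(dropbox);
    assert_eq!(config.backend_name(), "dropbox");
    config
}
```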
-
-pub use crate::backend::{FileMetadata, StorageBackend};
-pub use crate::config::{BackendType, StorageConfig};
-pub use crate::error::{StorageError, StorageResult};
diff --git a/crates/nvisy-opendal/src/s3/config.rs b/crates/nvisy-opendal/src/s3/config.rs
new file mode 100644
index 0000000..b938ee5
--- /dev/null
+++ b/crates/nvisy-opendal/src/s3/config.rs
@@ -0,0 +1,61 @@
+//! Amazon S3 configuration.
+
+use serde::{Deserialize, Serialize};
+
+/// Amazon S3 configuration.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct S3Config {
+    /// Bucket name.
+    pub bucket: String,
+    /// AWS region.
+    pub region: String,
+    /// Custom endpoint URL (for S3-compatible storage like MinIO, R2).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub endpoint: Option<String>,
+    /// Access key ID.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub access_key_id: Option<String>,
+    /// Secret access key.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub secret_access_key: Option<String>,
+    /// Path prefix within the bucket.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prefix: Option<String>,
+}
+
+impl S3Config {
+    /// Creates a new S3 configuration.
+    pub fn new(bucket: impl Into<String>, region: impl Into<String>) -> Self {
+        Self {
+            bucket: bucket.into(),
+            region: region.into(),
+            endpoint: None,
+            access_key_id: None,
+            secret_access_key: None,
+            prefix: None,
+        }
+    }
+
+    /// Sets the custom endpoint (for S3-compatible storage).
+    pub fn with_endpoint(mut self, endpoint: impl Into<String>) -> Self {
+        self.endpoint = Some(endpoint.into());
+        self
+    }
+
+    /// Sets the access credentials.
+    pub fn with_credentials(
+        mut self,
+        access_key_id: impl Into<String>,
+        secret_access_key: impl Into<String>,
+    ) -> Self {
+        self.access_key_id = Some(access_key_id.into());
+        self.secret_access_key = Some(secret_access_key.into());
+        self
+    }
+
+    /// Sets the path prefix.
+    pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
+        self.prefix = Some(prefix.into());
+        self
+    }
+}
diff --git a/crates/nvisy-opendal/src/s3/mod.rs b/crates/nvisy-opendal/src/s3/mod.rs
new file mode 100644
index 0000000..243ae8c
--- /dev/null
+++ b/crates/nvisy-opendal/src/s3/mod.rs
@@ -0,0 +1,5 @@
+//! Amazon S3 storage backend.
+
+mod config;
+
+pub use config::S3Config;
diff --git a/crates/nvisy-postgres/src/model/account_api_token.rs b/crates/nvisy-postgres/src/model/account_api_token.rs
index d3ed6c1..ac01344 100644
--- a/crates/nvisy-postgres/src/model/account_api_token.rs
+++ b/crates/nvisy-postgres/src/model/account_api_token.rs
@@ -6,8 +6,10 @@ use jiff_diesel::Timestamp;
 use uuid::Uuid;
 
 use crate::schema::account_api_tokens;
-use crate::types::{ApiTokenType, HasCreatedAt, HasExpiresAt, HasSecurityContext};
-use crate::types::{EXPIRY_WARNING_MINUTES, LONG_LIVED_THRESHOLD_HOURS};
+use crate::types::{
+    ApiTokenType, EXPIRY_WARNING_MINUTES, HasCreatedAt, HasExpiresAt, HasSecurityContext,
+    LONG_LIVED_THRESHOLD_HOURS,
+};
 
 /// Account API token model representing an authentication token.
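`S3Config::with_endpoint` is what distinguishes plain AWS S3 from S3-compatible stores such as MinIO or Cloudflare R2, as the doc comment above notes. A small sketch, assuming a locally running MinIO whose URL and credentials are placeholders; the resulting `StorageConfig` would then be handed to the storage backend the same way as any other variant:

```rust
use nvisy_opendal::{S3Config, StorageConfig};

fn minio_config() -> StorageConfig {
    StorageConfig::S3(
        S3Config::new("artifacts", "us-east-1")
            // Custom endpoint switches the operator to an S3-compatible server.
            .with_endpoint("http://localhost:9000")
            .with_credentials("minioadmin", "minioadmin") // placeholders
            .with_prefix("pipelines/"),
    )
}
```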
#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] diff --git a/crates/nvisy-postgres/src/model/account_notification.rs b/crates/nvisy-postgres/src/model/account_notification.rs index 78e044c..4a1dbde 100644 --- a/crates/nvisy-postgres/src/model/account_notification.rs +++ b/crates/nvisy-postgres/src/model/account_notification.rs @@ -5,8 +5,7 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::account_notifications; -use crate::types::DEFAULT_RETENTION_DAYS; -use crate::types::{HasCreatedAt, HasExpiresAt, NotificationEvent}; +use crate::types::{DEFAULT_RETENTION_DAYS, HasCreatedAt, HasExpiresAt, NotificationEvent}; /// Account notification model representing a notification sent to a user. #[derive(Debug, Clone, PartialEq, Queryable, Selectable)] diff --git a/crates/nvisy-postgres/src/model/mod.rs b/crates/nvisy-postgres/src/model/mod.rs index ce70186..8c381c7 100644 --- a/crates/nvisy-postgres/src/model/mod.rs +++ b/crates/nvisy-postgres/src/model/mod.rs @@ -30,16 +30,13 @@ pub use account_api_token::{AccountApiToken, NewAccountApiToken, UpdateAccountAp pub use account_notification::{ AccountNotification, NewAccountNotification, UpdateAccountNotification, }; - // File models pub use file::{File, NewFile, UpdateFile}; pub use file_annotation::{FileAnnotation, NewFileAnnotation, UpdateFileAnnotation}; pub use file_chunk::{FileChunk, NewFileChunk, ScoredFileChunk, UpdateFileChunk}; - // Pipeline models pub use pipeline::{NewPipeline, Pipeline, UpdatePipeline}; pub use pipeline_run::{NewPipelineRun, PipelineRun, UpdatePipelineRun}; - // Workspace models pub use workspace::{NewWorkspace, UpdateWorkspace, Workspace}; pub use workspace_activity::{NewWorkspaceActivity, WorkspaceActivity}; diff --git a/crates/nvisy-postgres/src/model/workspace_invite.rs b/crates/nvisy-postgres/src/model/workspace_invite.rs index 6dd8624..384f769 100644 --- a/crates/nvisy-postgres/src/model/workspace_invite.rs +++ b/crates/nvisy-postgres/src/model/workspace_invite.rs @@ -5,8 +5,7 @@ use jiff_diesel::Timestamp; use uuid::Uuid; use crate::schema::workspace_invites; -use crate::types::RECENTLY_SENT_HOURS; -use crate::types::{HasCreatedAt, HasUpdatedAt, InviteStatus, WorkspaceRole}; +use crate::types::{HasCreatedAt, HasUpdatedAt, InviteStatus, RECENTLY_SENT_HOURS, WorkspaceRole}; /// Workspace invitation model representing an invitation to join a workspace. 
#[derive(Debug, Clone, PartialEq, Queryable, Selectable)] diff --git a/crates/nvisy-postgres/src/query/mod.rs b/crates/nvisy-postgres/src/query/mod.rs index f0e525e..d4b1f75 100644 --- a/crates/nvisy-postgres/src/query/mod.rs +++ b/crates/nvisy-postgres/src/query/mod.rs @@ -37,14 +37,11 @@ pub use account::AccountRepository; pub use account_action_token::AccountActionTokenRepository; pub use account_api_token::AccountApiTokenRepository; pub use account_notification::AccountNotificationRepository; - pub use file::FileRepository; pub use file_annotation::FileAnnotationRepository; pub use file_chunk::FileChunkRepository; - pub use pipeline::PipelineRepository; pub use pipeline_run::PipelineRunRepository; - pub use workspace::WorkspaceRepository; pub use workspace_activity::WorkspaceActivityRepository; pub use workspace_integration::WorkspaceIntegrationRepository; diff --git a/crates/nvisy-postgres/src/types/enums/mod.rs b/crates/nvisy-postgres/src/types/enums/mod.rs index 1e52806..b4d867e 100644 --- a/crates/nvisy-postgres/src/types/enums/mod.rs +++ b/crates/nvisy-postgres/src/types/enums/mod.rs @@ -38,12 +38,11 @@ pub use integration_status::IntegrationStatus; pub use integration_type::IntegrationType; pub use invite_status::InviteStatus; pub use notification_event::NotificationEvent; +pub use pipeline_run_status::PipelineRunStatus; +pub use pipeline_status::PipelineStatus; +pub use pipeline_trigger_type::PipelineTriggerType; pub use run_type::RunType; pub use webhook_event::WebhookEvent; pub use webhook_status::WebhookStatus; pub use webhook_type::WebhookType; pub use workspace_role::WorkspaceRole; - -pub use pipeline_run_status::PipelineRunStatus; -pub use pipeline_status::PipelineStatus; -pub use pipeline_trigger_type::PipelineTriggerType; diff --git a/crates/nvisy-rig/src/rag/indexer/mod.rs b/crates/nvisy-rig/src/rag/indexer/mod.rs index f0458d9..b2927b0 100644 --- a/crates/nvisy-rig/src/rag/indexer/mod.rs +++ b/crates/nvisy-rig/src/rag/indexer/mod.rs @@ -7,7 +7,6 @@ mod indexed; use nvisy_postgres::model::NewFileChunk; use nvisy_postgres::query::FileChunkRepository; use nvisy_postgres::{PgClient, Vector}; - use sha2::{Digest, Sha256}; use uuid::Uuid; diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs index 3d90b78..e4419be 100644 --- a/crates/nvisy-rig/src/rag/searcher/mod.rs +++ b/crates/nvisy-rig/src/rag/searcher/mod.rs @@ -11,7 +11,6 @@ use nvisy_nats::object::{DocumentKey, DocumentStore, Files}; use nvisy_postgres::model::ScoredFileChunk; use nvisy_postgres::query::FileChunkRepository; use nvisy_postgres::{PgClient, Vector}; - use tokio::io::AsyncReadExt; use uuid::Uuid; diff --git a/crates/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml index 21eb0a1..591a5ce 100644 --- a/crates/nvisy-runtime/Cargo.toml +++ b/crates/nvisy-runtime/Cargo.toml @@ -18,10 +18,6 @@ documentation = { workspace = true } all-features = true rustdoc-args = ["--cfg", "docsrs"] -[features] -# Default feature set (none for minimal dependencies) -default = [] - [dependencies] # Internal crates nvisy-core = { workspace = true } @@ -50,5 +46,11 @@ derive_builder = { workspace = true, features = [] } # Data types uuid = { workspace = true, features = ["v7", "serde"] } +# Graph data structures +petgraph = { workspace = true, features = [] } + +# Versioning +semver = { workspace = true, features = [] } + [dev-dependencies] tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/nvisy-runtime/src/engine/config.rs 
b/crates/nvisy-runtime/src/engine/config.rs index 9b648fa..03c2bbc 100644 --- a/crates/nvisy-runtime/src/engine/config.rs +++ b/crates/nvisy-runtime/src/engine/config.rs @@ -27,10 +27,10 @@ pub struct EngineConfig { impl EngineConfigBuilder { fn validate(&self) -> Result<(), String> { - if let Some(max) = self.max_concurrent_runs { - if max == 0 { - return Err("max_concurrent_runs must be at least 1".into()); - } + if let Some(max) = self.max_concurrent_runs + && max == 0 + { + return Err("max_concurrent_runs must be at least 1".into()); } Ok(()) } diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index f2049c6..246f8e0 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -4,11 +4,10 @@ use std::sync::Arc; use tokio::sync::Semaphore; +use super::EngineConfig; use crate::error::WorkflowResult; use crate::graph::WorkflowGraph; -use super::EngineConfig; - /// Tracing target for engine operations. const TRACING_TARGET: &str = "nvisy_workflow::engine"; diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index be73ddb..1d44843 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -2,10 +2,12 @@ //! //! This module provides the graph representation for workflows: //! - [`WorkflowGraph`]: The main graph structure containing nodes and edges +//! - [`WorkflowMetadata`]: Metadata about the workflow //! - [`Edge`]: Connections between nodes +//! - [`EdgeData`]: Data stored on edges in the underlying petgraph mod edge; mod workflow; pub use edge::Edge; -pub use workflow::WorkflowGraph; +pub use workflow::{EdgeData, WorkflowGraph, WorkflowMetadata}; diff --git a/crates/nvisy-runtime/src/graph/workflow.rs b/crates/nvisy-runtime/src/graph/workflow.rs index e35059c..19b94f0 100644 --- a/crates/nvisy-runtime/src/graph/workflow.rs +++ b/crates/nvisy-runtime/src/graph/workflow.rs @@ -2,23 +2,104 @@ use std::collections::HashMap; +use petgraph::Direction; +use petgraph::algo::{is_cyclic_directed, toposort}; +use petgraph::graph::{DiGraph, NodeIndex}; +use petgraph::visit::EdgeRef; +use semver::Version; use serde::{Deserialize, Serialize}; +use super::Edge; use crate::error::{WorkflowError, WorkflowResult}; use crate::node::{NodeData, NodeId}; -use super::Edge; +/// Workflow metadata. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct WorkflowMetadata { + /// Workflow name (optional). + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + /// Workflow description. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Workflow version (semver, optional). + #[serde(skip_serializing_if = "Option::is_none")] + pub version: Option, + /// Tags for organization. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tags: Vec, + /// Author identifier. + #[serde(skip_serializing_if = "Option::is_none")] + pub author: Option, + /// Creation timestamp (ISO 8601). + #[serde(skip_serializing_if = "Option::is_none")] + pub created_at: Option, + /// Last update timestamp (ISO 8601). + #[serde(skip_serializing_if = "Option::is_none")] + pub updated_at: Option, +} + +impl WorkflowMetadata { + /// Creates a new empty metadata. + pub fn new() -> Self { + Self::default() + } + + /// Sets the workflow name. + pub fn with_name(mut self, name: impl Into) -> Self { + self.name = Some(name.into()); + self + } + + /// Sets the workflow description. 
+ pub fn with_description(mut self, description: impl Into) -> Self { + self.description = Some(description.into()); + self + } + + /// Sets the workflow version. + pub fn with_version(mut self, version: Version) -> Self { + self.version = Some(version); + self + } + + /// Sets the author. + pub fn with_author(mut self, author: impl Into) -> Self { + self.author = Some(author.into()); + self + } + + /// Adds tags. + pub fn with_tags(mut self, tags: impl IntoIterator>) -> Self { + self.tags = tags.into_iter().map(Into::into).collect(); + self + } +} /// A workflow graph containing nodes and edges. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +/// +/// Internally uses petgraph's `DiGraph` for efficient graph operations. +#[derive(Debug, Clone, Default)] pub struct WorkflowGraph { - /// Map of node IDs to their data. - nodes: HashMap, - /// Edges connecting nodes. - edges: Vec, + /// The underlying directed graph. + graph: DiGraph, + /// Mapping from NodeId to petgraph's NodeIndex. + node_indices: HashMap, + /// Reverse mapping from NodeIndex to NodeId. + index_to_id: HashMap, /// Workflow metadata. - #[serde(default)] - pub metadata: serde_json::Value, + pub metadata: WorkflowMetadata, +} + +/// Edge data stored in the graph. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] +pub struct EdgeData { + /// Optional port/slot name on the source node. + #[serde(skip_serializing_if = "Option::is_none")] + pub from_port: Option, + /// Optional port/slot name on the target node. + #[serde(skip_serializing_if = "Option::is_none")] + pub to_port: Option, } impl WorkflowGraph { @@ -27,82 +108,98 @@ impl WorkflowGraph { Self::default() } + /// Creates a new workflow graph with metadata. + pub fn with_metadata(metadata: WorkflowMetadata) -> Self { + Self { + metadata, + ..Default::default() + } + } + /// Returns the number of nodes in the graph. pub fn node_count(&self) -> usize { - self.nodes.len() + self.graph.node_count() } /// Returns the number of edges in the graph. pub fn edge_count(&self) -> usize { - self.edges.len() + self.graph.edge_count() } /// Returns whether the graph is empty. pub fn is_empty(&self) -> bool { - self.nodes.is_empty() + self.graph.node_count() == 0 } /// Adds a node to the graph and returns its ID. pub fn add_node(&mut self, data: impl Into) -> NodeId { let id = NodeId::new(); - self.nodes.insert(id, data.into()); + let index = self.graph.add_node(data.into()); + self.node_indices.insert(id, index); + self.index_to_id.insert(index, id); id } /// Adds a node with a specific ID. pub fn add_node_with_id(&mut self, id: NodeId, data: impl Into) { - self.nodes.insert(id, data.into()); + let index = self.graph.add_node(data.into()); + self.node_indices.insert(id, index); + self.index_to_id.insert(index, id); } /// Removes a node and all its connected edges. pub fn remove_node(&mut self, id: NodeId) -> Option { - // Remove all edges connected to this node - self.edges.retain(|e| e.from != id && e.to != id); - self.nodes.remove(&id) + let index = self.node_indices.remove(&id)?; + self.index_to_id.remove(&index); + self.graph.remove_node(index) } /// Returns a reference to a node's data. pub fn get_node(&self, id: NodeId) -> Option<&NodeData> { - self.nodes.get(&id) + let index = self.node_indices.get(&id)?; + self.graph.node_weight(*index) } /// Returns a mutable reference to a node's data. 
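Usage note: a minimal sketch of the fluent `WorkflowMetadata` setters introduced above. The generic parameters are elided in this listing, so the field types (`Option<String>`, `Option<Version>`, `Vec<String>`) and the `impl Into<String>` / `impl IntoIterator` setter arguments are assumptions.

```rust
use nvisy_runtime::graph::WorkflowMetadata;
use semver::Version;

// Build metadata with the builder-style setters shown in this hunk.
fn example_metadata() -> WorkflowMetadata {
    let metadata = WorkflowMetadata::new()
        .with_name("invoice-ingest")
        .with_description("Extract, chunk, and embed uploaded invoices")
        .with_version(Version::new(0, 1, 0))
        .with_author("data-platform")
        .with_tags(["rag", "invoices"]);

    // Fields are public, so callers can read them back directly.
    assert_eq!(metadata.name.as_deref(), Some("invoice-ingest"));
    metadata
}
```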
pub fn get_node_mut(&mut self, id: NodeId) -> Option<&mut NodeData> { - self.nodes.get_mut(&id) + let index = self.node_indices.get(&id)?; + self.graph.node_weight_mut(*index) } /// Returns whether a node exists. pub fn contains_node(&self, id: NodeId) -> bool { - self.nodes.contains_key(&id) + self.node_indices.contains_key(&id) } /// Returns an iterator over all nodes. pub fn nodes(&self) -> impl Iterator { - self.nodes.iter().map(|(&id, data)| (id, data)) + self.graph.node_indices().filter_map(|index| { + let id = self.index_to_id.get(&index)?; + let data = self.graph.node_weight(index)?; + Some((*id, data)) + }) } /// Returns an iterator over all node IDs. pub fn node_ids(&self) -> impl Iterator + '_ { - self.nodes.keys().copied() + self.node_indices.keys().copied() } /// Adds an edge between two nodes. pub fn add_edge(&mut self, edge: Edge) -> WorkflowResult<()> { - // Validate that both nodes exist - if !self.nodes.contains_key(&edge.from) { - return Err(WorkflowError::InvalidDefinition(format!( - "source node {} does not exist", - edge.from - ))); - } - if !self.nodes.contains_key(&edge.to) { - return Err(WorkflowError::InvalidDefinition(format!( - "target node {} does not exist", - edge.to - ))); - } - - self.edges.push(edge); + let from_index = self.node_indices.get(&edge.from).ok_or_else(|| { + WorkflowError::InvalidDefinition(format!("source node {} does not exist", edge.from)) + })?; + let to_index = self.node_indices.get(&edge.to).ok_or_else(|| { + WorkflowError::InvalidDefinition(format!("target node {} does not exist", edge.to)) + })?; + + let edge_data = EdgeData { + from_port: edge.from_port, + to_port: edge.to_port, + }; + + self.graph.add_edge(*from_index, *to_index, edge_data); Ok(()) } @@ -112,131 +209,190 @@ impl WorkflowGraph { } /// Returns an iterator over all edges. - pub fn edges(&self) -> impl Iterator { - self.edges.iter() + pub fn edges(&self) -> impl Iterator + '_ { + self.graph.edge_references().filter_map(|edge_ref| { + let from = *self.index_to_id.get(&edge_ref.source())?; + let to = *self.index_to_id.get(&edge_ref.target())?; + let data = edge_ref.weight(); + Some(Edge { + from, + to, + from_port: data.from_port.clone(), + to_port: data.to_port.clone(), + }) + }) } /// Returns edges originating from a node. - pub fn outgoing_edges(&self, id: NodeId) -> impl Iterator { - self.edges.iter().filter(move |e| e.from == id) + pub fn outgoing_edges(&self, id: NodeId) -> impl Iterator + '_ { + let index = self.node_indices.get(&id).copied(); + self.graph + .edges_directed( + index.unwrap_or(NodeIndex::new(usize::MAX)), + Direction::Outgoing, + ) + .filter_map(move |edge_ref| { + let from = *self.index_to_id.get(&edge_ref.source())?; + let to = *self.index_to_id.get(&edge_ref.target())?; + let data = edge_ref.weight(); + Some(Edge { + from, + to, + from_port: data.from_port.clone(), + to_port: data.to_port.clone(), + }) + }) } /// Returns edges targeting a node. - pub fn incoming_edges(&self, id: NodeId) -> impl Iterator { - self.edges.iter().filter(move |e| e.to == id) - } - - /// Returns all source nodes (nodes with no incoming edges). 
- pub fn source_nodes(&self) -> Vec { - self.nodes - .keys() - .copied() - .filter(|&id| { - self.nodes.get(&id).is_some_and(|data| data.is_source()) - || !self.edges.iter().any(|e| e.to == id) + pub fn incoming_edges(&self, id: NodeId) -> impl Iterator + '_ { + let index = self.node_indices.get(&id).copied(); + self.graph + .edges_directed( + index.unwrap_or(NodeIndex::new(usize::MAX)), + Direction::Incoming, + ) + .filter_map(move |edge_ref| { + let from = *self.index_to_id.get(&edge_ref.source())?; + let to = *self.index_to_id.get(&edge_ref.target())?; + let data = edge_ref.weight(); + Some(Edge { + from, + to, + from_port: data.from_port.clone(), + to_port: data.to_port.clone(), + }) + }) + } + + /// Returns all input nodes (nodes marked as Input or with no incoming edges). + pub fn input_nodes(&self) -> Vec { + self.graph + .node_indices() + .filter_map(|index| { + let id = self.index_to_id.get(&index)?; + let data = self.graph.node_weight(index)?; + if data.is_input() + || self + .graph + .edges_directed(index, Direction::Incoming) + .next() + .is_none() + { + Some(*id) + } else { + None + } }) .collect() } - /// Returns all sink nodes (nodes with no outgoing edges). - pub fn sink_nodes(&self) -> Vec { - self.nodes - .keys() - .copied() - .filter(|&id| { - self.nodes.get(&id).is_some_and(|data| data.is_sink()) - || !self.edges.iter().any(|e| e.from == id) + /// Returns all output nodes (nodes marked as Output or with no outgoing edges). + pub fn output_nodes(&self) -> Vec { + self.graph + .node_indices() + .filter_map(|index| { + let id = self.index_to_id.get(&index)?; + let data = self.graph.node_weight(index)?; + if data.is_output() + || self + .graph + .edges_directed(index, Direction::Outgoing) + .next() + .is_none() + { + Some(*id) + } else { + None + } }) .collect() } - /// Validates the workflow graph. + /// Validates the workflow graph structure and constraints. pub fn validate(&self) -> WorkflowResult<()> { // Must have at least one node - if self.nodes.is_empty() { + if self.graph.node_count() == 0 { return Err(WorkflowError::InvalidDefinition( "workflow must have at least one node".into(), )); } - // Must have at least one source - let sources: Vec<_> = self - .nodes - .iter() - .filter(|(_, data)| data.is_source()) - .collect(); - if sources.is_empty() { + // Must have at least one input node + let has_input = self.graph.node_weights().any(|data| data.is_input()); + if !has_input { return Err(WorkflowError::InvalidDefinition( - "workflow must have at least one source node".into(), + "workflow must have at least one input node".into(), )); } - // Must have at least one sink - let sinks: Vec<_> = self - .nodes - .iter() - .filter(|(_, data)| data.is_sink()) - .collect(); - if sinks.is_empty() { + // Must have at least one output node + let has_output = self.graph.node_weights().any(|data| data.is_output()); + if !has_output { return Err(WorkflowError::InvalidDefinition( - "workflow must have at least one sink node".into(), + "workflow must have at least one output node".into(), )); } - // Check for cycles (simple DFS-based detection) - self.check_cycles()?; - - Ok(()) - } - - /// Checks for cycles in the graph using DFS. 
- fn check_cycles(&self) -> WorkflowResult<()> { - #[derive(Clone, Copy, PartialEq, Eq)] - enum State { - Unvisited, - Visiting, - Visited, + // Check for cycles + if is_cyclic_directed(&self.graph) { + return Err(WorkflowError::InvalidDefinition( + "cycle detected in workflow graph".into(), + )); } - let mut states: HashMap = self - .nodes - .keys() - .map(|&id| (id, State::Unvisited)) - .collect(); - - fn dfs( - graph: &WorkflowGraph, - node: NodeId, - states: &mut HashMap, - path: &mut Vec, - ) -> WorkflowResult<()> { - states.insert(node, State::Visiting); - path.push(node); - - for edge in graph.outgoing_edges(node) { - match states.get(&edge.to) { - Some(State::Visiting) => { - return Err(WorkflowError::InvalidDefinition(format!( - "cycle detected involving node {}", - edge.to - ))); - } - Some(State::Unvisited) => { - dfs(graph, edge.to, states, path)?; - } - _ => {} - } + // Validate edge constraints for each node + for index in self.graph.node_indices() { + let node_id = self + .index_to_id + .get(&index) + .copied() + .ok_or_else(|| WorkflowError::InvalidDefinition("invalid node index".into()))?; + + let data = self + .graph + .node_weight(index) + .ok_or_else(|| WorkflowError::InvalidDefinition("missing node data".into()))?; + + let incoming_count = self + .graph + .edges_directed(index, Direction::Incoming) + .count(); + let outgoing_count = self + .graph + .edges_directed(index, Direction::Outgoing) + .count(); + + // Input nodes must not have incoming edges + if data.is_input() && incoming_count > 0 { + return Err(WorkflowError::InvalidDefinition(format!( + "input node {} must not have incoming edges", + node_id + ))); } - states.insert(node, State::Visited); - path.pop(); - Ok(()) - } + // Output nodes must not have outgoing edges + if data.is_output() && outgoing_count > 0 { + return Err(WorkflowError::InvalidDefinition(format!( + "output node {} must not have outgoing edges", + node_id + ))); + } + + // Transformer nodes must have at least one incoming edge + if data.is_transformer() && incoming_count == 0 { + return Err(WorkflowError::InvalidDefinition(format!( + "transformer node {} must have at least one incoming edge", + node_id + ))); + } - for &node in self.nodes.keys() { - if states.get(&node) == Some(&State::Unvisited) { - let mut path = Vec::new(); - dfs(self, node, &mut states, &mut path)?; + // Transformer nodes must have at least one outgoing edge + if data.is_transformer() && outgoing_count == 0 { + return Err(WorkflowError::InvalidDefinition(format!( + "transformer node {} must have at least one outgoing edge", + node_id + ))); } } @@ -245,42 +401,75 @@ impl WorkflowGraph { /// Returns nodes in topological order. pub fn topological_order(&self) -> WorkflowResult> { - use std::collections::VecDeque; + toposort(&self.graph, None) + .map(|indices| { + indices + .into_iter() + .filter_map(|index| self.index_to_id.get(&index).copied()) + .collect() + }) + .map_err(|_| { + WorkflowError::InvalidDefinition("cycle detected in workflow graph".into()) + }) + } - let mut in_degree: HashMap = self.nodes.keys().map(|&id| (id, 0)).collect(); + /// Returns a reference to the underlying petgraph. + pub fn inner(&self) -> &DiGraph { + &self.graph + } - // Calculate in-degrees - for edge in &self.edges { - *in_degree.get_mut(&edge.to).unwrap() += 1; - } + /// Returns a mutable reference to the underlying petgraph. 
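Usage note: a sketch of wiring a three-node pipeline against the petgraph-backed `WorkflowGraph` API above (`add_node`, `add_edge`, `validate`, `topological_order`). It assumes the `Edge` fields seen in `edges()` (`from`, `to`, `from_port`, `to_port`) are publicly constructible, and it takes the node configs as parameters because their constructors are defined later in this patch.

```rust
use nvisy_runtime::WorkflowResult;
use nvisy_runtime::graph::{Edge, WorkflowGraph};
use nvisy_runtime::node::{
    InputConfig, InputNode, OutputConfig, OutputNode, TransformerConfig, TransformerNode,
};

fn build_pipeline(
    input: InputConfig,
    transform: TransformerConfig,
    output: OutputConfig,
) -> WorkflowResult<WorkflowGraph> {
    let mut graph = WorkflowGraph::new();

    // `add_node` accepts `impl Into<NodeData>`, satisfied by the node types.
    let src = graph.add_node(InputNode::new(input));
    let mid = graph.add_node(TransformerNode::new(transform));
    let dst = graph.add_node(OutputNode::new(output));

    // Connect input -> transformer -> output; a missing endpoint would
    // surface as `WorkflowError::InvalidDefinition`.
    graph.add_edge(Edge { from: src, to: mid, from_port: None, to_port: None })?;
    graph.add_edge(Edge { from: mid, to: dst, from_port: None, to_port: None })?;

    // Structural checks: input/output presence, acyclicity, and the
    // per-node edge constraints enforced in `validate()` above.
    graph.validate()?;

    // Backed by petgraph's `toposort`; the only input node comes first.
    let order = graph.topological_order()?;
    assert_eq!(order.first(), Some(&src));

    Ok(graph)
}
```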
+ pub fn inner_mut(&mut self) -> &mut DiGraph { + &mut self.graph + } +} - // Start with nodes that have no incoming edges - let mut queue: VecDeque = in_degree - .iter() - .filter(|(_, deg)| **deg == 0) - .map(|(&id, _)| id) - .collect(); +impl Serialize for WorkflowGraph { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; - let mut result = Vec::with_capacity(self.nodes.len()); + let mut state = serializer.serialize_struct("WorkflowGraph", 3)?; - while let Some(node) = queue.pop_front() { - result.push(node); + // Serialize nodes as a map of NodeId -> NodeData + let nodes: HashMap = self.nodes().collect(); + state.serialize_field("nodes", &nodes)?; - for edge in self.outgoing_edges(node) { - let deg = in_degree.get_mut(&edge.to).unwrap(); - *deg -= 1; - if *deg == 0 { - queue.push_back(edge.to); - } - } + // Serialize edges + let edges: Vec = self.edges().collect(); + state.serialize_field("edges", &edges)?; + + state.serialize_field("metadata", &self.metadata)?; + state.end() + } +} + +impl<'de> Deserialize<'de> for WorkflowGraph { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + #[derive(Deserialize)] + struct WorkflowGraphData { + nodes: HashMap, + edges: Vec, + #[serde(default)] + metadata: WorkflowMetadata, } - if result.len() != self.nodes.len() { - return Err(WorkflowError::InvalidDefinition( - "cycle detected in workflow graph".into(), - )); + let data = WorkflowGraphData::deserialize(deserializer)?; + let mut graph = WorkflowGraph::with_metadata(data.metadata); + + for (id, node_data) in data.nodes { + graph.add_node_with_id(id, node_data); + } + + for edge in data.edges { + graph.add_edge(edge).map_err(serde::de::Error::custom)?; } - Ok(result) + Ok(graph) } } diff --git a/crates/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs index ac80181..a01375e 100644 --- a/crates/nvisy-runtime/src/lib.rs +++ b/crates/nvisy-runtime/src/lib.rs @@ -8,9 +8,6 @@ pub mod graph; pub mod node; pub mod runtime; -#[doc(hidden)] -pub mod prelude; - pub use error::{WorkflowError, WorkflowResult}; /// Tracing target for runtime operations. diff --git a/crates/nvisy-runtime/src/node/data.rs b/crates/nvisy-runtime/src/node/data.rs index 6afa2e5..ea538a2 100644 --- a/crates/nvisy-runtime/src/node/data.rs +++ b/crates/nvisy-runtime/src/node/data.rs @@ -1,229 +1,60 @@ -//! Node data types representing different processing operations. +//! Core node data enum. +use derive_more::From; use serde::{Deserialize, Serialize}; +use super::input::InputNode; +use super::output::OutputNode; +use super::transformer::TransformerNode; + /// Data associated with a workflow node. /// /// Nodes are categorized by their role in data flow: -/// - **Source**: Reads/produces data (entry points) +/// - **Input**: Reads/produces data (entry points) /// - **Transformer**: Processes/transforms data (intermediate) -/// - **Sink**: Writes/consumes data (exit points) -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +/// - **Output**: Writes/consumes data (exit points) +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, From)] #[serde(tag = "type", rename_all = "snake_case")] pub enum NodeData { - /// Data source node - reads or produces data. - Source(SourceNode), - /// Data transformer node - processes or transforms data. + /// Data input node, reads or produces data. + Input(InputNode), + /// Data transformer node, processes or transforms data. 
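Usage note: the manual `Serialize`/`Deserialize` impls above keep the on-disk shape as `{"nodes": {...}, "edges": [...], "metadata": {...}}` while rebuilding the petgraph indices on load. A sketch of a JSON round-trip, assuming `serde_json` is available (it is already used for config values in this crate) and that `NodeId` serializes as a string map key, as UUID-backed ids do.

```rust
use nvisy_runtime::graph::WorkflowGraph;

fn roundtrip(graph: &WorkflowGraph) -> serde_json::Result<()> {
    // Nodes serialize as a map keyed by NodeId, edges as a list.
    let json = serde_json::to_string_pretty(graph)?;

    // Deserialization replays `add_node_with_id` / `add_edge`, so the
    // restored graph has the same shape even though NodeIndex values differ.
    let restored: WorkflowGraph = serde_json::from_str(&json)?;
    assert_eq!(restored.node_count(), graph.node_count());
    assert_eq!(restored.edge_count(), graph.edge_count());
    Ok(())
}
```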
Transformer(TransformerNode), - /// Data sink node - writes or consumes data. - Sink(SinkNode), + /// Data output node, writes or consumes data. + Output(OutputNode), } impl NodeData { - /// Returns the node's display name. - pub fn name(&self) -> &str { + /// Returns the node's display name if set. + pub fn name(&self) -> Option<&str> { match self { - NodeData::Source(n) => &n.name, - NodeData::Transformer(n) => &n.name, - NodeData::Sink(n) => &n.name, + NodeData::Input(n) => n.name.as_deref(), + NodeData::Transformer(n) => n.name.as_deref(), + NodeData::Output(n) => n.name.as_deref(), } } - /// Returns whether this is a source node. - pub const fn is_source(&self) -> bool { - matches!(self, NodeData::Source(_)) - } - - /// Returns whether this is a transformer node. - pub const fn is_transformer(&self) -> bool { - matches!(self, NodeData::Transformer(_)) - } - - /// Returns whether this is a sink node. - pub const fn is_sink(&self) -> bool { - matches!(self, NodeData::Sink(_)) - } -} - -/// A data source node that reads or produces data. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SourceNode { - /// Display name of the source. - pub name: String, - /// Type of source. - pub kind: SourceKind, - /// Source-specific configuration. - #[serde(default)] - pub config: serde_json::Value, -} - -/// Types of data sources. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SourceKind { - /// Amazon S3 compatible storage. - S3, - /// Google Cloud Storage. - Gcs, - /// Azure Blob Storage. - AzureBlob, - /// Google Drive. - GoogleDrive, - /// Dropbox. - Dropbox, - /// OneDrive. - OneDrive, - /// Receive files from HTTP upload. - HttpUpload, - /// Fetch from an external API. - ApiEndpoint, - /// Custom source type. - Custom(String), -} - -/// A data transformer node that processes or transforms data. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct TransformerNode { - /// Display name of the transformer. - pub name: String, - /// Type of transformation. - pub kind: TransformerKind, - /// Transformer-specific configuration. - #[serde(default)] - pub config: serde_json::Value, -} - -/// Types of data transformations. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum TransformerKind { - /// Extract text from documents (PDF, images via OCR). - ExtractText, - /// Split content into chunks. - ChunkContent, - /// Generate vector embeddings. - GenerateEmbeddings, - /// Transform using an LLM. - LlmTransform, - /// Convert file format. - ConvertFormat, - /// Validate content against schema. - Validate, - /// Filter data based on conditions. - Filter, - /// Merge multiple inputs. - Merge, - /// Custom transformation. - Custom(String), -} - -/// A data sink node that writes or consumes data. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SinkNode { - /// Display name of the sink. - pub name: String, - /// Type of sink. - pub kind: SinkKind, - /// Sink-specific configuration. - #[serde(default)] - pub config: serde_json::Value, -} - -/// Types of data sinks. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SinkKind { - /// Amazon S3 compatible storage. - S3, - /// Google Cloud Storage. - Gcs, - /// Azure Blob Storage. - AzureBlob, - /// Google Drive. - GoogleDrive, - /// Dropbox. - Dropbox, - /// OneDrive. - OneDrive, - /// Store in database. 
- Database, - /// Store vector embeddings. - VectorStore, - /// Send to webhook. - Webhook, - /// Send to external API. - ApiEndpoint, - /// Custom sink type. - Custom(String), -} - -impl SourceNode { - /// Creates a new source node. - pub fn new(name: impl Into, kind: SourceKind) -> Self { - Self { - name: name.into(), - kind, - config: serde_json::Value::Object(Default::default()), - } - } - - /// Sets the configuration. - pub fn with_config(mut self, config: serde_json::Value) -> Self { - self.config = config; - self - } -} - -impl TransformerNode { - /// Creates a new transformer node. - pub fn new(name: impl Into, kind: TransformerKind) -> Self { - Self { - name: name.into(), - kind, - config: serde_json::Value::Object(Default::default()), - } - } - - /// Sets the configuration. - pub fn with_config(mut self, config: serde_json::Value) -> Self { - self.config = config; - self - } -} - -impl SinkNode { - /// Creates a new sink node. - pub fn new(name: impl Into, kind: SinkKind) -> Self { - Self { - name: name.into(), - kind, - config: serde_json::Value::Object(Default::default()), + /// Returns the node's description if set. + pub fn description(&self) -> Option<&str> { + match self { + NodeData::Input(n) => n.description.as_deref(), + NodeData::Transformer(n) => n.description.as_deref(), + NodeData::Output(n) => n.description.as_deref(), } } - /// Sets the configuration. - pub fn with_config(mut self, config: serde_json::Value) -> Self { - self.config = config; - self + /// Returns whether this is an input node. + pub const fn is_input(&self) -> bool { + matches!(self, NodeData::Input(_)) } -} -// Conversions to NodeData - -impl From for NodeData { - fn from(node: SourceNode) -> Self { - NodeData::Source(node) - } -} - -impl From for NodeData { - fn from(node: TransformerNode) -> Self { - NodeData::Transformer(node) + /// Returns whether this is a transformer node. + pub const fn is_transformer(&self) -> bool { + matches!(self, NodeData::Transformer(_)) } -} -impl From for NodeData { - fn from(node: SinkNode) -> Self { - NodeData::Sink(node) + /// Returns whether this is an output node. + pub const fn is_output(&self) -> bool { + matches!(self, NodeData::Output(_)) } } diff --git a/crates/nvisy-runtime/src/node/input/config.rs b/crates/nvisy-runtime/src/node/input/config.rs new file mode 100644 index 0000000..21c1edf --- /dev/null +++ b/crates/nvisy-runtime/src/node/input/config.rs @@ -0,0 +1,3 @@ +//! Input node configuration types. + +pub use nvisy_opendal::StorageConfig as InputConfig; diff --git a/crates/nvisy-runtime/src/node/input/mod.rs b/crates/nvisy-runtime/src/node/input/mod.rs new file mode 100644 index 0000000..c8ebb78 --- /dev/null +++ b/crates/nvisy-runtime/src/node/input/mod.rs @@ -0,0 +1,48 @@ +//! Input node types for reading data from storage backends. + +mod config; + +pub use config::InputConfig; +use serde::{Deserialize, Serialize}; + +/// A data input node that reads or produces data. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct InputNode { + /// Display name of the input. + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + /// Description of what this input does. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Input configuration. + pub config: InputConfig, +} + +impl InputNode { + /// Creates a new input node. + pub fn new(config: InputConfig) -> Self { + Self { + name: None, + description: None, + config, + } + } + + /// Sets the display name. 
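Usage note: with `derive_more::From` on `NodeData`, a concrete node converts into the enum with `.into()`, and the accessors now return `Option<&str>` instead of `&str`. A small sketch using the `InputNode` constructor from this hunk.

```rust
use nvisy_runtime::node::{InputConfig, InputNode, NodeData};

fn describe(config: InputConfig) -> String {
    // `From<InputNode> for NodeData` is generated by `derive_more::From`.
    let node: NodeData = InputNode::new(config).with_name("s3-dropzone").into();

    assert!(node.is_input());
    // Display names are optional now, so fall back to a placeholder.
    node.name().unwrap_or("unnamed node").to_string()
}
```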
+ pub fn with_name(mut self, name: impl Into) -> Self { + self.name = Some(name.into()); + self + } + + /// Sets the description. + pub fn with_description(mut self, description: impl Into) -> Self { + self.description = Some(description.into()); + self + } +} + +impl From for InputNode { + fn from(config: InputConfig) -> Self { + Self::new(config) + } +} diff --git a/crates/nvisy-runtime/src/node/mod.rs b/crates/nvisy-runtime/src/node/mod.rs index 7ea61d1..5c80bab 100644 --- a/crates/nvisy-runtime/src/node/mod.rs +++ b/crates/nvisy-runtime/src/node/mod.rs @@ -2,12 +2,16 @@ //! //! This module provides the core node abstractions: //! - [`NodeId`]: Unique identifier for nodes -//! - [`NodeData`]: Data associated with each node (Source, Transformer, Sink) +//! - [`NodeData`]: Data associated with each node (Input, Transformer, Output) mod data; mod id; +pub mod input; +pub mod output; +pub mod transformer; -pub use data::{ - NodeData, SinkKind, SinkNode, SourceKind, SourceNode, TransformerKind, TransformerNode, -}; +pub use data::NodeData; pub use id::NodeId; +pub use input::{InputConfig, InputNode}; +pub use output::{OutputConfig, OutputNode}; +pub use transformer::{TransformerConfig, TransformerNode}; diff --git a/crates/nvisy-runtime/src/node/output/config.rs b/crates/nvisy-runtime/src/node/output/config.rs new file mode 100644 index 0000000..c4af3e4 --- /dev/null +++ b/crates/nvisy-runtime/src/node/output/config.rs @@ -0,0 +1,30 @@ +//! Output node configuration types. + +use serde::{Deserialize, Serialize}; + +/// Output node configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum OutputConfig { + /// Storage backend output (S3, GCS, Azure, etc.). + Storage(nvisy_opendal::StorageConfig), + /// Send to webhook. + Webhook(WebhookConfig), +} + +/// Webhook output configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct WebhookConfig { + /// Webhook URL. + pub url: String, + /// HTTP method. + #[serde(default = "default_post")] + pub method: String, + /// Additional headers. + #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")] + pub headers: std::collections::HashMap, +} + +fn default_post() -> String { + "POST".to_string() +} diff --git a/crates/nvisy-runtime/src/node/output/mod.rs b/crates/nvisy-runtime/src/node/output/mod.rs new file mode 100644 index 0000000..b49252d --- /dev/null +++ b/crates/nvisy-runtime/src/node/output/mod.rs @@ -0,0 +1,48 @@ +//! Output node types for writing data to storage backends. + +mod config; + +pub use config::{OutputConfig, WebhookConfig}; +use serde::{Deserialize, Serialize}; + +/// A data output node that writes or consumes data. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct OutputNode { + /// Display name of the output. + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + /// Description of what this output does. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Output configuration. + pub config: OutputConfig, +} + +impl OutputNode { + /// Creates a new output node. + pub fn new(config: OutputConfig) -> Self { + Self { + name: None, + description: None, + config, + } + } + + /// Sets the display name. + pub fn with_name(mut self, name: impl Into) -> Self { + self.name = Some(name.into()); + self + } + + /// Sets the description. 
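Usage note: a sketch of an output node that posts results to a webhook, built from the `OutputConfig::Webhook` variant and `WebhookConfig` above. The `headers` field type is assumed to be `HashMap<String, String>` (the parameters are elided in this listing), and the `method` default is spelled out because struct literals need every field.

```rust
use std::collections::HashMap;

use nvisy_runtime::node::output::WebhookConfig;
use nvisy_runtime::node::{OutputConfig, OutputNode};

fn webhook_output(url: impl Into<String>) -> OutputNode {
    let config = OutputConfig::Webhook(WebhookConfig {
        url: url.into(),
        method: "POST".to_string(), // matches the serde default
        headers: HashMap::new(),
    });

    OutputNode::new(config).with_name("notify-downstream")
}
```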
+ pub fn with_description(mut self, description: impl Into) -> Self { + self.description = Some(description.into()); + self + } +} + +impl From for OutputNode { + fn from(config: OutputConfig) -> Self { + Self::new(config) + } +} diff --git a/crates/nvisy-runtime/src/node/transformer/chunking.rs b/crates/nvisy-runtime/src/node/transformer/chunking.rs new file mode 100644 index 0000000..d43f2de --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/chunking.rs @@ -0,0 +1,288 @@ +//! Chunking strategy configurations for text splitting. + +use serde::{Deserialize, Serialize}; + +/// Chunking strategy configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "strategy", rename_all = "snake_case")] +pub enum ChunkingStrategy { + /// Split by character count. + Character(CharacterChunkingConfig), + /// Split by sentences. + Sentence(SentenceChunkingConfig), + /// Split by paragraphs. + Paragraph(ParagraphChunkingConfig), + /// Split by page boundaries (for PDFs). + Page(PageChunkingConfig), + /// Split by document structure/titles. + Title(TitleChunkingConfig), + /// Recursive splitting with fallback strategies. + Recursive(RecursiveChunkingConfig), + /// Semantic/similarity-based chunking. + Semantic(SemanticChunkingConfig), + /// Contextual chunking with LLM-assisted boundaries. + Contextual(ContextualChunkingConfig), +} + +/// Character-based chunking configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct CharacterChunkingConfig { + /// Maximum chunk size in characters. + pub max_size: usize, + /// Overlap between chunks in characters. + #[serde(default)] + pub overlap: usize, + /// Separator to split on (defaults to whitespace). + #[serde(skip_serializing_if = "Option::is_none")] + pub separator: Option, + /// Whether to trim whitespace from chunks. + #[serde(default = "default_true")] + pub trim: bool, +} + +impl CharacterChunkingConfig { + /// Creates a new character chunking config. + pub fn new(max_size: usize) -> Self { + Self { + max_size, + overlap: 0, + separator: None, + trim: true, + } + } + + /// Sets the overlap. + pub fn with_overlap(mut self, overlap: usize) -> Self { + self.overlap = overlap; + self + } + + /// Sets the separator. + pub fn with_separator(mut self, separator: impl Into) -> Self { + self.separator = Some(separator.into()); + self + } +} + +/// Sentence-based chunking configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SentenceChunkingConfig { + /// Maximum number of sentences per chunk. + pub max_sentences: usize, + /// Overlap in sentences. + #[serde(default)] + pub overlap_sentences: usize, + /// Maximum chunk size in characters (soft limit). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_size: Option, +} + +impl SentenceChunkingConfig { + /// Creates a new sentence chunking config. + pub fn new(max_sentences: usize) -> Self { + Self { + max_sentences, + overlap_sentences: 0, + max_size: None, + } + } + + /// Sets the overlap. + pub fn with_overlap(mut self, overlap: usize) -> Self { + self.overlap_sentences = overlap; + self + } +} + +/// Paragraph-based chunking configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct ParagraphChunkingConfig { + /// Maximum number of paragraphs per chunk. + pub max_paragraphs: usize, + /// Maximum chunk size in characters (soft limit). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_size: Option, + /// Minimum paragraph length to consider (filters short lines). 
+ #[serde(default)] + pub min_paragraph_length: usize, +} + +impl ParagraphChunkingConfig { + /// Creates a new paragraph chunking config. + pub fn new(max_paragraphs: usize) -> Self { + Self { + max_paragraphs, + max_size: None, + min_paragraph_length: 0, + } + } +} + +/// Page-based chunking configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PageChunkingConfig { + /// Maximum number of pages per chunk. + #[serde(default = "default_one")] + pub max_pages: usize, + /// Whether to preserve page boundaries exactly. + #[serde(default = "default_true")] + pub preserve_boundaries: bool, +} + +impl Default for PageChunkingConfig { + fn default() -> Self { + Self { + max_pages: 1, + preserve_boundaries: true, + } + } +} + +/// Title/heading-based chunking configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct TitleChunkingConfig { + /// Heading levels to split on (1 = h1, 2 = h2, etc.). + #[serde(default = "default_heading_levels")] + pub heading_levels: Vec, + /// Whether to include the heading in each chunk. + #[serde(default = "default_true")] + pub include_heading: bool, + /// Maximum chunk size in characters (soft limit). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_size: Option, +} + +impl Default for TitleChunkingConfig { + fn default() -> Self { + Self { + heading_levels: default_heading_levels(), + include_heading: true, + max_size: None, + } + } +} + +/// Recursive chunking configuration with fallback strategies. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RecursiveChunkingConfig { + /// Maximum chunk size in characters. + pub max_size: usize, + /// Overlap between chunks. + #[serde(default)] + pub overlap: usize, + /// Separators to try in order (from most to least preferred). + #[serde(default = "default_recursive_separators")] + pub separators: Vec, +} + +impl RecursiveChunkingConfig { + /// Creates a new recursive chunking config. + pub fn new(max_size: usize) -> Self { + Self { + max_size, + overlap: 0, + separators: default_recursive_separators(), + } + } + + /// Sets the overlap. + pub fn with_overlap(mut self, overlap: usize) -> Self { + self.overlap = overlap; + self + } + + /// Sets custom separators. + pub fn with_separators(mut self, separators: Vec) -> Self { + self.separators = separators; + self + } +} + +/// Semantic/similarity-based chunking configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SemanticChunkingConfig { + /// Similarity threshold for splitting (0.0-1.0). + /// Lower values = more aggressive splitting. + #[serde(default = "default_similarity_threshold")] + pub similarity_threshold: f32, + /// Minimum chunk size in characters. + #[serde(default = "default_min_chunk_size")] + pub min_size: usize, + /// Maximum chunk size in characters. + #[serde(default = "default_max_chunk_size")] + pub max_size: usize, + /// Embedding model to use for similarity. + #[serde(skip_serializing_if = "Option::is_none")] + pub embedding_model: Option, +} + +impl Default for SemanticChunkingConfig { + fn default() -> Self { + Self { + similarity_threshold: default_similarity_threshold(), + min_size: default_min_chunk_size(), + max_size: default_max_chunk_size(), + embedding_model: None, + } + } +} + +/// Contextual chunking using LLM to determine boundaries. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct ContextualChunkingConfig { + /// LLM model to use for boundary detection. 
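Usage note: a sketch of two variants of the `ChunkingStrategy` enum above, using the constructors and `with_overlap` setters shown in this hunk: a fixed character window, and a recursive splitter that keeps the default paragraph/line/sentence separators.

```rust
use nvisy_runtime::node::transformer::{
    CharacterChunkingConfig, ChunkingStrategy, RecursiveChunkingConfig,
};

fn chunking_examples() -> (ChunkingStrategy, ChunkingStrategy) {
    // Plain character window: 1000-character chunks with 100 characters of overlap.
    let by_chars =
        ChunkingStrategy::Character(CharacterChunkingConfig::new(1_000).with_overlap(100));

    // Recursive splitting falls back through "\n\n", "\n", ". ", ", ", " ".
    let recursive =
        ChunkingStrategy::Recursive(RecursiveChunkingConfig::new(1_500).with_overlap(150));

    (by_chars, recursive)
}
```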
+ pub model: String, + /// Maximum chunk size in characters. + #[serde(default = "default_max_chunk_size")] + pub max_size: usize, + /// Custom prompt for boundary detection. + #[serde(skip_serializing_if = "Option::is_none")] + pub custom_prompt: Option, +} + +impl ContextualChunkingConfig { + /// Creates a new contextual chunking config. + pub fn new(model: impl Into) -> Self { + Self { + model: model.into(), + max_size: default_max_chunk_size(), + custom_prompt: None, + } + } +} + +// Default value functions + +fn default_true() -> bool { + true +} + +fn default_one() -> usize { + 1 +} + +fn default_heading_levels() -> Vec { + vec![1, 2, 3] +} + +fn default_recursive_separators() -> Vec { + vec![ + "\n\n".to_string(), // Paragraphs + "\n".to_string(), // Lines + ". ".to_string(), // Sentences + ", ".to_string(), // Clauses + " ".to_string(), // Words + ] +} + +fn default_similarity_threshold() -> f32 { + 0.5 +} + +fn default_min_chunk_size() -> usize { + 100 +} + +fn default_max_chunk_size() -> usize { + 1000 +} diff --git a/crates/nvisy-runtime/src/node/transformer/config.rs b/crates/nvisy-runtime/src/node/transformer/config.rs new file mode 100644 index 0000000..3ee45eb --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/config.rs @@ -0,0 +1,93 @@ +//! Transformer node configuration types. + +use serde::{Deserialize, Serialize}; + +use super::document::{ + LanguageDetectionConfig, SentimentAnalysisConfig, SummarizationConfig, + TopicClassificationConfig, TranslationConfig, +}; +use super::embedding::GenerateEmbeddingsConfig; +use super::extraction::{ + CitationParsingConfig, EntityRelationExtractionConfig, ExtractTextConfig, + ImageDescriptionConfig, MetadataExtractionConfig, NamedEntityRecognitionConfig, + TableDescriptionConfig, TableToHtmlConfig, +}; +use super::processing::{ + ChunkContentConfig, ConvertFormatConfig, FilterConfig, LlmTransformConfig, MergeConfig, + ValidateConfig, +}; +use super::quality::{DataNormalizationConfig, DeduplicationConfig, TextCleaningConfig}; +use super::routing::{ + ContentTypeRouterConfig, DurationRouterConfig, FileDateRouterConfig, FileNameRouterConfig, + FileSizeRouterConfig, LanguageRouterConfig, PageCountRouterConfig, +}; + +/// Transformer node configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum TransformerConfig { + /// Route by detected content/mime type. + ContentTypeRouter(ContentTypeRouterConfig), + /// Route by file size threshold. + FileSizeRouter(FileSizeRouterConfig), + /// Route by document page count threshold. + PageCountRouter(PageCountRouterConfig), + /// Route by audio/video duration threshold. + DurationRouter(DurationRouterConfig), + /// Route by detected language. + LanguageRouter(LanguageRouterConfig), + /// Route by file date (created/modified). + FileDateRouter(FileDateRouterConfig), + /// Route by filename regex patterns. + FileNameRouter(FileNameRouterConfig), + + /// Detect language of text content. + LanguageDetection(LanguageDetectionConfig), + /// Translate text to target language. + Translation(TranslationConfig), + /// Analyze sentiment of text content. + SentimentAnalysis(SentimentAnalysisConfig), + /// Classify content into topics. + TopicClassification(TopicClassificationConfig), + /// Generate summary of content. + Summarization(SummarizationConfig), + + /// Extract text from documents (PDF, images via OCR). + ExtractText(ExtractTextConfig), + /// Extract metadata from documents. 
+ MetadataExtraction(MetadataExtractionConfig), + /// Extract named entities (people, organizations, locations, dates). + NamedEntityRecognition(NamedEntityRecognitionConfig), + /// Extract relationships between entities. + EntityRelationExtraction(EntityRelationExtractionConfig), + /// Generate descriptions for images. + ImageDescription(ImageDescriptionConfig), + /// Generate descriptions for tables. + TableDescription(TableDescriptionConfig), + /// Convert tables to HTML. + TableToHtml(TableToHtmlConfig), + /// Parse and normalize citations and references. + CitationParsing(CitationParsingConfig), + + /// Normalize data formats (dates, times, units). + DataNormalization(DataNormalizationConfig), + /// Detect and remove duplicate content. + Deduplication(DeduplicationConfig), + /// Clean and correct text (spelling, grammar, formatting, noise removal). + TextCleaning(TextCleaningConfig), + + /// Split content into chunks. + ChunkContent(ChunkContentConfig), + /// Generate vector embeddings. + GenerateEmbeddings(GenerateEmbeddingsConfig), + /// Transform using an LLM. + LlmTransform(LlmTransformConfig), + /// Convert file format. + ConvertFormat(ConvertFormatConfig), + /// Validate content against schema. + Validate(ValidateConfig), + /// Filter data based on conditions. + Filter(FilterConfig), + /// Merge multiple inputs. + Merge(MergeConfig), +} diff --git a/crates/nvisy-runtime/src/node/transformer/document.rs b/crates/nvisy-runtime/src/node/transformer/document.rs new file mode 100644 index 0000000..762a4ad --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/document.rs @@ -0,0 +1,104 @@ +//! Document understanding transformer configurations. + +use serde::{Deserialize, Serialize}; + +/// Configuration for language detection. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct LanguageDetectionConfig { + /// Minimum confidence threshold (0.0 to 1.0). + #[serde(skip_serializing_if = "Option::is_none")] + pub min_confidence: Option, +} + +/// Configuration for translation. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct TranslationConfig { + /// Target language code (e.g., "en", "es", "fr"). + pub target_language: String, + /// Source language code (auto-detect if not specified). + #[serde(skip_serializing_if = "Option::is_none")] + pub source_language: Option, + /// Model to use for translation. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +/// Configuration for sentiment analysis. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct SentimentAnalysisConfig { + /// Granularity of analysis. + #[serde(default)] + pub granularity: SentimentGranularity, + /// Model to use for analysis. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +/// Granularity for sentiment analysis. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SentimentGranularity { + /// Analyze entire document. + #[default] + Document, + /// Analyze each paragraph. + Paragraph, + /// Analyze each sentence. + Sentence, +} + +/// Configuration for topic classification. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct TopicClassificationConfig { + /// Predefined topics to classify into (empty for auto-discovery). + #[serde(default)] + pub topics: Vec, + /// Maximum number of topics to assign. 
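Usage note: `TransformerConfig` is internally tagged on `kind` with snake_case variant names, so a configured step carries its discriminator inline. A sketch with the `ExtractText` variant; the `ocr_languages` element type is assumed to be `Vec<String>` since the generics are elided in this listing.

```rust
use nvisy_runtime::node::transformer::{ExtractTextConfig, TransformerConfig};

// Serializes roughly as:
// {"kind":"extract_text","ocr_enabled":true,"ocr_languages":["eng","deu"]}
fn ocr_step() -> TransformerConfig {
    TransformerConfig::ExtractText(ExtractTextConfig {
        ocr_enabled: true,
        ocr_languages: Some(vec!["eng".to_string(), "deu".to_string()]),
    })
}
```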
+ #[serde(skip_serializing_if = "Option::is_none")] + pub max_topics: Option, + /// Model to use for classification. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +/// Configuration for summarization. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct SummarizationConfig { + /// Target summary length. + #[serde(default)] + pub length: SummaryLength, + /// Summary style. + #[serde(default)] + pub style: SummaryStyle, + /// Model to use for summarization. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +/// Target length for summaries. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SummaryLength { + /// Brief summary (1-2 sentences). + Brief, + /// Standard summary. + #[default] + Standard, + /// Detailed summary. + Detailed, + /// Custom max tokens. + Custom(usize), +} + +/// Style for summaries. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum SummaryStyle { + /// Extractive summary (key sentences). + Extractive, + /// Abstractive summary (rewritten). + #[default] + Abstractive, + /// Bullet points. + BulletPoints, +} diff --git a/crates/nvisy-runtime/src/node/transformer/embedding.rs b/crates/nvisy-runtime/src/node/transformer/embedding.rs new file mode 100644 index 0000000..51b3c6e --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/embedding.rs @@ -0,0 +1,270 @@ +//! Embedding generation configurations. + +use serde::{Deserialize, Serialize}; + +use super::chunking::ChunkingStrategy; + +/// Configuration for embedding generation. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct GenerateEmbeddingsConfig { + /// Embedding provider and model. + pub provider: EmbeddingProvider, + /// Chunking strategy (if content should be chunked before embedding). + #[serde(skip_serializing_if = "Option::is_none")] + pub chunking: Option, + /// Batch size for embedding requests. + #[serde(skip_serializing_if = "Option::is_none")] + pub batch_size: Option, +} + +impl GenerateEmbeddingsConfig { + /// Creates a new embedding config with the given provider. + pub fn new(provider: EmbeddingProvider) -> Self { + Self { + provider, + chunking: None, + batch_size: None, + } + } + + /// Sets the chunking strategy. + pub fn with_chunking(mut self, chunking: ChunkingStrategy) -> Self { + self.chunking = Some(chunking); + self + } + + /// Sets the batch size. + pub fn with_batch_size(mut self, batch_size: usize) -> Self { + self.batch_size = Some(batch_size); + self + } +} + +/// Embedding provider configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum EmbeddingProvider { + /// OpenAI embeddings. + OpenAi(OpenAiEmbeddingConfig), + /// Ollama local embeddings. + Ollama(OllamaEmbeddingConfig), + /// Cohere embeddings. + Cohere(CohereEmbeddingConfig), + /// Google Gemini embeddings. + Gemini(GeminiEmbeddingConfig), +} + +/// OpenAI embedding configuration. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct OpenAiEmbeddingConfig { + /// Model to use. + #[serde(default)] + pub model: OpenAiEmbeddingModel, + /// Embedding dimensions (for models that support it). + #[serde(skip_serializing_if = "Option::is_none")] + pub dimensions: Option, +} + +impl OpenAiEmbeddingConfig { + /// Creates a new OpenAI embedding config with the given model. 
+ pub fn new(model: OpenAiEmbeddingModel) -> Self { + Self { + model, + dimensions: None, + } + } + + /// Sets custom dimensions. + pub fn with_dimensions(mut self, dimensions: usize) -> Self { + self.dimensions = Some(dimensions); + self + } +} + +/// OpenAI embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum OpenAiEmbeddingModel { + /// text-embedding-3-small (1536 dimensions, cheapest). + #[default] + TextEmbedding3Small, + /// text-embedding-3-large (3072 dimensions, best quality). + TextEmbedding3Large, + /// text-embedding-ada-002 (1536 dimensions, legacy). + TextEmbeddingAda002, +} + +impl OpenAiEmbeddingModel { + /// Returns the model identifier string. + pub fn as_str(&self) -> &'static str { + match self { + Self::TextEmbedding3Small => "text-embedding-3-small", + Self::TextEmbedding3Large => "text-embedding-3-large", + Self::TextEmbeddingAda002 => "text-embedding-ada-002", + } + } + + /// Returns the default dimensions for this model. + pub fn default_dimensions(&self) -> usize { + match self { + Self::TextEmbedding3Small => 1536, + Self::TextEmbedding3Large => 3072, + Self::TextEmbeddingAda002 => 1536, + } + } +} + +/// Ollama embedding configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct OllamaEmbeddingConfig { + /// Model name. + #[serde(default = "default_ollama_model")] + pub model: String, + /// Ollama server base URL. + #[serde(skip_serializing_if = "Option::is_none")] + pub base_url: Option, +} + +impl Default for OllamaEmbeddingConfig { + fn default() -> Self { + Self { + model: default_ollama_model(), + base_url: None, + } + } +} + +impl OllamaEmbeddingConfig { + /// Creates a new Ollama embedding config with the given model. + pub fn new(model: impl Into) -> Self { + Self { + model: model.into(), + base_url: None, + } + } + + /// Sets the base URL. + pub fn with_base_url(mut self, base_url: impl Into) -> Self { + self.base_url = Some(base_url.into()); + self + } +} + +/// Cohere embedding configuration. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct CohereEmbeddingConfig { + /// Model to use. + #[serde(default)] + pub model: CohereEmbeddingModel, + /// Input type for embeddings. + #[serde(default)] + pub input_type: CohereInputType, +} + +impl CohereEmbeddingConfig { + /// Creates a new Cohere embedding config with the given model. + pub fn new(model: CohereEmbeddingModel) -> Self { + Self { + model, + input_type: CohereInputType::default(), + } + } + + /// Sets the input type. + pub fn with_input_type(mut self, input_type: CohereInputType) -> Self { + self.input_type = input_type; + self + } +} + +/// Cohere embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum CohereEmbeddingModel { + /// embed-english-v3.0 (1024 dimensions). + #[default] + EmbedEnglishV3, + /// embed-multilingual-v3.0 (1024 dimensions). + EmbedMultilingualV3, + /// embed-english-light-v3.0 (384 dimensions). + EmbedEnglishLightV3, + /// embed-multilingual-light-v3.0 (384 dimensions). + EmbedMultilingualLightV3, +} + +impl CohereEmbeddingModel { + /// Returns the model identifier string. 
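Usage note: a sketch of an embeddings step combining the provider and chunking types above: OpenAI `text-embedding-3-large` truncated to 1024 dimensions (that model supports reduced dimensions), sentence-based chunking before embedding, and batched requests.

```rust
use nvisy_runtime::node::transformer::{
    ChunkingStrategy, EmbeddingProvider, GenerateEmbeddingsConfig, OpenAiEmbeddingConfig,
    OpenAiEmbeddingModel, SentenceChunkingConfig,
};

fn embeddings_step() -> GenerateEmbeddingsConfig {
    let provider = EmbeddingProvider::OpenAi(
        OpenAiEmbeddingConfig::new(OpenAiEmbeddingModel::TextEmbedding3Large)
            .with_dimensions(1024),
    );

    GenerateEmbeddingsConfig::new(provider)
        .with_chunking(ChunkingStrategy::Sentence(SentenceChunkingConfig::new(8)))
        .with_batch_size(64)
}
```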
+ pub fn as_str(&self) -> &'static str { + match self { + Self::EmbedEnglishV3 => "embed-english-v3.0", + Self::EmbedMultilingualV3 => "embed-multilingual-v3.0", + Self::EmbedEnglishLightV3 => "embed-english-light-v3.0", + Self::EmbedMultilingualLightV3 => "embed-multilingual-light-v3.0", + } + } + + /// Returns the default dimensions for this model. + pub fn default_dimensions(&self) -> usize { + match self { + Self::EmbedEnglishV3 | Self::EmbedMultilingualV3 => 1024, + Self::EmbedEnglishLightV3 | Self::EmbedMultilingualLightV3 => 384, + } + } +} + +/// Cohere input types. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CohereInputType { + /// For search queries. + SearchQuery, + /// For documents to be searched. + #[default] + SearchDocument, + /// For classification tasks. + Classification, + /// For clustering tasks. + Clustering, +} + +/// Google Gemini embedding configuration. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct GeminiEmbeddingConfig { + /// Model to use. + #[serde(default)] + pub model: GeminiEmbeddingModel, +} + +impl GeminiEmbeddingConfig { + /// Creates a new Gemini embedding config with the given model. + pub fn new(model: GeminiEmbeddingModel) -> Self { + Self { model } + } +} + +/// Gemini embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum GeminiEmbeddingModel { + /// text-embedding-004 (768 dimensions). + #[default] + TextEmbedding004, +} + +impl GeminiEmbeddingModel { + /// Returns the model identifier string. + pub fn as_str(&self) -> &'static str { + match self { + Self::TextEmbedding004 => "text-embedding-004", + } + } + + /// Returns the default dimensions for this model. + pub fn default_dimensions(&self) -> usize { + 768 + } +} + +fn default_ollama_model() -> String { + "nomic-embed-text".to_string() +} diff --git a/crates/nvisy-runtime/src/node/transformer/extraction.rs b/crates/nvisy-runtime/src/node/transformer/extraction.rs new file mode 100644 index 0000000..180682f --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/extraction.rs @@ -0,0 +1,136 @@ +//! Content extraction transformer configurations. + +use serde::{Deserialize, Serialize}; + +/// Configuration for text extraction. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct ExtractTextConfig { + /// Enable OCR for images. + #[serde(default)] + pub ocr_enabled: bool, + /// OCR language codes. + #[serde(skip_serializing_if = "Option::is_none")] + pub ocr_languages: Option>, +} + +/// Configuration for metadata extraction. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct MetadataExtractionConfig { + /// Specific fields to extract (empty for all available). + #[serde(default)] + pub fields: Vec, +} + +/// Configuration for named entity recognition. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct NamedEntityRecognitionConfig { + /// Entity types to extract (empty for all). + #[serde(default)] + pub entity_types: Vec, + /// Model to use for NER. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +/// Types of named entities. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum EntityType { + Person, + Organization, + Location, + Date, + Time, + Money, + Percent, + Product, + Event, + WorkOfArt, + Law, + Language, +} + +/// Configuration for entity relation extraction. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct EntityRelationExtractionConfig { + /// Relation types to extract (empty for all). + #[serde(default)] + pub relation_types: Vec, + /// Model to use for extraction. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, + /// Include confidence scores. + #[serde(default)] + pub include_confidence: bool, +} + +/// Configuration for image description. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct ImageDescriptionConfig { + /// Detail level of description. + #[serde(default)] + pub detail_level: DetailLevel, + /// Model to use for description. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +/// Detail level for descriptions. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DetailLevel { + /// Brief, concise description. + Brief, + /// Standard level of detail. + #[default] + Standard, + /// Comprehensive, detailed description. + Detailed, +} + +/// Configuration for table description. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct TableDescriptionConfig { + /// Include column statistics. + #[serde(default)] + pub include_statistics: bool, + /// Model to use for description. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +/// Configuration for table to HTML conversion. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct TableToHtmlConfig { + /// Include CSS styling. + #[serde(default)] + pub include_styles: bool, + /// Preserve cell formatting. + #[serde(default = "default_true")] + pub preserve_formatting: bool, +} + +/// Configuration for citation parsing. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct CitationParsingConfig { + /// Output format for normalized citations. + #[serde(default)] + pub output_format: CitationFormat, +} + +/// Citation output formats. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CitationFormat { + /// BibTeX format. + #[default] + Bibtex, + /// CSL-JSON format. + CslJson, + /// RIS format. + Ris, +} + +fn default_true() -> bool { + true +} diff --git a/crates/nvisy-runtime/src/node/transformer/mod.rs b/crates/nvisy-runtime/src/node/transformer/mod.rs new file mode 100644 index 0000000..9c1d2d3 --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/mod.rs @@ -0,0 +1,88 @@ +//! Transformer node types for processing and transforming data. 
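Usage note: a sketch of a named-entity-recognition step restricted to a few `EntityType` variants; an empty `entity_types` list means "extract everything" per the field docs above. The element type `Vec<EntityType>` and the `Option<String>` model field are assumptions, since the generics are elided in this listing.

```rust
use nvisy_runtime::node::transformer::{
    EntityType, NamedEntityRecognitionConfig, TransformerConfig,
};

fn ner_step() -> TransformerConfig {
    TransformerConfig::NamedEntityRecognition(NamedEntityRecognitionConfig {
        entity_types: vec![EntityType::Person, EntityType::Organization, EntityType::Date],
        model: None, // use the pipeline's default NER model
    })
}
```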
+ +mod chunking; +mod config; +mod document; +mod embedding; +mod extraction; +mod processing; +mod quality; +mod routing; + +pub use chunking::{ + CharacterChunkingConfig, ChunkingStrategy, ContextualChunkingConfig, PageChunkingConfig, + ParagraphChunkingConfig, RecursiveChunkingConfig, SemanticChunkingConfig, + SentenceChunkingConfig, TitleChunkingConfig, +}; +pub use config::TransformerConfig; +pub use document::{ + LanguageDetectionConfig, SentimentAnalysisConfig, SentimentGranularity, SummarizationConfig, + SummaryLength, SummaryStyle, TopicClassificationConfig, TranslationConfig, +}; +pub use embedding::{ + CohereEmbeddingConfig, CohereEmbeddingModel, CohereInputType, EmbeddingProvider, + GeminiEmbeddingConfig, GeminiEmbeddingModel, GenerateEmbeddingsConfig, OllamaEmbeddingConfig, + OpenAiEmbeddingConfig, OpenAiEmbeddingModel, +}; +pub use extraction::{ + CitationFormat, CitationParsingConfig, DetailLevel, EntityRelationExtractionConfig, EntityType, + ExtractTextConfig, ImageDescriptionConfig, MetadataExtractionConfig, + NamedEntityRecognitionConfig, TableDescriptionConfig, TableToHtmlConfig, +}; +pub use processing::{ + ChunkContentConfig, ChunkContentConfigBuilder, ConvertFormatConfig, FilterConfig, + LlmTransformConfig, LlmTransformConfigBuilder, MergeConfig, MergeStrategy, ValidateConfig, +}; +pub use quality::{ + DataNormalizationConfig, DateTimeNormalization, DeduplicationConfig, DeduplicationStrategy, + NormalizationType, TextCleaningConfig, TextCleaningOperation, UnitMapping, UnitNormalization, + UnitSystem, +}; +pub use routing::{ + ContentTypePort, ContentTypeRouterConfig, DateField, DurationRouterConfig, + FileDateRouterConfig, FileNamePattern, FileNameRouterConfig, FileSizeRouterConfig, + LanguageRouterConfig, MimeMapping, PageCountRouterConfig, +}; +use serde::{Deserialize, Serialize}; + +/// A data transformer node that processes or transforms data. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct TransformerNode { + /// Display name of the transformer. + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + /// Description of what this transformer does. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Transformer configuration. + pub config: TransformerConfig, +} + +impl TransformerNode { + /// Creates a new transformer node. + pub fn new(config: TransformerConfig) -> Self { + Self { + name: None, + description: None, + config, + } + } + + /// Sets the display name. + pub fn with_name(mut self, name: impl Into) -> Self { + self.name = Some(name.into()); + self + } + + /// Sets the description. + pub fn with_description(mut self, description: impl Into) -> Self { + self.description = Some(description.into()); + self + } +} + +impl From for TransformerNode { + fn from(config: TransformerConfig) -> Self { + Self::new(config) + } +} diff --git a/crates/nvisy-runtime/src/node/transformer/processing.rs b/crates/nvisy-runtime/src/node/transformer/processing.rs new file mode 100644 index 0000000..4ad0190 --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/processing.rs @@ -0,0 +1,131 @@ +//! Data processing transformer configurations. + +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +/// Configuration for content chunking (simple character-based). 
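A sketch of how a `TransformerNode` might be assembled around a config; the `Into<String>` bounds on `with_name`/`with_description` are assumed, and the concrete `TransformerConfig` variant is left to the caller since its variants live in `config.rs`.

```rust
// Sketch: attach a human-readable label to a transformer configuration.
use nvisy_runtime::node::transformer::{TransformerConfig, TransformerNode};

fn labelled_node(config: TransformerConfig) -> TransformerNode {
    TransformerNode::new(config)
        .with_name("chunk-articles")
        .with_description("Splits long articles before embedding")
}
```

Because `From<TransformerConfig>` is implemented, `config.into()` is an equivalent shorthand when no label is needed.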
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)] +#[builder( + name = "ChunkContentConfigBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] +pub struct ChunkContentConfig { + /// Maximum chunk size in characters. + pub max_chunk_size: usize, + /// Overlap between chunks in characters. + #[serde(default)] + #[builder(default)] + pub overlap: usize, +} + +impl ChunkContentConfigBuilder { + fn validate(&self) -> Result<(), String> { + if self.max_chunk_size.is_some_and(|s| s == 0) { + return Err("max_chunk_size must be greater than 0".into()); + } + if let (Some(max), Some(overlap)) = (&self.max_chunk_size, &self.overlap) + && overlap >= max + { + return Err("overlap must be less than max_chunk_size".into()); + } + Ok(()) + } +} + +/// Configuration for LLM transformation. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)] +#[builder( + name = "LlmTransformConfigBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] +pub struct LlmTransformConfig { + /// Model identifier. + pub model: String, + /// System prompt. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub system_prompt: Option, + /// User prompt template. + pub prompt_template: String, + /// Temperature for generation. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub temperature: Option, + /// Maximum tokens to generate. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub max_tokens: Option, +} + +impl LlmTransformConfigBuilder { + fn validate(&self) -> Result<(), String> { + if self.model.as_ref().is_some_and(|m| m.is_empty()) { + return Err("model cannot be empty".into()); + } + if self.prompt_template.as_ref().is_some_and(|p| p.is_empty()) { + return Err("prompt_template cannot be empty".into()); + } + if let Some(Some(temp)) = &self.temperature + && (*temp < 0.0 || *temp > 2.0) + { + return Err("temperature must be between 0.0 and 2.0".into()); + } + Ok(()) + } +} + +/// Configuration for format conversion. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct ConvertFormatConfig { + /// Target format. + pub target_format: String, + /// Format-specific options. + #[serde(default)] + pub options: serde_json::Value, +} + +/// Configuration for validation. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct ValidateConfig { + /// JSON schema for validation. + pub schema: serde_json::Value, + /// Whether to fail on validation error. + #[serde(default = "default_true")] + pub fail_on_error: bool, +} + +/// Configuration for filtering. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct FilterConfig { + /// Filter expression. + pub expression: String, +} + +/// Configuration for merging. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct MergeConfig { + /// Merge strategy. + #[serde(default)] + pub strategy: MergeStrategy, +} + +/// Merge strategy. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum MergeStrategy { + /// Concatenate all inputs. + #[default] + Concatenate, + /// Interleave inputs. + Interleave, + /// Take first non-empty input. 
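A usage sketch for the generated builders (module path assumed as above): with `pattern = "owned"` and `setter(prefix = "with")`, `derive_builder` produces chainable `with_*` setters, and `build()` runs the custom `validate` hook.

```rust
use nvisy_runtime::node::transformer::ChunkContentConfigBuilder;

fn main() {
    // A valid configuration passes validation.
    let config = ChunkContentConfigBuilder::default()
        .with_max_chunk_size(1_000usize)
        .with_overlap(100usize)
        .build()
        .expect("valid chunking config");
    assert_eq!(config.overlap, 100);

    // overlap >= max_chunk_size is rejected by `validate`.
    let rejected = ChunkContentConfigBuilder::default()
        .with_max_chunk_size(100usize)
        .with_overlap(100usize)
        .build();
    assert!(rejected.is_err());
}
```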
+ First, +} + +fn default_true() -> bool { + true +} diff --git a/crates/nvisy-runtime/src/node/transformer/quality.rs b/crates/nvisy-runtime/src/node/transformer/quality.rs new file mode 100644 index 0000000..7aeef7c --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/quality.rs @@ -0,0 +1,147 @@ +//! Data quality and normalization transformer configurations. + +use serde::{Deserialize, Serialize}; + +/// Configuration for data normalization (dates, times, units). +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct DataNormalizationConfig { + /// Types of normalization to apply. + #[serde(default)] + pub normalizations: Vec, +} + +/// Types of data normalization. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum NormalizationType { + /// Normalize date and time formats. + DateTime(DateTimeNormalization), + /// Convert measurement units. + Unit(UnitNormalization), +} + +/// Date and time normalization settings. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct DateTimeNormalization { + /// Target format (ISO 8601 by default). + #[serde(default = "default_datetime_format")] + pub target_format: String, + /// Target timezone (UTC by default). + #[serde(skip_serializing_if = "Option::is_none")] + pub target_timezone: Option, +} + +impl Default for DateTimeNormalization { + fn default() -> Self { + Self { + target_format: default_datetime_format(), + target_timezone: None, + } + } +} + +/// Unit normalization settings. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct UnitNormalization { + /// Target unit system. + #[serde(default)] + pub target_system: UnitSystem, + /// Specific unit mappings (e.g., "miles" -> "kilometers"). + #[serde(default)] + pub conversions: Vec, +} + +/// Unit systems for conversion. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum UnitSystem { + /// International System of Units. + #[default] + Si, + /// Imperial/US customary units. + Imperial, +} + +/// Mapping for unit conversion. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct UnitMapping { + /// Source unit. + pub from: String, + /// Target unit. + pub to: String, +} + +/// Configuration for deduplication. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct DeduplicationConfig { + /// Similarity threshold for considering duplicates (0.0 to 1.0). + #[serde(default = "default_similarity_threshold")] + pub similarity_threshold: f32, + /// Deduplication strategy. + #[serde(default)] + pub strategy: DeduplicationStrategy, +} + +impl Default for DeduplicationConfig { + fn default() -> Self { + Self { + similarity_threshold: default_similarity_threshold(), + strategy: DeduplicationStrategy::default(), + } + } +} + +/// Deduplication strategies. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DeduplicationStrategy { + /// Keep first occurrence. + #[default] + KeepFirst, + /// Keep last occurrence. + KeepLast, + /// Keep longest version. + KeepLongest, + /// Merge duplicates. + Merge, +} + +/// Configuration for text cleaning and correction. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct TextCleaningConfig { + /// Language code for language-specific rules. 
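The internally tagged `NormalizationType` serializes its `type` tag alongside the inner struct's fields; a quick sketch (module path assumed) of the resulting JSON:

```rust
use nvisy_runtime::node::transformer::{
    DataNormalizationConfig, DateTimeNormalization, NormalizationType,
};

fn main() -> Result<(), serde_json::Error> {
    let config = DataNormalizationConfig {
        normalizations: vec![NormalizationType::DateTime(DateTimeNormalization::default())],
    };
    let json = serde_json::to_value(&config)?;
    assert_eq!(json["normalizations"][0]["type"], "date_time");
    assert_eq!(
        json["normalizations"][0]["target_format"],
        "%Y-%m-%dT%H:%M:%S%.3fZ"
    );
    Ok(())
}
```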
+ #[serde(skip_serializing_if = "Option::is_none")] + pub language: Option, + /// Cleaning operations to apply. + #[serde(default)] + pub operations: Vec, + /// Model to use for LLM-based cleaning. + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, +} + +/// Text cleaning operations. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TextCleaningOperation { + /// Fix spelling errors. + FixSpelling, + /// Fix grammar errors. + FixGrammar, + /// Normalize whitespace (remove extra spaces, normalize line breaks). + NormalizeWhitespace, + /// Normalize unicode (NFC normalization). + NormalizeUnicode, + /// Remove HTML tags. + StripHtml, + /// Fix common OCR errors. + FixOcrErrors, +} + +fn default_datetime_format() -> String { + "%Y-%m-%dT%H:%M:%S%.3fZ".to_string() +} + +fn default_similarity_threshold() -> f32 { + 0.9 +} diff --git a/crates/nvisy-runtime/src/node/transformer/routing.rs b/crates/nvisy-runtime/src/node/transformer/routing.rs new file mode 100644 index 0000000..9bd95a9 --- /dev/null +++ b/crates/nvisy-runtime/src/node/transformer/routing.rs @@ -0,0 +1,134 @@ +//! Routing transformer configurations. + +use serde::{Deserialize, Serialize}; + +/// Configuration for content type routing. +/// +/// Routes content based on detected mime type (magic bytes + extension fallback). +/// Output ports: `text`, `image`, `audio`, `video`, `document`, `default`. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct ContentTypeRouterConfig { + /// Custom mime type to port mappings (overrides defaults). + #[serde(default)] + pub mappings: Vec, +} + +/// Custom mime type to port mapping. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MimeMapping { + /// Mime type pattern (e.g., "application/pdf", "image/*"). + pub mime: String, + /// Target port. + pub port: ContentTypePort, +} + +/// Output ports for content type routing. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ContentTypePort { + Text, + Image, + Audio, + Video, + Document, + Default, +} + +/// Configuration for file size routing. +/// +/// Routes based on file size threshold. +/// Output ports: `true` (above threshold), `false` (below threshold), `default`. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct FileSizeRouterConfig { + /// Size threshold in bytes. + pub threshold_bytes: u64, +} + +/// Configuration for page count routing. +/// +/// Routes documents based on page count threshold. +/// Output ports: `true` (above threshold), `false` (below threshold), `default`. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PageCountRouterConfig { + /// Page count threshold. + pub threshold_pages: u32, +} + +/// Configuration for duration routing. +/// +/// Routes audio/video based on duration threshold. +/// Output ports: `true` (above threshold), `false` (below threshold), `default`. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct DurationRouterConfig { + /// Duration threshold in seconds. + pub threshold_seconds: u64, +} + +/// Configuration for language routing. +/// +/// Routes based on detected content language. +/// Output ports: configured language codes + `multiple` + `default`. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct LanguageRouterConfig { + /// Language codes to route (e.g., "en", "es", "fr"). 
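A sketch of a content-type router that sends PDFs to the `document` port and any `image/*` to `image`, with everything else falling through to `default` (module path assumed):

```rust
use nvisy_runtime::node::transformer::{ContentTypePort, ContentTypeRouterConfig, MimeMapping};

fn pdf_and_image_router() -> ContentTypeRouterConfig {
    ContentTypeRouterConfig {
        mappings: vec![
            MimeMapping {
                mime: "application/pdf".into(),
                port: ContentTypePort::Document,
            },
            MimeMapping {
                mime: "image/*".into(),
                port: ContentTypePort::Image,
            },
        ],
    }
}
```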
+ #[serde(default)] + pub languages: Vec, + /// Minimum confidence threshold (0.0 to 1.0) to consider a language detected. + #[serde(default = "default_confidence")] + pub min_confidence: f32, + /// Minimum percentage of content (0.0 to 1.0) for a language to be considered present. + #[serde(default = "default_min_percentage")] + pub min_percentage: f32, +} + +/// Configuration for file date routing. +/// +/// Routes based on file date threshold. +/// Output ports: `true` (newer than threshold), `false` (older than threshold), `default`. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct FileDateRouterConfig { + /// Which date field to use. + #[serde(default)] + pub date_field: DateField, + /// Threshold as ISO 8601 datetime or relative duration (e.g., "7d", "30d", "1y"). + pub threshold: String, +} + +/// Date field to use for routing. +#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DateField { + /// File creation date. + #[default] + Created, + /// File modification date. + Modified, +} + +/// Configuration for filename routing. +/// +/// Routes based on regex pattern matching on filename. +/// Output ports: user-defined ports from pattern mappings + `default`. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct FileNameRouterConfig { + /// Regex pattern to port mappings (evaluated in order, first match wins). + #[serde(default)] + pub patterns: Vec, +} + +/// Filename pattern to port mapping. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct FileNamePattern { + /// Regex pattern to match against filename. + pub regex: String, + /// Target port name. + pub port: String, +} + +fn default_confidence() -> f32 { + 0.8 +} + +fn default_min_percentage() -> f32 { + 0.1 +} diff --git a/crates/nvisy-runtime/src/prelude.rs b/crates/nvisy-runtime/src/prelude.rs deleted file mode 100644 index 0e390fb..0000000 --- a/crates/nvisy-runtime/src/prelude.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! Prelude module for convenient imports. -//! -//! This module re-exports commonly used types for ergonomic imports: -//! -//! ```rust -//! use nvisy_workflow::prelude::*; -//! 
``` - -pub use crate::engine::{Engine, EngineConfig}; -pub use crate::error::{WorkflowError, WorkflowResult}; -pub use crate::graph::{Edge, WorkflowGraph}; -pub use crate::node::{ - NodeData, NodeId, SinkKind, SinkNode, SourceKind, SourceNode, TransformerKind, TransformerNode, -}; diff --git a/crates/nvisy-runtime/src/runtime/mod.rs b/crates/nvisy-runtime/src/runtime/mod.rs index 53ddfc4..d4d9e72 100644 --- a/crates/nvisy-runtime/src/runtime/mod.rs +++ b/crates/nvisy-runtime/src/runtime/mod.rs @@ -4,13 +4,11 @@ mod config; mod service; pub use config::RuntimeConfig; -pub use service::RuntimeService; - // Re-export commonly used types from the runtime crates pub use nvisy_rt_core as rt_core; pub use nvisy_rt_engine as rt_engine; - pub use nvisy_rt_engine::{ BoundingBox, Capabilities, Document, DocumentFormat, Engine, EngineConfig, FormatRegistry, LoadedDocument, Point, Region, RegionId, RegionKind, doc, }; +pub use service::RuntimeService; diff --git a/crates/nvisy-vector/Cargo.toml b/crates/nvisy-vector/Cargo.toml new file mode 100644 index 0000000..281d574 --- /dev/null +++ b/crates/nvisy-vector/Cargo.toml @@ -0,0 +1,48 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-vector" +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } +readme = "./README.md" + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +# Internal crates +nvisy-core = { workspace = true } + +# Async runtime +tokio = { workspace = true, features = ["rt", "sync"] } +futures = { workspace = true, features = [] } + +# HTTP client (for REST-based backends) +reqwest = { workspace = true, features = ["json"] } + +# Observability +tracing = { workspace = true, features = [] } + +# (De)serialization +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true, features = [] } + +# Derive macros & utilities +thiserror = { workspace = true, features = [] } +derive_more = { workspace = true, features = ["debug", "display", "from", "into"] } +async-trait = { workspace = true, features = [] } + +# Vector store clients +qdrant-client = "1.13" + +[dev-dependencies] +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/nvisy-vector/README.md b/crates/nvisy-vector/README.md new file mode 100644 index 0000000..80e64e1 --- /dev/null +++ b/crates/nvisy-vector/README.md @@ -0,0 +1,42 @@ +# nvisy-vector + +Vector store abstraction layer for Nvisy Server. 
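Backend selection is driven by the `type` tag on `VectorStoreConfig`. As a sketch (field names taken from the `QdrantConfig` shown later in this patch), a configuration can be deserialized straight from JSON:

```rust
use nvisy_vector::VectorStoreConfig;

fn main() -> Result<(), serde_json::Error> {
    let config: VectorStoreConfig = serde_json::from_value(serde_json::json!({
        "type": "qdrant",
        "url": "http://localhost:6334"
    }))?;
    assert_eq!(config.backend_name(), "qdrant");
    Ok(())
}
```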
+ +## Supported Backends + +- **Qdrant** - High-performance vector similarity search engine +- **Milvus** - Open-source vector database for AI applications +- **Pinecone** - Managed vector database service +- **pgvector** - PostgreSQL extension for vector similarity search + +## Features + +Enable specific backends via Cargo features: + +```toml +[dependencies] +nvisy-vector = { version = "0.1", features = ["qdrant"] } +``` + +Available features: +- `qdrant` - Qdrant support +- `milvus` - Milvus support +- `pinecone` - Pinecone support +- `pgvector` - PostgreSQL pgvector support +- `all-backends` - All backends + +## Usage + +```rust +use nvisy_vector::{VectorStore, VectorStoreConfig}; + +// Create a store from configuration +let config = VectorStoreConfig::Qdrant(QdrantConfig::new("http://localhost:6334")); +let store = VectorStore::new(config).await?; + +// Upsert vectors +store.upsert("collection", vectors).await?; + +// Search for similar vectors +let results = store.search("collection", query_vector, 10).await?; +``` diff --git a/crates/nvisy-vector/src/config.rs b/crates/nvisy-vector/src/config.rs new file mode 100644 index 0000000..1c3983c --- /dev/null +++ b/crates/nvisy-vector/src/config.rs @@ -0,0 +1,36 @@ +//! Vector store configuration types. + +use serde::{Deserialize, Serialize}; + +// Re-export configs from backend modules +pub use crate::milvus::MilvusConfig; +pub use crate::pgvector::{PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType}; +pub use crate::pinecone::PineconeConfig; +pub use crate::qdrant::QdrantConfig; + +/// Vector store backend configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +#[non_exhaustive] +pub enum VectorStoreConfig { + /// Qdrant vector database. + Qdrant(QdrantConfig), + /// Milvus vector database. + Milvus(MilvusConfig), + /// Pinecone managed vector database. + Pinecone(PineconeConfig), + /// PostgreSQL with pgvector extension. + PgVector(PgVectorConfig), +} + +impl VectorStoreConfig { + /// Returns the backend name as a static string. + pub fn backend_name(&self) -> &'static str { + match self { + Self::Qdrant(_) => "qdrant", + Self::Milvus(_) => "milvus", + Self::Pinecone(_) => "pinecone", + Self::PgVector(_) => "pgvector", + } + } +} diff --git a/crates/nvisy-vector/src/error.rs b/crates/nvisy-vector/src/error.rs new file mode 100644 index 0000000..883168c --- /dev/null +++ b/crates/nvisy-vector/src/error.rs @@ -0,0 +1,99 @@ +//! Vector store error types. + +use thiserror::Error; + +/// Result type for vector store operations. +pub type VectorResult = Result; + +/// Vector store errors. +#[derive(Debug, Error)] +pub enum VectorError { + /// Connection error. + #[error("connection error: {0}")] + Connection(String), + + /// Collection not found. + #[error("collection not found: {0}")] + CollectionNotFound(String), + + /// Invalid configuration. + #[error("invalid configuration: {0}")] + InvalidConfig(String), + + /// Authentication error. + #[error("authentication error: {0}")] + Authentication(String), + + /// Operation timeout. + #[error("operation timed out: {0}")] + Timeout(String), + + /// Vector dimension mismatch. + #[error("dimension mismatch: expected {expected}, got {actual}")] + DimensionMismatch { expected: usize, actual: usize }, + + /// Backend-specific error. + #[error("backend error: {0}")] + Backend(String), + + /// Serialization/deserialization error. + #[error("serialization error: {0}")] + Serialization(String), + + /// Feature not enabled. 
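A sketch of how the helper constructors compose with the result alias, assuming `VectorResult<T>` is the usual `Result<T, VectorError>`:

```rust
use nvisy_vector::{VectorError, VectorResult};

fn require_same_dims(expected: usize, actual: usize) -> VectorResult<()> {
    if expected != actual {
        return Err(VectorError::dimension_mismatch(expected, actual));
    }
    Ok(())
}

fn main() {
    let err = require_same_dims(1024, 384).unwrap_err();
    assert_eq!(err.to_string(), "dimension mismatch: expected 1024, got 384");
}
```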
+ #[error("feature not enabled: {0}")] + FeatureNotEnabled(String), +} + +impl VectorError { + /// Creates a connection error. + pub fn connection(msg: impl Into) -> Self { + Self::Connection(msg.into()) + } + + /// Creates a collection not found error. + pub fn collection_not_found(name: impl Into) -> Self { + Self::CollectionNotFound(name.into()) + } + + /// Creates an invalid config error. + pub fn invalid_config(msg: impl Into) -> Self { + Self::InvalidConfig(msg.into()) + } + + /// Creates an authentication error. + pub fn authentication(msg: impl Into) -> Self { + Self::Authentication(msg.into()) + } + + /// Creates a timeout error. + pub fn timeout(msg: impl Into) -> Self { + Self::Timeout(msg.into()) + } + + /// Creates a dimension mismatch error. + pub fn dimension_mismatch(expected: usize, actual: usize) -> Self { + Self::DimensionMismatch { expected, actual } + } + + /// Creates a backend error. + pub fn backend(msg: impl Into) -> Self { + Self::Backend(msg.into()) + } + + /// Creates a serialization error. + pub fn serialization(msg: impl Into) -> Self { + Self::Serialization(msg.into()) + } + + /// Creates a feature not enabled error. + pub fn feature_not_enabled(feature: impl Into) -> Self { + Self::FeatureNotEnabled(feature.into()) + } +} + +impl From for VectorError { + fn from(err: serde_json::Error) -> Self { + Self::serialization(err.to_string()) + } +} diff --git a/crates/nvisy-vector/src/lib.rs b/crates/nvisy-vector/src/lib.rs new file mode 100644 index 0000000..d39e619 --- /dev/null +++ b/crates/nvisy-vector/src/lib.rs @@ -0,0 +1,22 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +pub mod milvus; +pub mod pgvector; +pub mod pinecone; +pub mod qdrant; + +mod config; +mod error; +mod store; + +pub use config::{ + MilvusConfig, PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType, PineconeConfig, + QdrantConfig, VectorStoreConfig, +}; +pub use error::{VectorError, VectorResult}; +pub use store::{SearchOptions, SearchResult, VectorData, VectorStore, VectorStoreBackend}; + +/// Tracing target for vector store operations. +pub const TRACING_TARGET: &str = "nvisy_vector"; diff --git a/crates/nvisy-vector/src/milvus/backend.rs b/crates/nvisy-vector/src/milvus/backend.rs new file mode 100644 index 0000000..06aab94 --- /dev/null +++ b/crates/nvisy-vector/src/milvus/backend.rs @@ -0,0 +1,124 @@ +//! Milvus backend implementation. +//! +//! This is a stub implementation. The Milvus SDK API differs significantly +//! from the interface we designed. A full implementation would require +//! adapting to the actual milvus-sdk-rust API. + +use async_trait::async_trait; + +use super::MilvusConfig; +use crate::TRACING_TARGET; +use crate::error::{VectorError, VectorResult}; +use crate::store::{SearchOptions, SearchResult, VectorData, VectorStoreBackend}; + +/// Milvus backend implementation. +pub struct MilvusBackend { + #[allow(dead_code)] + config: MilvusConfig, +} + +impl MilvusBackend { + /// Creates a new Milvus backend. 
+ pub async fn new(config: &MilvusConfig) -> VectorResult { + tracing::debug!( + target: TRACING_TARGET, + host = %config.host, + port = %config.port, + "Milvus backend initialized (stub implementation)" + ); + + Ok(Self { + config: config.clone(), + }) + } +} + +#[async_trait] +impl VectorStoreBackend for MilvusBackend { + async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { + tracing::warn!( + target: TRACING_TARGET, + collection = %name, + dimensions = %dimensions, + "Milvus create_collection is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Milvus backend is not yet implemented", + )) + } + + async fn delete_collection(&self, name: &str) -> VectorResult<()> { + tracing::warn!( + target: TRACING_TARGET, + collection = %name, + "Milvus delete_collection is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Milvus backend is not yet implemented", + )) + } + + async fn collection_exists(&self, name: &str) -> VectorResult { + tracing::warn!( + target: TRACING_TARGET, + collection = %name, + "Milvus collection_exists is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Milvus backend is not yet implemented", + )) + } + + async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { + tracing::warn!( + target: TRACING_TARGET, + collection = %collection, + count = %vectors.len(), + "Milvus upsert is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Milvus backend is not yet implemented", + )) + } + + async fn search( + &self, + collection: &str, + _query: Vec, + _limit: usize, + _options: SearchOptions, + ) -> VectorResult> { + tracing::warn!( + target: TRACING_TARGET, + collection = %collection, + "Milvus search is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Milvus backend is not yet implemented", + )) + } + + async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { + tracing::warn!( + target: TRACING_TARGET, + collection = %collection, + count = %ids.len(), + "Milvus delete is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Milvus backend is not yet implemented", + )) + } + + async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { + tracing::warn!( + target: TRACING_TARGET, + collection = %collection, + count = %ids.len(), + "Milvus get is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Milvus backend is not yet implemented", + )) + } +} diff --git a/crates/nvisy-vector/src/milvus/config.rs b/crates/nvisy-vector/src/milvus/config.rs new file mode 100644 index 0000000..945478a --- /dev/null +++ b/crates/nvisy-vector/src/milvus/config.rs @@ -0,0 +1,82 @@ +//! Milvus configuration. + +use serde::{Deserialize, Serialize}; + +/// Milvus configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MilvusConfig { + /// Milvus server host. + pub host: String, + /// Milvus server port. + #[serde(default = "default_milvus_port")] + pub port: u16, + /// Username for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub username: Option, + /// Password for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub password: Option, + /// Database name. + #[serde(skip_serializing_if = "Option::is_none")] + pub database: Option, + /// Default collection name. + #[serde(skip_serializing_if = "Option::is_none")] + pub collection: Option, + /// Vector dimensions. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub dimensions: Option, +} + +impl MilvusConfig { + /// Creates a new Milvus configuration. + pub fn new(host: impl Into) -> Self { + Self { + host: host.into(), + port: default_milvus_port(), + username: None, + password: None, + database: None, + collection: None, + dimensions: None, + } + } + + /// Sets the port. + pub fn with_port(mut self, port: u16) -> Self { + self.port = port; + self + } + + /// Sets the credentials. + pub fn with_credentials( + mut self, + username: impl Into, + password: impl Into, + ) -> Self { + self.username = Some(username.into()); + self.password = Some(password.into()); + self + } + + /// Sets the database name. + pub fn with_database(mut self, database: impl Into) -> Self { + self.database = Some(database.into()); + self + } + + /// Sets the default collection. + pub fn with_collection(mut self, collection: impl Into) -> Self { + self.collection = Some(collection.into()); + self + } + + /// Sets the vector dimensions. + pub fn with_dimensions(mut self, dimensions: usize) -> Self { + self.dimensions = Some(dimensions); + self + } +} + +fn default_milvus_port() -> u16 { + 19530 +} diff --git a/crates/nvisy-vector/src/milvus/mod.rs b/crates/nvisy-vector/src/milvus/mod.rs new file mode 100644 index 0000000..dc3b2f0 --- /dev/null +++ b/crates/nvisy-vector/src/milvus/mod.rs @@ -0,0 +1,7 @@ +//! Milvus vector store backend. + +mod backend; +mod config; + +pub use backend::MilvusBackend; +pub use config::MilvusConfig; diff --git a/crates/nvisy-vector/src/pgvector/backend.rs b/crates/nvisy-vector/src/pgvector/backend.rs new file mode 100644 index 0000000..66262a7 --- /dev/null +++ b/crates/nvisy-vector/src/pgvector/backend.rs @@ -0,0 +1,278 @@ +//! pgvector backend implementation. + +use std::collections::HashMap; + +use async_trait::async_trait; + +use super::{PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType}; +use crate::TRACING_TARGET; +use crate::error::{VectorError, VectorResult}; +use crate::store::{SearchOptions, SearchResult, VectorData, VectorStoreBackend}; + +/// pgvector backend implementation. +/// +/// This backend uses raw SQL queries via the pgvector extension. +/// It's designed to work with any PostgreSQL async driver. +pub struct PgVectorBackend { + config: PgVectorConfig, + // In a real implementation, this would hold a connection pool + // For now, we store the connection URL for documentation purposes + #[allow(dead_code)] + connection_url: String, +} + +impl PgVectorBackend { + /// Creates a new pgvector backend. + pub async fn new(config: &PgVectorConfig) -> VectorResult { + // In a real implementation, we would: + // 1. Create a connection pool + // 2. Verify pgvector extension is installed + // 3. Test the connection + + tracing::debug!( + target: TRACING_TARGET, + table = %config.table, + dimensions = %config.dimensions, + "Initialized pgvector backend" + ); + + Ok(Self { + config: config.clone(), + connection_url: config.connection_url.clone(), + }) + } + + /// Generates SQL for creating the vectors table. + pub fn create_table_sql(&self, name: &str, dimensions: usize) -> String { + format!( + r#" + CREATE TABLE IF NOT EXISTS {} ( + id VARCHAR(256) PRIMARY KEY, + vector vector({}), + metadata JSONB DEFAULT '{{}}'::jsonb, + created_at TIMESTAMPTZ DEFAULT NOW() + ) + "#, + name, dimensions + ) + } + + /// Generates SQL for creating the vector index. 
+ pub fn create_index_sql(&self, name: &str) -> String { + let index_name = format!("{}_vector_idx", name); + let operator = self.config.distance_metric.operator(); + + match self.config.index_type { + PgVectorIndexType::IvfFlat => { + format!( + r#" + CREATE INDEX IF NOT EXISTS {} ON {} + USING ivfflat (vector {}) + WITH (lists = 100) + "#, + index_name, name, operator + ) + } + PgVectorIndexType::Hnsw => { + format!( + r#" + CREATE INDEX IF NOT EXISTS {} ON {} + USING hnsw (vector {}) + WITH (m = 16, ef_construction = 64) + "#, + index_name, name, operator + ) + } + } + } + + /// Generates SQL for upserting vectors. + pub fn upsert_sql(&self, name: &str) -> String { + format!( + r#" + INSERT INTO {} (id, vector, metadata) + VALUES ($1, $2, $3) + ON CONFLICT (id) DO UPDATE SET + vector = EXCLUDED.vector, + metadata = EXCLUDED.metadata + "#, + name + ) + } + + /// Generates SQL for searching vectors. + pub fn search_sql(&self, name: &str, include_vector: bool) -> String { + let operator = self.config.distance_metric.operator(); + let vector_column = if include_vector { ", vector" } else { "" }; + + let distance_expr = match self.config.distance_metric { + PgVectorDistanceMetric::L2 => format!("vector {} $1", operator), + PgVectorDistanceMetric::InnerProduct => { + // Inner product returns negative, so we negate for similarity + format!("-(vector {} $1)", operator) + } + PgVectorDistanceMetric::Cosine => { + // Cosine distance, convert to similarity + format!("1 - (vector {} $1)", operator) + } + }; + + format!( + r#" + SELECT id, {} as score{}, metadata + FROM {} + ORDER BY vector {} $1 + LIMIT $2 + "#, + distance_expr, vector_column, name, operator + ) + } + + /// Generates SQL for deleting vectors. + pub fn delete_sql(&self, name: &str) -> String { + format!("DELETE FROM {} WHERE id = ANY($1)", name) + } + + /// Generates SQL for getting vectors by ID. + pub fn get_sql(&self, name: &str) -> String { + format!( + "SELECT id, vector, metadata FROM {} WHERE id = ANY($1)", + name + ) + } +} + +#[async_trait] +impl VectorStoreBackend for PgVectorBackend { + async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { + // In a real implementation, execute: + // 1. CREATE EXTENSION IF NOT EXISTS vector; + // 2. self.create_table_sql(name, dimensions) + // 3. 
self.create_index_sql(name) + + tracing::info!( + target: TRACING_TARGET, + collection = %name, + dimensions = %dimensions, + index_type = ?self.config.index_type, + "Would create pgvector table: {}", + self.create_table_sql(name, dimensions) + ); + + Ok(()) + } + + async fn delete_collection(&self, name: &str) -> VectorResult<()> { + // In a real implementation, execute: + // DROP TABLE IF EXISTS {name} + + tracing::info!( + target: TRACING_TARGET, + collection = %name, + "Would drop pgvector table" + ); + + Ok(()) + } + + async fn collection_exists(&self, _name: &str) -> VectorResult { + // In a real implementation, query information_schema.tables + Ok(true) + } + + async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { + // In a real implementation, execute batched upserts + let sql = self.upsert_sql(collection); + + tracing::debug!( + target: TRACING_TARGET, + collection = %collection, + count = %vectors.len(), + "Would upsert with SQL: {}", + sql + ); + + Ok(()) + } + + async fn search( + &self, + collection: &str, + _query: Vec, + _limit: usize, + options: SearchOptions, + ) -> VectorResult> { + let sql = self.search_sql(collection, options.include_vectors); + + tracing::debug!( + target: TRACING_TARGET, + collection = %collection, + "Would search with SQL: {}", + sql + ); + + // In a real implementation, execute the query and parse results + Ok(vec![]) + } + + async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { + let sql = self.delete_sql(collection); + + tracing::debug!( + target: TRACING_TARGET, + collection = %collection, + count = %ids.len(), + "Would delete with SQL: {}", + sql + ); + + Ok(()) + } + + async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { + let sql = self.get_sql(collection); + + tracing::debug!( + target: TRACING_TARGET, + collection = %collection, + count = %ids.len(), + "Would get with SQL: {}", + sql + ); + + // In a real implementation, execute the query and parse results + Ok(vec![]) + } +} + +/// Helper to format a vector for PostgreSQL. +#[allow(dead_code)] +pub fn format_vector(v: &[f32]) -> String { + format!( + "[{}]", + v.iter() + .map(|f| f.to_string()) + .collect::>() + .join(",") + ) +} + +/// Helper to parse a vector from PostgreSQL. +#[allow(dead_code)] +pub fn parse_vector(s: &str) -> VectorResult> { + let trimmed = s.trim_start_matches('[').trim_end_matches(']'); + trimmed + .split(',') + .map(|s| { + s.trim() + .parse::() + .map_err(|e| VectorError::serialization(e.to_string())) + }) + .collect() +} + +/// Helper to convert metadata to JSONB. +#[allow(dead_code)] +pub fn metadata_to_jsonb(metadata: &HashMap) -> String { + serde_json::to_string(metadata).unwrap_or_else(|_| "{}".to_string()) +} diff --git a/crates/nvisy-vector/src/pgvector/config.rs b/crates/nvisy-vector/src/pgvector/config.rs new file mode 100644 index 0000000..05ecb09 --- /dev/null +++ b/crates/nvisy-vector/src/pgvector/config.rs @@ -0,0 +1,91 @@ +//! pgvector configuration. + +use serde::{Deserialize, Serialize}; + +/// PostgreSQL pgvector configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PgVectorConfig { + /// PostgreSQL connection URL. + pub connection_url: String, + /// Table name for vectors. + #[serde(default = "default_pgvector_table")] + pub table: String, + /// Vector dimensions. + pub dimensions: usize, + /// Distance metric. + #[serde(default)] + pub distance_metric: PgVectorDistanceMetric, + /// Index type for similarity search. 
+ #[serde(default)] + pub index_type: PgVectorIndexType, +} + +impl PgVectorConfig { + /// Creates a new pgvector configuration. + pub fn new(connection_url: impl Into, dimensions: usize) -> Self { + Self { + connection_url: connection_url.into(), + table: default_pgvector_table(), + dimensions, + distance_metric: PgVectorDistanceMetric::default(), + index_type: PgVectorIndexType::default(), + } + } + + /// Sets the table name. + pub fn with_table(mut self, table: impl Into) -> Self { + self.table = table.into(); + self + } + + /// Sets the distance metric. + pub fn with_distance_metric(mut self, metric: PgVectorDistanceMetric) -> Self { + self.distance_metric = metric; + self + } + + /// Sets the index type. + pub fn with_index_type(mut self, index_type: PgVectorIndexType) -> Self { + self.index_type = index_type; + self + } +} + +fn default_pgvector_table() -> String { + "vectors".to_string() +} + +/// Distance metric for pgvector. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PgVectorDistanceMetric { + /// L2 (Euclidean) distance. + #[default] + L2, + /// Inner product (dot product). + InnerProduct, + /// Cosine distance. + Cosine, +} + +impl PgVectorDistanceMetric { + /// Returns the pgvector operator for this metric. + pub fn operator(&self) -> &'static str { + match self { + Self::L2 => "<->", + Self::InnerProduct => "<#>", + Self::Cosine => "<=>", + } + } +} + +/// Index type for pgvector. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PgVectorIndexType { + /// IVFFlat index (faster build, good recall). + #[default] + IvfFlat, + /// HNSW index (slower build, better recall). + Hnsw, +} diff --git a/crates/nvisy-vector/src/pgvector/mod.rs b/crates/nvisy-vector/src/pgvector/mod.rs new file mode 100644 index 0000000..0755b83 --- /dev/null +++ b/crates/nvisy-vector/src/pgvector/mod.rs @@ -0,0 +1,7 @@ +//! PostgreSQL pgvector backend. + +mod backend; +mod config; + +pub use backend::PgVectorBackend; +pub use config::{PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType}; diff --git a/crates/nvisy-vector/src/pinecone/backend.rs b/crates/nvisy-vector/src/pinecone/backend.rs new file mode 100644 index 0000000..f69cfb4 --- /dev/null +++ b/crates/nvisy-vector/src/pinecone/backend.rs @@ -0,0 +1,124 @@ +//! Pinecone backend implementation. +//! +//! This is a stub implementation. The Pinecone SDK API differs significantly +//! from the interface we designed. A full implementation would require +//! adapting to the actual pinecone-sdk API. + +use async_trait::async_trait; + +use super::PineconeConfig; +use crate::TRACING_TARGET; +use crate::error::{VectorError, VectorResult}; +use crate::store::{SearchOptions, SearchResult, VectorData, VectorStoreBackend}; + +/// Pinecone backend implementation. +pub struct PineconeBackend { + #[allow(dead_code)] + config: PineconeConfig, +} + +impl PineconeBackend { + /// Creates a new Pinecone backend. 
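A small sketch of the pgvector knobs above: the operator string feeds directly into the generated SQL, and the builder-style setters are assumed to take `Into<String>` parameters; the connection URL is a placeholder.

```rust
use nvisy_vector::{PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType};

fn main() {
    // Operators as used by the generated CREATE INDEX / ORDER BY clauses.
    assert_eq!(PgVectorDistanceMetric::L2.operator(), "<->");
    assert_eq!(PgVectorDistanceMetric::InnerProduct.operator(), "<#>");
    assert_eq!(PgVectorDistanceMetric::Cosine.operator(), "<=>");

    let config = PgVectorConfig::new("postgres://localhost/nvisy", 1024)
        .with_table("document_embeddings")
        .with_distance_metric(PgVectorDistanceMetric::Cosine)
        .with_index_type(PgVectorIndexType::Hnsw);
    assert_eq!(config.table, "document_embeddings");
}
```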
+ pub async fn new(config: &PineconeConfig) -> VectorResult { + tracing::debug!( + target: TRACING_TARGET, + environment = %config.environment, + index = %config.index, + "Pinecone backend initialized (stub implementation)" + ); + + Ok(Self { + config: config.clone(), + }) + } +} + +#[async_trait] +impl VectorStoreBackend for PineconeBackend { + async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { + tracing::warn!( + target: TRACING_TARGET, + collection = %name, + dimensions = %dimensions, + "Pinecone create_collection is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Pinecone backend is not yet implemented", + )) + } + + async fn delete_collection(&self, name: &str) -> VectorResult<()> { + tracing::warn!( + target: TRACING_TARGET, + collection = %name, + "Pinecone delete_collection is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Pinecone backend is not yet implemented", + )) + } + + async fn collection_exists(&self, name: &str) -> VectorResult { + tracing::warn!( + target: TRACING_TARGET, + collection = %name, + "Pinecone collection_exists is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Pinecone backend is not yet implemented", + )) + } + + async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { + tracing::warn!( + target: TRACING_TARGET, + collection = %collection, + count = %vectors.len(), + "Pinecone upsert is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Pinecone backend is not yet implemented", + )) + } + + async fn search( + &self, + collection: &str, + _query: Vec, + _limit: usize, + _options: SearchOptions, + ) -> VectorResult> { + tracing::warn!( + target: TRACING_TARGET, + collection = %collection, + "Pinecone search is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Pinecone backend is not yet implemented", + )) + } + + async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { + tracing::warn!( + target: TRACING_TARGET, + collection = %collection, + count = %ids.len(), + "Pinecone delete is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Pinecone backend is not yet implemented", + )) + } + + async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { + tracing::warn!( + target: TRACING_TARGET, + collection = %collection, + count = %ids.len(), + "Pinecone get is a stub - not yet implemented" + ); + Err(VectorError::backend( + "Pinecone backend is not yet implemented", + )) + } +} diff --git a/crates/nvisy-vector/src/pinecone/config.rs b/crates/nvisy-vector/src/pinecone/config.rs new file mode 100644 index 0000000..c5e9cdd --- /dev/null +++ b/crates/nvisy-vector/src/pinecone/config.rs @@ -0,0 +1,49 @@ +//! Pinecone configuration. + +use serde::{Deserialize, Serialize}; + +/// Pinecone configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PineconeConfig { + /// Pinecone API key. + pub api_key: String, + /// Environment (e.g., "us-east-1-aws"). + pub environment: String, + /// Index name. + pub index: String, + /// Namespace (optional). + #[serde(skip_serializing_if = "Option::is_none")] + pub namespace: Option, + /// Vector dimensions. + #[serde(skip_serializing_if = "Option::is_none")] + pub dimensions: Option, +} + +impl PineconeConfig { + /// Creates a new Pinecone configuration. 
+ pub fn new( + api_key: impl Into, + environment: impl Into, + index: impl Into, + ) -> Self { + Self { + api_key: api_key.into(), + environment: environment.into(), + index: index.into(), + namespace: None, + dimensions: None, + } + } + + /// Sets the namespace. + pub fn with_namespace(mut self, namespace: impl Into) -> Self { + self.namespace = Some(namespace.into()); + self + } + + /// Sets the vector dimensions. + pub fn with_dimensions(mut self, dimensions: usize) -> Self { + self.dimensions = Some(dimensions); + self + } +} diff --git a/crates/nvisy-vector/src/pinecone/mod.rs b/crates/nvisy-vector/src/pinecone/mod.rs new file mode 100644 index 0000000..7e618ef --- /dev/null +++ b/crates/nvisy-vector/src/pinecone/mod.rs @@ -0,0 +1,7 @@ +//! Pinecone vector store backend. + +mod backend; +mod config; + +pub use backend::PineconeBackend; +pub use config::PineconeConfig; diff --git a/crates/nvisy-vector/src/qdrant/backend.rs b/crates/nvisy-vector/src/qdrant/backend.rs new file mode 100644 index 0000000..cb92445 --- /dev/null +++ b/crates/nvisy-vector/src/qdrant/backend.rs @@ -0,0 +1,331 @@ +//! Qdrant backend implementation. + +use std::collections::HashMap; + +use async_trait::async_trait; +use qdrant_client::Qdrant; +use qdrant_client::qdrant::vectors_config::Config as VectorsConfig; +use qdrant_client::qdrant::with_payload_selector::SelectorOptions; +use qdrant_client::qdrant::with_vectors_selector::SelectorOptions as VectorsSelectorOptions; +use qdrant_client::qdrant::{ + Condition, CreateCollectionBuilder, DeletePointsBuilder, Distance, Filter, GetPointsBuilder, + PointId, PointStruct, SearchPointsBuilder, UpsertPointsBuilder, VectorParamsBuilder, +}; + +use super::QdrantConfig; +use crate::TRACING_TARGET; +use crate::error::{VectorError, VectorResult}; +use crate::store::{SearchOptions, SearchResult, VectorData, VectorStoreBackend}; + +/// Qdrant backend implementation. +pub struct QdrantBackend { + client: Qdrant, + #[allow(dead_code)] + config: QdrantConfig, +} + +impl QdrantBackend { + /// Creates a new Qdrant backend. + pub async fn new(config: &QdrantConfig) -> VectorResult { + let client = Qdrant::from_url(&config.url) + .api_key(config.api_key.clone()) + .build() + .map_err(|e| VectorError::connection(e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET, + url = %config.url, + "Connected to Qdrant" + ); + + Ok(Self { + client, + config: config.clone(), + }) + } + + /// Extracts vector data from Qdrant's VectorsOutput. + fn extract_vector(vectors: Option) -> Option> { + use qdrant_client::qdrant::vectors_output::VectorsOptions; + + vectors.and_then(|v| match v.vectors_options { + #[allow(deprecated)] + Some(VectorsOptions::Vector(vec)) => Some(vec.data), + _ => None, + }) + } + + /// Extracts point ID as a string. 
+ fn extract_point_id(id: Option) -> Option { + use qdrant_client::qdrant::point_id::PointIdOptions; + + match id { + Some(PointId { + point_id_options: Some(id), + }) => match id { + PointIdOptions::Num(n) => Some(n.to_string()), + PointIdOptions::Uuid(s) => Some(s), + }, + _ => None, + } + } +} + +#[async_trait] +impl VectorStoreBackend for QdrantBackend { + async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { + let vectors_config = VectorsConfig::Params( + VectorParamsBuilder::new(dimensions as u64, Distance::Cosine).build(), + ); + + self.client + .create_collection(CreateCollectionBuilder::new(name).vectors_config(vectors_config)) + .await + .map_err(|e| VectorError::backend(e.to_string()))?; + + tracing::info!( + target: TRACING_TARGET, + collection = %name, + dimensions = %dimensions, + "Created Qdrant collection" + ); + + Ok(()) + } + + async fn delete_collection(&self, name: &str) -> VectorResult<()> { + self.client + .delete_collection(name) + .await + .map_err(|e| VectorError::backend(e.to_string()))?; + + tracing::info!( + target: TRACING_TARGET, + collection = %name, + "Deleted Qdrant collection" + ); + + Ok(()) + } + + async fn collection_exists(&self, name: &str) -> VectorResult { + let exists = self + .client + .collection_exists(name) + .await + .map_err(|e| VectorError::backend(e.to_string()))?; + + Ok(exists) + } + + async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { + let points: Vec = vectors + .into_iter() + .map(|v| { + let payload: HashMap = v + .metadata + .into_iter() + .map(|(k, v)| (k, json_to_qdrant_value(v))) + .collect(); + + PointStruct::new(v.id, v.vector, payload) + }) + .collect(); + + self.client + .upsert_points(UpsertPointsBuilder::new(collection, points)) + .await + .map_err(|e| VectorError::backend(e.to_string()))?; + + Ok(()) + } + + async fn search( + &self, + collection: &str, + query: Vec, + limit: usize, + options: SearchOptions, + ) -> VectorResult> { + let mut search = SearchPointsBuilder::new(collection, query, limit as u64); + + if options.include_vectors { + search = search.with_vectors(VectorsSelectorOptions::Enable(true)); + } + + if options.include_metadata { + search = search.with_payload(SelectorOptions::Enable(true)); + } + + if let Some(filter_json) = options.filter + && let Some(conditions) = parse_filter(&filter_json) + { + search = search.filter(Filter::must(conditions)); + } + + let response = self + .client + .search_points(search) + .await + .map_err(|e| VectorError::backend(e.to_string()))?; + + let results = response + .result + .into_iter() + .map(|point| { + let id = Self::extract_point_id(point.id).unwrap_or_default(); + let vector = Self::extract_vector(point.vectors); + + let metadata: HashMap = point + .payload + .into_iter() + .map(|(k, v)| (k, qdrant_value_to_json(v))) + .collect(); + + SearchResult { + id, + score: point.score, + vector, + metadata, + } + }) + .collect(); + + Ok(results) + } + + async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { + let point_ids: Vec = ids.into_iter().map(PointId::from).collect(); + + self.client + .delete_points(DeletePointsBuilder::new(collection).points(point_ids)) + .await + .map_err(|e| VectorError::backend(e.to_string()))?; + + Ok(()) + } + + async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { + let point_ids: Vec = ids.into_iter().map(PointId::from).collect(); + + let response = self + .client + .get_points( + GetPointsBuilder::new(collection, point_ids) + 
.with_vectors(VectorsSelectorOptions::Enable(true)) + .with_payload(SelectorOptions::Enable(true)), + ) + .await + .map_err(|e| VectorError::backend(e.to_string()))?; + + let results = response + .result + .into_iter() + .filter_map(|point| { + let id = Self::extract_point_id(point.id)?; + let vector = Self::extract_vector(point.vectors)?; + + let metadata: HashMap = point + .payload + .into_iter() + .map(|(k, v)| (k, qdrant_value_to_json(v))) + .collect(); + + Some(VectorData { + id, + vector, + metadata, + }) + }) + .collect(); + + Ok(results) + } +} + +/// Converts JSON value to Qdrant value. +fn json_to_qdrant_value(value: serde_json::Value) -> qdrant_client::qdrant::Value { + use qdrant_client::qdrant::value::Kind; + + let kind = match value { + serde_json::Value::Null => Kind::NullValue(0), + serde_json::Value::Bool(b) => Kind::BoolValue(b), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Kind::IntegerValue(i) + } else if let Some(f) = n.as_f64() { + Kind::DoubleValue(f) + } else { + Kind::StringValue(n.to_string()) + } + } + serde_json::Value::String(s) => Kind::StringValue(s), + serde_json::Value::Array(arr) => { + let values: Vec = + arr.into_iter().map(json_to_qdrant_value).collect(); + Kind::ListValue(qdrant_client::qdrant::ListValue { values }) + } + serde_json::Value::Object(obj) => { + let fields: HashMap = obj + .into_iter() + .map(|(k, v)| (k, json_to_qdrant_value(v))) + .collect(); + Kind::StructValue(qdrant_client::qdrant::Struct { fields }) + } + }; + + qdrant_client::qdrant::Value { kind: Some(kind) } +} + +/// Converts Qdrant value to JSON value. +fn qdrant_value_to_json(value: qdrant_client::qdrant::Value) -> serde_json::Value { + use qdrant_client::qdrant::value::Kind; + + match value.kind { + Some(Kind::NullValue(_)) => serde_json::Value::Null, + Some(Kind::BoolValue(b)) => serde_json::Value::Bool(b), + Some(Kind::IntegerValue(i)) => serde_json::json!(i), + Some(Kind::DoubleValue(f)) => serde_json::json!(f), + Some(Kind::StringValue(s)) => serde_json::Value::String(s), + Some(Kind::ListValue(list)) => { + let arr: Vec = + list.values.into_iter().map(qdrant_value_to_json).collect(); + serde_json::Value::Array(arr) + } + Some(Kind::StructValue(obj)) => { + let map: serde_json::Map = obj + .fields + .into_iter() + .map(|(k, v)| (k, qdrant_value_to_json(v))) + .collect(); + serde_json::Value::Object(map) + } + None => serde_json::Value::Null, + } +} + +/// Parses a JSON filter into Qdrant conditions. +fn parse_filter(filter: &serde_json::Value) -> Option> { + // Simple filter parsing - can be extended for more complex queries + if let serde_json::Value::Object(obj) = filter { + let conditions: Vec = obj + .iter() + .filter_map(|(key, value)| match value { + serde_json::Value::String(s) => Some(Condition::matches(key.clone(), s.clone())), + serde_json::Value::Number(n) => { + n.as_i64().map(|i| Condition::matches(key.clone(), i)) + } + serde_json::Value::Bool(b) => Some(Condition::matches(key.clone(), *b)), + _ => None, + }) + .collect(); + + if conditions.is_empty() { + None + } else { + Some(conditions) + } + } else { + None + } +} diff --git a/crates/nvisy-vector/src/qdrant/config.rs b/crates/nvisy-vector/src/qdrant/config.rs new file mode 100644 index 0000000..05143ec --- /dev/null +++ b/crates/nvisy-vector/src/qdrant/config.rs @@ -0,0 +1,59 @@ +//! Qdrant configuration. + +use serde::{Deserialize, Serialize}; + +/// Qdrant configuration. 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct QdrantConfig { + /// Qdrant server URL (e.g., "http://localhost:6334"). + pub url: String, + /// API key for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub api_key: Option, + /// Default collection name. + #[serde(skip_serializing_if = "Option::is_none")] + pub collection: Option, + /// Vector dimensions. + #[serde(skip_serializing_if = "Option::is_none")] + pub dimensions: Option, + /// Use gRPC instead of REST. + #[serde(default)] + pub use_grpc: bool, +} + +impl QdrantConfig { + /// Creates a new Qdrant configuration. + pub fn new(url: impl Into) -> Self { + Self { + url: url.into(), + api_key: None, + collection: None, + dimensions: None, + use_grpc: false, + } + } + + /// Sets the API key. + pub fn with_api_key(mut self, api_key: impl Into) -> Self { + self.api_key = Some(api_key.into()); + self + } + + /// Sets the default collection. + pub fn with_collection(mut self, collection: impl Into) -> Self { + self.collection = Some(collection.into()); + self + } + + /// Sets the vector dimensions. + pub fn with_dimensions(mut self, dimensions: usize) -> Self { + self.dimensions = Some(dimensions); + self + } + + /// Enables gRPC mode. + pub fn with_grpc(mut self) -> Self { + self.use_grpc = true; + self + } +} diff --git a/crates/nvisy-vector/src/qdrant/mod.rs b/crates/nvisy-vector/src/qdrant/mod.rs new file mode 100644 index 0000000..929807c --- /dev/null +++ b/crates/nvisy-vector/src/qdrant/mod.rs @@ -0,0 +1,7 @@ +//! Qdrant vector store backend. + +mod backend; +mod config; + +pub use backend::QdrantBackend; +pub use config::QdrantConfig; diff --git a/crates/nvisy-vector/src/store.rs b/crates/nvisy-vector/src/store.rs new file mode 100644 index 0000000..84772b9 --- /dev/null +++ b/crates/nvisy-vector/src/store.rs @@ -0,0 +1,265 @@ +//! Vector store trait and implementations. + +use std::collections::HashMap; + +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +use crate::TRACING_TARGET; +use crate::config::VectorStoreConfig; +use crate::error::VectorResult; +use crate::milvus::MilvusBackend; +use crate::pgvector::PgVectorBackend; +use crate::pinecone::PineconeBackend; +use crate::qdrant::QdrantBackend; + +/// Vector data to be stored. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VectorData { + /// Unique identifier for the vector. + pub id: String, + /// The embedding vector. + pub vector: Vec, + /// Optional metadata. + #[serde(default, skip_serializing_if = "HashMap::is_empty")] + pub metadata: HashMap, +} + +impl VectorData { + /// Creates a new vector data with an ID and embedding. + pub fn new(id: impl Into, vector: Vec) -> Self { + Self { + id: id.into(), + vector, + metadata: HashMap::new(), + } + } + + /// Adds metadata to the vector. + pub fn with_metadata( + mut self, + metadata: impl IntoIterator, serde_json::Value)>, + ) -> Self { + self.metadata = metadata.into_iter().map(|(k, v)| (k.into(), v)).collect(); + self + } + + /// Adds a single metadata field. + pub fn with_field(mut self, key: impl Into, value: serde_json::Value) -> Self { + self.metadata.insert(key.into(), value); + self + } +} + +/// Search result from a vector query. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResult { + /// Vector ID. + pub id: String, + /// Similarity score. + pub score: f32, + /// The vector (if requested). + #[serde(skip_serializing_if = "Option::is_none")] + pub vector: Option>, + /// Associated metadata. 
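A sketch of preparing a vector for upsert, assuming `VectorData::new` takes an id plus a `Vec<f32>` and that metadata values are `serde_json::Value`s; the identifiers are placeholders.

```rust
use nvisy_vector::VectorData;

fn main() {
    let vector = VectorData::new("doc-42#chunk-0", vec![0.1, 0.2, 0.3])
        .with_field("document_id", serde_json::json!("doc-42"))
        .with_field("page", serde_json::json!(7));
    assert_eq!(vector.metadata.len(), 2);
}
```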
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub metadata: HashMap<String, serde_json::Value>,
+}
+
+/// Search options.
+#[derive(Debug, Clone, Default)]
+pub struct SearchOptions {
+    /// Include vectors in results.
+    pub include_vectors: bool,
+    /// Include metadata in results.
+    pub include_metadata: bool,
+    /// Metadata filter (backend-specific JSON).
+    pub filter: Option<serde_json::Value>,
+    /// Namespace/partition (for backends that support it).
+    pub namespace: Option<String>,
+}
+
+impl SearchOptions {
+    /// Creates default search options.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Include vectors in results.
+    pub fn with_vectors(mut self) -> Self {
+        self.include_vectors = true;
+        self
+    }
+
+    /// Include metadata in results.
+    pub fn with_metadata(mut self) -> Self {
+        self.include_metadata = true;
+        self
+    }
+
+    /// Set a metadata filter.
+    pub fn with_filter(mut self, filter: serde_json::Value) -> Self {
+        self.filter = Some(filter);
+        self
+    }
+
+    /// Set the namespace.
+    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
+        self.namespace = Some(namespace.into());
+        self
+    }
+}
+
+/// Trait for vector store backends.
+#[async_trait]
+pub trait VectorStoreBackend: Send + Sync {
+    /// Creates or ensures a collection exists.
+    async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()>;
+
+    /// Deletes a collection.
+    async fn delete_collection(&self, name: &str) -> VectorResult<()>;
+
+    /// Checks if a collection exists.
+    async fn collection_exists(&self, name: &str) -> VectorResult<bool>;
+
+    /// Upserts vectors into a collection.
+    async fn upsert(&self, collection: &str, vectors: Vec<VectorData>) -> VectorResult<()>;
+
+    /// Searches for similar vectors.
+    async fn search(
+        &self,
+        collection: &str,
+        query: Vec<f32>,
+        limit: usize,
+        options: SearchOptions,
+    ) -> VectorResult<Vec<SearchResult>>;
+
+    /// Deletes vectors by their IDs.
+    async fn delete(&self, collection: &str, ids: Vec<String>) -> VectorResult<()>;
+
+    /// Gets vectors by their IDs.
+    async fn get(&self, collection: &str, ids: Vec<String>) -> VectorResult<Vec<VectorData>>;
+}
+
+/// Unified vector store that wraps backend implementations.
+pub struct VectorStore {
+    #[allow(dead_code)]
+    config: VectorStoreConfig,
+    #[allow(dead_code)]
+    backend: Box<dyn VectorStoreBackend>,
+}
+
+impl VectorStore {
+    /// Creates a new vector store from configuration.
+    pub async fn new(config: VectorStoreConfig) -> VectorResult<Self> {
+        let backend: Box<dyn VectorStoreBackend> = match &config {
+            VectorStoreConfig::Qdrant(cfg) => Box::new(QdrantBackend::new(cfg).await?),
+            VectorStoreConfig::Milvus(cfg) => Box::new(MilvusBackend::new(cfg).await?),
+            VectorStoreConfig::Pinecone(cfg) => Box::new(PineconeBackend::new(cfg).await?),
+            VectorStoreConfig::PgVector(cfg) => Box::new(PgVectorBackend::new(cfg).await?),
+        };
+
+        tracing::info!(
+            target: TRACING_TARGET,
+            backend = %config.backend_name(),
+            "Vector store initialized"
+        );
+
+        Ok(Self { config, backend })
+    }
+
+    /// Creates or ensures a collection exists.
+    pub async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> {
+        tracing::debug!(
+            target: TRACING_TARGET,
+            collection = %name,
+            dimensions = %dimensions,
+            "Creating collection"
+        );
+        self.backend.create_collection(name, dimensions).await
+    }
+
+    /// Deletes a collection.
+    pub async fn delete_collection(&self, name: &str) -> VectorResult<()> {
+        tracing::debug!(
+            target: TRACING_TARGET,
+            collection = %name,
+            "Deleting collection"
+        );
+        self.backend.delete_collection(name).await
+    }
+
+    /// Checks if a collection exists.
+    pub async fn collection_exists(&self, name: &str) -> VectorResult<bool> {
+        self.backend.collection_exists(name).await
+    }
+
+    /// Upserts vectors into a collection.
+    pub async fn upsert(&self, collection: &str, vectors: Vec<VectorData>) -> VectorResult<()> {
+        tracing::debug!(
+            target: TRACING_TARGET,
+            collection = %collection,
+            count = %vectors.len(),
+            "Upserting vectors"
+        );
+        self.backend.upsert(collection, vectors).await
+    }
+
+    /// Searches for similar vectors.
+    pub async fn search(
+        &self,
+        collection: &str,
+        query: Vec<f32>,
+        limit: usize,
+    ) -> VectorResult<Vec<SearchResult>> {
+        self.search_with_options(collection, query, limit, SearchOptions::default())
+            .await
+    }
+
+    /// Searches for similar vectors with options.
+    pub async fn search_with_options(
+        &self,
+        collection: &str,
+        query: Vec<f32>,
+        limit: usize,
+        options: SearchOptions,
+    ) -> VectorResult<Vec<SearchResult>> {
+        tracing::debug!(
+            target: TRACING_TARGET,
+            collection = %collection,
+            limit = %limit,
+            "Searching vectors"
+        );
+        self.backend.search(collection, query, limit, options).await
+    }
+
+    /// Deletes vectors by their IDs.
+    pub async fn delete(&self, collection: &str, ids: Vec<String>) -> VectorResult<()> {
+        tracing::debug!(
+            target: TRACING_TARGET,
+            collection = %collection,
+            count = %ids.len(),
+            "Deleting vectors"
+        );
+        self.backend.delete(collection, ids).await
+    }
+
+    /// Gets vectors by their IDs.
+    pub async fn get(&self, collection: &str, ids: Vec<String>) -> VectorResult<Vec<VectorData>> {
+        tracing::debug!(
+            target: TRACING_TARGET,
+            collection = %collection,
+            count = %ids.len(),
+            "Getting vectors"
+        );
+        self.backend.get(collection, ids).await
+    }
+}
+
+impl std::fmt::Debug for VectorStore {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("VectorStore")
+            .field("backend", &self.config.backend_name())
+            .finish()
+    }
+}

From 1f12f0b6744c5b05169e29dd23f309f56f0ad209 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha
Date: Mon, 19 Jan 2026 11:29:15 +0100
Subject: [PATCH 06/28] chore: move opendal, runtime, vector to integrations/, remove chrono

---
 Cargo.lock                                    | 360 +++++++++---------
 Cargo.toml                                    |  14 +-
 .../nvisy-opendal/Cargo.toml                  |   0
 .../nvisy-opendal/README.md                   |   0
 .../nvisy-opendal/src/azblob/config.rs        |   0
 .../nvisy-opendal/src/azblob/mod.rs           |   0
 .../nvisy-opendal/src/backend.rs              |   0
 .../nvisy-opendal/src/config.rs               |   0
 .../nvisy-opendal/src/dropbox/config.rs       |   0
 .../nvisy-opendal/src/dropbox/mod.rs          |   0
 .../nvisy-opendal/src/error.rs                |   0
 .../nvisy-opendal/src/gcs/config.rs           |   0
 .../nvisy-opendal/src/gcs/mod.rs              |   0
 .../nvisy-opendal/src/gdrive/config.rs        |   0
 .../nvisy-opendal/src/gdrive/mod.rs           |   0
 .../nvisy-opendal/src/lib.rs                  |   0
 .../nvisy-opendal/src/onedrive/config.rs      |   0
 .../nvisy-opendal/src/onedrive/mod.rs         |   0
 .../nvisy-opendal/src/s3/config.rs            |   0
 .../nvisy-opendal/src/s3/mod.rs               |   0
 .../nvisy-runtime/Cargo.toml                  |   0
 .../nvisy-runtime/README.md                   |   0
 .../nvisy-runtime/src/engine/config.rs        |   0
 .../nvisy-runtime/src/engine/executor.rs      |   0
 .../nvisy-runtime/src/engine/mod.rs           |   0
 .../nvisy-runtime/src/error.rs                |   0
 .../nvisy-runtime/src/graph/edge.rs           |   0
 .../nvisy-runtime/src/graph/mod.rs            |   0
 .../nvisy-runtime/src/graph/workflow.rs       |   0
 .../nvisy-runtime/src/lib.rs                  |   0
 .../nvisy-runtime/src/node/data.rs            |   0
 .../nvisy-runtime/src/node/id.rs              |   0
 .../nvisy-runtime/src/node/input/config.rs    |   0
 .../nvisy-runtime/src/node/input/mod.rs       |   0
 .../nvisy-runtime/src/node/mod.rs             |   0
 .../nvisy-runtime/src/node/output/config.rs   |   0
 .../nvisy-runtime/src/node/output/mod.rs      |   0
 .../src/node/transformer/chunking.rs          |   0
 .../src/node/transformer/config.rs            |   0
.../src/node/transformer/document.rs | 0 .../src/node/transformer/embedding.rs | 0 .../src/node/transformer/extraction.rs | 0 .../nvisy-runtime/src/node/transformer/mod.rs | 0 .../src/node/transformer/processing.rs | 0 .../src/node/transformer/quality.rs | 0 .../src/node/transformer/routing.rs | 0 .../nvisy-runtime/src/runtime/config.rs | 0 .../nvisy-runtime/src/runtime/mod.rs | 0 .../nvisy-runtime/src/runtime/service.rs | 0 .../nvisy-vector/Cargo.toml | 0 .../nvisy-vector/README.md | 0 .../nvisy-vector/src/config.rs | 0 .../nvisy-vector/src/error.rs | 0 .../nvisy-vector/src/lib.rs | 0 .../nvisy-vector/src/milvus/backend.rs | 0 .../nvisy-vector/src/milvus/config.rs | 0 .../nvisy-vector/src/milvus/mod.rs | 0 .../nvisy-vector/src/pgvector/backend.rs | 0 .../nvisy-vector/src/pgvector/config.rs | 0 .../nvisy-vector/src/pgvector/mod.rs | 0 .../nvisy-vector/src/pinecone/backend.rs | 0 .../nvisy-vector/src/pinecone/config.rs | 0 .../nvisy-vector/src/pinecone/mod.rs | 0 .../nvisy-vector/src/qdrant/backend.rs | 0 .../nvisy-vector/src/qdrant/config.rs | 0 .../nvisy-vector/src/qdrant/mod.rs | 0 .../nvisy-vector/src/store.rs | 0 67 files changed, 192 insertions(+), 182 deletions(-) rename {crates => integrations}/nvisy-opendal/Cargo.toml (100%) rename {crates => integrations}/nvisy-opendal/README.md (100%) rename {crates => integrations}/nvisy-opendal/src/azblob/config.rs (100%) rename {crates => integrations}/nvisy-opendal/src/azblob/mod.rs (100%) rename {crates => integrations}/nvisy-opendal/src/backend.rs (100%) rename {crates => integrations}/nvisy-opendal/src/config.rs (100%) rename {crates => integrations}/nvisy-opendal/src/dropbox/config.rs (100%) rename {crates => integrations}/nvisy-opendal/src/dropbox/mod.rs (100%) rename {crates => integrations}/nvisy-opendal/src/error.rs (100%) rename {crates => integrations}/nvisy-opendal/src/gcs/config.rs (100%) rename {crates => integrations}/nvisy-opendal/src/gcs/mod.rs (100%) rename {crates => integrations}/nvisy-opendal/src/gdrive/config.rs (100%) rename {crates => integrations}/nvisy-opendal/src/gdrive/mod.rs (100%) rename {crates => integrations}/nvisy-opendal/src/lib.rs (100%) rename {crates => integrations}/nvisy-opendal/src/onedrive/config.rs (100%) rename {crates => integrations}/nvisy-opendal/src/onedrive/mod.rs (100%) rename {crates => integrations}/nvisy-opendal/src/s3/config.rs (100%) rename {crates => integrations}/nvisy-opendal/src/s3/mod.rs (100%) rename {crates => integrations}/nvisy-runtime/Cargo.toml (100%) rename {crates => integrations}/nvisy-runtime/README.md (100%) rename {crates => integrations}/nvisy-runtime/src/engine/config.rs (100%) rename {crates => integrations}/nvisy-runtime/src/engine/executor.rs (100%) rename {crates => integrations}/nvisy-runtime/src/engine/mod.rs (100%) rename {crates => integrations}/nvisy-runtime/src/error.rs (100%) rename {crates => integrations}/nvisy-runtime/src/graph/edge.rs (100%) rename {crates => integrations}/nvisy-runtime/src/graph/mod.rs (100%) rename {crates => integrations}/nvisy-runtime/src/graph/workflow.rs (100%) rename {crates => integrations}/nvisy-runtime/src/lib.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/data.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/id.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/input/config.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/input/mod.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/mod.rs (100%) rename {crates => 
integrations}/nvisy-runtime/src/node/output/config.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/output/mod.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/chunking.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/config.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/document.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/embedding.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/extraction.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/mod.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/processing.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/quality.rs (100%) rename {crates => integrations}/nvisy-runtime/src/node/transformer/routing.rs (100%) rename {crates => integrations}/nvisy-runtime/src/runtime/config.rs (100%) rename {crates => integrations}/nvisy-runtime/src/runtime/mod.rs (100%) rename {crates => integrations}/nvisy-runtime/src/runtime/service.rs (100%) rename {crates => integrations}/nvisy-vector/Cargo.toml (100%) rename {crates => integrations}/nvisy-vector/README.md (100%) rename {crates => integrations}/nvisy-vector/src/config.rs (100%) rename {crates => integrations}/nvisy-vector/src/error.rs (100%) rename {crates => integrations}/nvisy-vector/src/lib.rs (100%) rename {crates => integrations}/nvisy-vector/src/milvus/backend.rs (100%) rename {crates => integrations}/nvisy-vector/src/milvus/config.rs (100%) rename {crates => integrations}/nvisy-vector/src/milvus/mod.rs (100%) rename {crates => integrations}/nvisy-vector/src/pgvector/backend.rs (100%) rename {crates => integrations}/nvisy-vector/src/pgvector/config.rs (100%) rename {crates => integrations}/nvisy-vector/src/pgvector/mod.rs (100%) rename {crates => integrations}/nvisy-vector/src/pinecone/backend.rs (100%) rename {crates => integrations}/nvisy-vector/src/pinecone/config.rs (100%) rename {crates => integrations}/nvisy-vector/src/pinecone/mod.rs (100%) rename {crates => integrations}/nvisy-vector/src/qdrant/backend.rs (100%) rename {crates => integrations}/nvisy-vector/src/qdrant/config.rs (100%) rename {crates => integrations}/nvisy-vector/src/qdrant/mod.rs (100%) rename {crates => integrations}/nvisy-vector/src/store.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 721b514..1e0a237 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,7 +62,7 @@ dependencies = [ "bytes", "cfg-if", "http", - "indexmap 2.12.1", + "indexmap 2.13.0", "schemars 0.9.0", "serde", "serde_json", @@ -197,13 +197,12 @@ checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" [[package]] name = "async-compression" -version = "0.4.36" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98ec5f6c2f8bc326c994cb9e241cc257ddaba9afa8555a43cffbb5dd86efaa37" +checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" dependencies = [ "compression-codecs", "compression-core", - "futures-core", "pin-project-lite", "tokio", ] @@ -315,9 +314,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-lc-rs" -version = "1.15.2" +version = "1.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88aab2464f1f25453baa7a07c84c5b7684e274054ba06817f382357f77a288" +checksum = "e84ce723ab67259cfeb9877c6a639ee9eb7a27b28123abd71db7f0d5d0cc9d86" dependencies 
= [ "aws-lc-sys", "untrusted 0.7.1", @@ -326,9 +325,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45afffdee1e7c9126814751f88dddc747f41d91da16c9551a0f1e8a11e788a1" +checksum = "43a442ece363113bd4bd4c8b18977a7798dd4d3c3383f34fb61936960e8f4ad8" dependencies = [ "cc", "cmake", @@ -358,7 +357,7 @@ dependencies = [ "rustversion", "serde", "sync_wrapper", - "tower 0.5.2", + "tower 0.5.3", "tower-layer", "tower-service", ] @@ -369,7 +368,7 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ - "axum-core 0.5.5", + "axum-core 0.5.6", "axum-macros", "bytes", "form_urlencoded", @@ -392,7 +391,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tower 0.5.2", + "tower 0.5.3", "tower-layer", "tower-service", "tracing", @@ -431,9 +430,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", @@ -455,7 +454,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9963ff19f40c6102c76756ef0a46004c0d58957d87259fc9208ff8441c12ab96" dependencies = [ "axum 0.8.8", - "axum-core 0.5.5", + "axum-core 0.5.6", "bytes", "futures-util", "http", @@ -477,7 +476,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fef252edff26ddba56bbcdf2ee3307b8129acb86f5749b68990c168a6fcc9c76" dependencies = [ "axum 0.8.8", - "axum-core 0.5.5", + "axum-core 0.5.6", "bytes", "form_urlencoded", "futures-core", @@ -554,7 +553,7 @@ dependencies = [ "serde_urlencoded", "smallvec", "tokio", - "tower 0.5.2", + "tower 0.5.3", "url", ] @@ -592,9 +591,9 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "base64ct" -version = "1.8.1" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e050f626429857a27ddccb31e0aca21356bfa709c04041aefddac081a8f068a" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" [[package]] name = "bigdecimal" @@ -637,7 +636,7 @@ version = "0.11.0-rc.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "679065eb2b85a078ace42411e657bef3a6afe93a40d1b9cb04e39ca303cc3f36" dependencies = [ - "digest 0.11.0-rc.4", + "digest 0.11.0-rc.5", ] [[package]] @@ -735,9 +734,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.50" +version = "1.2.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d563227a1c37cc0a263f64eca3334388c01c5e4c4861a9def205c614383c" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" dependencies = [ "find-msvc-tools", "jobserver", @@ -770,9 +769,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -787,7 +786,7 @@ version = "0.4.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" dependencies = [ - "crypto-common 0.1.6", + "crypto-common 0.1.7", "inout", ] @@ -827,9 +826,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "client-ip" @@ -857,9 +856,9 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "compression-codecs" -version = "0.4.35" +version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f7ac3e5b97fdce45e8922fb05cae2c37f7bbd63d30dd94821dacfd8f3f2bf2" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" dependencies = [ "brotli", "compression-core", @@ -905,7 +904,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] @@ -1044,9 +1043,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -1054,9 +1053,9 @@ dependencies = [ [[package]] name = "crypto-common" -version = "0.2.0-rc.5" +version = "0.2.0-rc.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919bd05924682a5480aec713596b9e2aabed3a0a6022fab6847f85a99e5f190a" +checksum = "41b8986f836d4aeb30ccf4c9d3bd562fd716074cfd7fc4a2948359fbd21ed809" dependencies = [ "hybrid-array", ] @@ -1180,9 +1179,9 @@ dependencies = [ [[package]] name = "data-encoding" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" [[package]] name = "deadpool" @@ -1393,18 +1392,18 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.4", "const-oid", - "crypto-common 0.1.6", + "crypto-common 0.1.7", "subtle", ] [[package]] name = "digest" -version = "0.11.0-rc.4" +version = "0.11.0-rc.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea390c940e465846d64775e55e3115d5dc934acb953de6f6e6360bc232fe2bf7" +checksum = "ebf9423bafb058e4142194330c52273c343f8a5beb7176d052f0e73b17dd35b9" dependencies = [ "block-buffer 0.11.0", - "crypto-common 0.2.0-rc.5", + "crypto-common 0.2.0-rc.9", "subtle", ] @@ -1631,21 +1630,20 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "filetime" -version = "0.2.26" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" dependencies = [ "cfg-if", "libc", "libredox", - 
"windows-sys 0.60.2", ] [[package]] name = "find-msvc-tools" -version = "0.1.5" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" [[package]] name = "fixedbitset" @@ -1687,9 +1685,9 @@ dependencies = [ [[package]] name = "fs-err" -version = "3.2.1" +version = "3.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824f08d01d0f496b3eca4f001a13cf17690a6ee930043d20817f547455fd98f8" +checksum = "baf68cef89750956493a66a10f512b9e58d9db21f2a573c079c0bdf1207a54a7" dependencies = [ "autocfg", "tokio", @@ -1823,14 +1821,14 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -1887,9 +1885,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -1897,7 +1895,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.12.1", + "indexmap 2.13.0", "slab", "tokio", "tokio-util", @@ -2114,7 +2112,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots 1.0.4", + "webpki-roots 1.0.5", ] [[package]] @@ -2224,9 +2222,9 @@ dependencies = [ [[package]] name = "icu_locale_data" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f03e2fcaefecdf05619f3d6f91740e79ab969b4dd54f77cbf546b1d0d28e3147" +checksum = "1c5f1d16b4c3a2642d3a719f18f6b06070ab0aef246a6418130c955ae08aa831" [[package]] name = "icu_normalizer" @@ -2346,9 +2344,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -2386,9 +2384,9 @@ dependencies = [ [[package]] name = "iri-string" -version = "0.7.9" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +checksum = "c91338f0783edbd6195decb37bae672fd3b165faffb89bf7b9e6942f8b1a731a" dependencies = [ "memchr", "serde", @@ -2420,9 +2418,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ee5b5339afb4c41626dde77b7a611bd4f2c202b897852b4bcf5d03eddc61010" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jiff" @@ -2487,9 +2485,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -2518,7 +2516,7 @@ checksum = "c76e1c7d7df3e34443b3621b459b066a7b79644f059fc8b2db7070c825fd417e" dependencies = [ "aws-lc-rs", "base64", - "getrandom 0.2.16", + "getrandom 0.2.17", "js-sys", "pem", "serde", @@ -2544,9 +2542,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.178" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libm" @@ -2556,13 +2554,13 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df15f6eac291ed1cf25865b1ee60399f57e7c227e7f51bdbd4c5270396a9ed50" +checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ "bitflags", "libc", - "redox_syscall 0.6.0", + "redox_syscall 0.7.0", ] [[package]] @@ -2738,29 +2736,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.61.2", ] [[package]] name = "moka" -version = "0.12.10" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926" +checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" dependencies = [ "async-lock", "crossbeam-channel", "crossbeam-epoch", "crossbeam-utils", + "equivalent", "event-listener", "futures-util", - "loom", "parking_lot", "portable-atomic", - "rustc_version", "smallvec", "tagptr", - "thiserror 1.0.69", "uuid", ] @@ -2790,7 +2786,7 @@ dependencies = [ "data-encoding", "ed25519", "ed25519-dalek", - "getrandom 0.2.16", + "getrandom 0.2.17", "log", "rand 0.8.5", "signatory", @@ -2998,7 +2994,7 @@ dependencies = [ "opendal", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", ] @@ -3088,7 +3084,7 @@ dependencies = [ "serde", "sha2", "strum", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "uuid", ] @@ -3107,7 +3103,7 @@ dependencies = [ "serde", "serde_json", "strum", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "uuid", ] @@ -3120,7 +3116,7 @@ dependencies = [ "async-trait", "bytes", "nvisy-rt-document", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -3149,7 +3145,7 @@ dependencies = [ "async-trait", "bytes", "nvisy-rt-document", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -3160,7 +3156,7 @@ dependencies = [ "async-trait", "bytes", "nvisy-rt-document", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -3174,7 +3170,7 @@ dependencies = [ "markdown", "nvisy-rt-document", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -3192,7 +3188,7 @@ dependencies = [ "semver", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "uuid", @@ -3235,7 +3231,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tower 0.5.2", + "tower 0.5.3", "tower-http", "tracing", 
"tracing-subscriber", @@ -3258,7 +3254,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", ] @@ -3320,7 +3316,7 @@ dependencies = [ "chrono", "crc32c", "futures", - "getrandom 0.2.16", + "getrandom 0.2.17", "http", "http-body", "log", @@ -3399,11 +3395,11 @@ dependencies = [ [[package]] name = "password-hash" -version = "0.6.0-rc.6" +version = "0.6.0-rc.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "383d290055c99f2dd7dece082088d89494dff6d79277fbac4a7da21c1bf2ab6b" +checksum = "f77af9403a6489b7b51f552693bd48d8e81a710c92d3d77648b203558578762d" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.0-rc.0", "phc", "rand_core 0.10.0-rc-3", ] @@ -3451,7 +3447,7 @@ checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.12.1", + "indexmap 2.13.0", "serde", "serde_derive", ] @@ -3476,12 +3472,12 @@ dependencies = [ [[package]] name = "phc" -version = "0.6.0-rc.0" +version = "0.6.0-rc.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f960577aaac5c259bc0866d685ba315c0ed30793c602d7287f54980913863" +checksum = "71d390c5fe8d102c2c18ff39f1e72b9ad5996de282c2d831b0312f56910f5508" dependencies = [ "base64ct", - "getrandom 0.3.4", + "getrandom 0.4.0-rc.0", "rand_core 0.10.0-rc-3", "subtle", ] @@ -3583,9 +3579,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f59e70c4aef1e55797c2e8fd94a4f2a973fc972cfde0e0b05f683667b0cd39dd" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" [[package]] name = "portable-atomic-util" @@ -3598,9 +3594,9 @@ dependencies = [ [[package]] name = "postgres-protocol" -version = "0.6.9" +version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbef655056b916eb868048276cfd5d6a7dea4f81560dfd047f97c8c6fe3fcfd4" +checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491" dependencies = [ "base64", "byteorder", @@ -3616,9 +3612,9 @@ dependencies = [ [[package]] name = "postgres-types" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4605b7c057056dd35baeb6ac0c0338e4975b1f2bef0f65da953285eb007095" +checksum = "54b858f82211e84682fecd373f68e1ceae642d8d751a1ebd13f33de6257b3e20" dependencies = [ "bytes", "fallible-iterator", @@ -3787,7 +3783,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2 0.6.1", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -3861,7 +3857,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3893,7 +3889,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3902,14 +3898,14 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] 
name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -3931,9 +3927,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec96166dafa0886eb81fe1c0a388bece180fbef2135f97c1e2cf8302e74b43b5" +checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" dependencies = [ "bitflags", ] @@ -3998,7 +3994,7 @@ dependencies = [ "base64", "chrono", "form_urlencoded", - "getrandom 0.2.16", + "getrandom 0.2.17", "hex", "hmac", "home", @@ -4052,7 +4048,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-util", - "tower 0.5.2", + "tower 0.5.3", "tower-http", "tower-service", "url", @@ -4060,7 +4056,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.4", + "webpki-roots 1.0.5", ] [[package]] @@ -4096,7 +4092,7 @@ dependencies = [ "schemars 1.2.0", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "tokio", "tracing", "tracing-futures", @@ -4111,7 +4107,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted 0.9.0", "windows-sys 0.52.0", @@ -4165,9 +4161,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" [[package]] name = "rustc-hash" @@ -4199,16 +4195,16 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "aws-lc-rs", "log", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.8", + "rustls-webpki 0.103.9", "subtle", "zeroize", ] @@ -4249,9 +4245,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -4269,9 +4265,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "aws-lc-rs", "ring", @@ -4287,9 +4283,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62049b2877bf12821e8f9ad256ee38fdc31db7387ec2d3b3f403024de2034aea" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" [[package]] name = "salsa20" 
@@ -4316,7 +4312,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" dependencies = [ "dyn-clone", - "indexmap 2.12.1", + "indexmap 2.13.0", "jiff", "ref-cast", "schemars_derive 0.9.0", @@ -4488,7 +4484,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2f2d7ff8a2140333718bb329f5c40fc5f0865b84c426183ce14c97d2ab8154f" dependencies = [ "form_urlencoded", - "indexmap 2.12.1", + "indexmap 2.13.0", "itoa", "ryu", "serde_core", @@ -4611,10 +4607,11 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.7" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -4912,30 +4909,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd" dependencies = [ "num-conv", "time-core", @@ -5005,9 +5002,9 @@ dependencies = [ [[package]] name = "tokio-postgres" -version = "0.7.15" +version = "0.7.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b40d66d9b2cfe04b628173409368e58247e8eddbbd3b0e6c6ba1d09f20f6c9e" +checksum = "dcea47c8f71744367793f16c2db1f11cb859d28f436bdb4ca9193eb1f787ee42" dependencies = [ "async-trait", "byteorder", @@ -5041,9 +5038,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -5086,9 +5083,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.9.10+spec-1.1.0" +version = "0.9.11+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0825052159284a1a8b4d6c0c86cbc801f2da5afd2b225fa548c72f2e74002f48" +checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46" dependencies = [ "serde_core", "serde_spanned", @@ -5178,7 +5175,7 @@ dependencies = [ "futures-core", "futures-util", "hdrhistogram", - "indexmap 2.12.1", + "indexmap 2.13.0", "pin-project-lite", "slab", "sync_wrapper", @@ -5213,7 +5210,7 @@ dependencies = [ "pin-project-lite", "tokio", "tokio-util", - "tower 0.5.2", + "tower 0.5.3", "tower-layer", 
"tower-service", "tracing", @@ -5373,9 +5370,9 @@ dependencies = [ [[package]] name = "unicase" -version = "2.8.1" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" +checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" [[package]] name = "unicode-bidi" @@ -5534,26 +5531,38 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasite" -version = "0.1.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" +checksum = "66fe902b4a6b8028a753d5424909b764ccf79b7a209eac9bf97e59cda9f71a42" +dependencies = [ + "wasi 0.14.7+wasi-0.2.4", +] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -5564,11 +5573,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -5577,9 +5587,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5587,9 +5597,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -5600,9 +5610,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] @@ -5622,9 +5632,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.83" 
+version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -5646,23 +5656,23 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.4", + "webpki-roots 1.0.5", ] [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" dependencies = [ "rustls-pki-types", ] [[package]] name = "whoami" -version = "1.6.1" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +checksum = "ace4d5c7b5ab3d99629156d4e0997edbe98a4beb6d5ba99e2cae830207a81983" dependencies = [ "libredox", "wasite", @@ -5903,9 +5913,9 @@ checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "woothee" @@ -5973,18 +5983,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", @@ -6081,7 +6091,7 @@ dependencies = [ "generic-array", "getrandom 0.3.4", "hmac", - "indexmap 2.12.1", + "indexmap 2.13.0", "lzma-rust2", "memchr", "pbkdf2", @@ -6101,9 +6111,9 @@ checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" [[package]] name = "zmij" -version = "1.0.0" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d6085d62852e35540689d1f97ad663e3971fc19cf5eceab364d62c646ea167" +checksum = "94f63c051f4fe3c1509da62131a678643c5b6fbdc9273b2b79d4378ebda003d2" [[package]] name = "zopfli" diff --git a/Cargo.toml b/Cargo.toml index e40905b..2dd5bab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,13 +6,13 @@ members = [ "./crates/nvisy-cli", "./crates/nvisy-core", "./crates/nvisy-nats", - "./crates/nvisy-opendal", "./crates/nvisy-postgres", "./crates/nvisy-rig", - "./crates/nvisy-runtime", "./crates/nvisy-server", - "./crates/nvisy-vector", "./crates/nvisy-webhook", + "./integrations/nvisy-opendal", + "./integrations/nvisy-runtime", + "./integrations/nvisy-vector", ] [workspace.package] @@ -36,12 +36,12 @@ documentation = "https://docs.rs/nvisy-server" # Internal crates nvisy-core = { path = "./crates/nvisy-core", 
version = "0.1.0" } nvisy-nats = { path = "./crates/nvisy-nats", version = "0.1.0" } -nvisy-opendal = { path = "./crates/nvisy-opendal", version = "0.1.0" } +nvisy-opendal = { path = "./integrations/nvisy-opendal", version = "0.1.0" } nvisy-postgres = { path = "./crates/nvisy-postgres", version = "0.1.0" } nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } -nvisy-runtime = { path = "./crates/nvisy-runtime", version = "0.1.0" } +nvisy-runtime = { path = "./integrations/nvisy-runtime", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } -nvisy-vector = { path = "./crates/nvisy-vector", version = "0.1.0" } +nvisy-vector = { path = "./integrations/nvisy-vector", version = "0.1.0" } nvisy-webhook = { path = "./crates/nvisy-webhook", version = "0.1.0" } # Runtime crates (from github.com/nvisycom/runtime) @@ -140,7 +140,7 @@ rig-core = { version = "0.28", default-features = false, features = ["reqwest-ru # Storage abstraction opendal = { version = "0.53", features = [] } -chrono = { version = "0.4", features = ["serde"] } + # Graph data structures petgraph = { version = "0.8", features = ["serde-1"] } diff --git a/crates/nvisy-opendal/Cargo.toml b/integrations/nvisy-opendal/Cargo.toml similarity index 100% rename from crates/nvisy-opendal/Cargo.toml rename to integrations/nvisy-opendal/Cargo.toml diff --git a/crates/nvisy-opendal/README.md b/integrations/nvisy-opendal/README.md similarity index 100% rename from crates/nvisy-opendal/README.md rename to integrations/nvisy-opendal/README.md diff --git a/crates/nvisy-opendal/src/azblob/config.rs b/integrations/nvisy-opendal/src/azblob/config.rs similarity index 100% rename from crates/nvisy-opendal/src/azblob/config.rs rename to integrations/nvisy-opendal/src/azblob/config.rs diff --git a/crates/nvisy-opendal/src/azblob/mod.rs b/integrations/nvisy-opendal/src/azblob/mod.rs similarity index 100% rename from crates/nvisy-opendal/src/azblob/mod.rs rename to integrations/nvisy-opendal/src/azblob/mod.rs diff --git a/crates/nvisy-opendal/src/backend.rs b/integrations/nvisy-opendal/src/backend.rs similarity index 100% rename from crates/nvisy-opendal/src/backend.rs rename to integrations/nvisy-opendal/src/backend.rs diff --git a/crates/nvisy-opendal/src/config.rs b/integrations/nvisy-opendal/src/config.rs similarity index 100% rename from crates/nvisy-opendal/src/config.rs rename to integrations/nvisy-opendal/src/config.rs diff --git a/crates/nvisy-opendal/src/dropbox/config.rs b/integrations/nvisy-opendal/src/dropbox/config.rs similarity index 100% rename from crates/nvisy-opendal/src/dropbox/config.rs rename to integrations/nvisy-opendal/src/dropbox/config.rs diff --git a/crates/nvisy-opendal/src/dropbox/mod.rs b/integrations/nvisy-opendal/src/dropbox/mod.rs similarity index 100% rename from crates/nvisy-opendal/src/dropbox/mod.rs rename to integrations/nvisy-opendal/src/dropbox/mod.rs diff --git a/crates/nvisy-opendal/src/error.rs b/integrations/nvisy-opendal/src/error.rs similarity index 100% rename from crates/nvisy-opendal/src/error.rs rename to integrations/nvisy-opendal/src/error.rs diff --git a/crates/nvisy-opendal/src/gcs/config.rs b/integrations/nvisy-opendal/src/gcs/config.rs similarity index 100% rename from crates/nvisy-opendal/src/gcs/config.rs rename to integrations/nvisy-opendal/src/gcs/config.rs diff --git a/crates/nvisy-opendal/src/gcs/mod.rs b/integrations/nvisy-opendal/src/gcs/mod.rs similarity index 100% rename from crates/nvisy-opendal/src/gcs/mod.rs rename to 
integrations/nvisy-opendal/src/gcs/mod.rs diff --git a/crates/nvisy-opendal/src/gdrive/config.rs b/integrations/nvisy-opendal/src/gdrive/config.rs similarity index 100% rename from crates/nvisy-opendal/src/gdrive/config.rs rename to integrations/nvisy-opendal/src/gdrive/config.rs diff --git a/crates/nvisy-opendal/src/gdrive/mod.rs b/integrations/nvisy-opendal/src/gdrive/mod.rs similarity index 100% rename from crates/nvisy-opendal/src/gdrive/mod.rs rename to integrations/nvisy-opendal/src/gdrive/mod.rs diff --git a/crates/nvisy-opendal/src/lib.rs b/integrations/nvisy-opendal/src/lib.rs similarity index 100% rename from crates/nvisy-opendal/src/lib.rs rename to integrations/nvisy-opendal/src/lib.rs diff --git a/crates/nvisy-opendal/src/onedrive/config.rs b/integrations/nvisy-opendal/src/onedrive/config.rs similarity index 100% rename from crates/nvisy-opendal/src/onedrive/config.rs rename to integrations/nvisy-opendal/src/onedrive/config.rs diff --git a/crates/nvisy-opendal/src/onedrive/mod.rs b/integrations/nvisy-opendal/src/onedrive/mod.rs similarity index 100% rename from crates/nvisy-opendal/src/onedrive/mod.rs rename to integrations/nvisy-opendal/src/onedrive/mod.rs diff --git a/crates/nvisy-opendal/src/s3/config.rs b/integrations/nvisy-opendal/src/s3/config.rs similarity index 100% rename from crates/nvisy-opendal/src/s3/config.rs rename to integrations/nvisy-opendal/src/s3/config.rs diff --git a/crates/nvisy-opendal/src/s3/mod.rs b/integrations/nvisy-opendal/src/s3/mod.rs similarity index 100% rename from crates/nvisy-opendal/src/s3/mod.rs rename to integrations/nvisy-opendal/src/s3/mod.rs diff --git a/crates/nvisy-runtime/Cargo.toml b/integrations/nvisy-runtime/Cargo.toml similarity index 100% rename from crates/nvisy-runtime/Cargo.toml rename to integrations/nvisy-runtime/Cargo.toml diff --git a/crates/nvisy-runtime/README.md b/integrations/nvisy-runtime/README.md similarity index 100% rename from crates/nvisy-runtime/README.md rename to integrations/nvisy-runtime/README.md diff --git a/crates/nvisy-runtime/src/engine/config.rs b/integrations/nvisy-runtime/src/engine/config.rs similarity index 100% rename from crates/nvisy-runtime/src/engine/config.rs rename to integrations/nvisy-runtime/src/engine/config.rs diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/integrations/nvisy-runtime/src/engine/executor.rs similarity index 100% rename from crates/nvisy-runtime/src/engine/executor.rs rename to integrations/nvisy-runtime/src/engine/executor.rs diff --git a/crates/nvisy-runtime/src/engine/mod.rs b/integrations/nvisy-runtime/src/engine/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/engine/mod.rs rename to integrations/nvisy-runtime/src/engine/mod.rs diff --git a/crates/nvisy-runtime/src/error.rs b/integrations/nvisy-runtime/src/error.rs similarity index 100% rename from crates/nvisy-runtime/src/error.rs rename to integrations/nvisy-runtime/src/error.rs diff --git a/crates/nvisy-runtime/src/graph/edge.rs b/integrations/nvisy-runtime/src/graph/edge.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/edge.rs rename to integrations/nvisy-runtime/src/graph/edge.rs diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/integrations/nvisy-runtime/src/graph/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/mod.rs rename to integrations/nvisy-runtime/src/graph/mod.rs diff --git a/crates/nvisy-runtime/src/graph/workflow.rs b/integrations/nvisy-runtime/src/graph/workflow.rs similarity index 100% rename from 
crates/nvisy-runtime/src/graph/workflow.rs rename to integrations/nvisy-runtime/src/graph/workflow.rs diff --git a/crates/nvisy-runtime/src/lib.rs b/integrations/nvisy-runtime/src/lib.rs similarity index 100% rename from crates/nvisy-runtime/src/lib.rs rename to integrations/nvisy-runtime/src/lib.rs diff --git a/crates/nvisy-runtime/src/node/data.rs b/integrations/nvisy-runtime/src/node/data.rs similarity index 100% rename from crates/nvisy-runtime/src/node/data.rs rename to integrations/nvisy-runtime/src/node/data.rs diff --git a/crates/nvisy-runtime/src/node/id.rs b/integrations/nvisy-runtime/src/node/id.rs similarity index 100% rename from crates/nvisy-runtime/src/node/id.rs rename to integrations/nvisy-runtime/src/node/id.rs diff --git a/crates/nvisy-runtime/src/node/input/config.rs b/integrations/nvisy-runtime/src/node/input/config.rs similarity index 100% rename from crates/nvisy-runtime/src/node/input/config.rs rename to integrations/nvisy-runtime/src/node/input/config.rs diff --git a/crates/nvisy-runtime/src/node/input/mod.rs b/integrations/nvisy-runtime/src/node/input/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/node/input/mod.rs rename to integrations/nvisy-runtime/src/node/input/mod.rs diff --git a/crates/nvisy-runtime/src/node/mod.rs b/integrations/nvisy-runtime/src/node/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/node/mod.rs rename to integrations/nvisy-runtime/src/node/mod.rs diff --git a/crates/nvisy-runtime/src/node/output/config.rs b/integrations/nvisy-runtime/src/node/output/config.rs similarity index 100% rename from crates/nvisy-runtime/src/node/output/config.rs rename to integrations/nvisy-runtime/src/node/output/config.rs diff --git a/crates/nvisy-runtime/src/node/output/mod.rs b/integrations/nvisy-runtime/src/node/output/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/node/output/mod.rs rename to integrations/nvisy-runtime/src/node/output/mod.rs diff --git a/crates/nvisy-runtime/src/node/transformer/chunking.rs b/integrations/nvisy-runtime/src/node/transformer/chunking.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/chunking.rs rename to integrations/nvisy-runtime/src/node/transformer/chunking.rs diff --git a/crates/nvisy-runtime/src/node/transformer/config.rs b/integrations/nvisy-runtime/src/node/transformer/config.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/config.rs rename to integrations/nvisy-runtime/src/node/transformer/config.rs diff --git a/crates/nvisy-runtime/src/node/transformer/document.rs b/integrations/nvisy-runtime/src/node/transformer/document.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/document.rs rename to integrations/nvisy-runtime/src/node/transformer/document.rs diff --git a/crates/nvisy-runtime/src/node/transformer/embedding.rs b/integrations/nvisy-runtime/src/node/transformer/embedding.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/embedding.rs rename to integrations/nvisy-runtime/src/node/transformer/embedding.rs diff --git a/crates/nvisy-runtime/src/node/transformer/extraction.rs b/integrations/nvisy-runtime/src/node/transformer/extraction.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/extraction.rs rename to integrations/nvisy-runtime/src/node/transformer/extraction.rs diff --git a/crates/nvisy-runtime/src/node/transformer/mod.rs b/integrations/nvisy-runtime/src/node/transformer/mod.rs similarity index 100% rename from 
crates/nvisy-runtime/src/node/transformer/mod.rs rename to integrations/nvisy-runtime/src/node/transformer/mod.rs diff --git a/crates/nvisy-runtime/src/node/transformer/processing.rs b/integrations/nvisy-runtime/src/node/transformer/processing.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/processing.rs rename to integrations/nvisy-runtime/src/node/transformer/processing.rs diff --git a/crates/nvisy-runtime/src/node/transformer/quality.rs b/integrations/nvisy-runtime/src/node/transformer/quality.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/quality.rs rename to integrations/nvisy-runtime/src/node/transformer/quality.rs diff --git a/crates/nvisy-runtime/src/node/transformer/routing.rs b/integrations/nvisy-runtime/src/node/transformer/routing.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/routing.rs rename to integrations/nvisy-runtime/src/node/transformer/routing.rs diff --git a/crates/nvisy-runtime/src/runtime/config.rs b/integrations/nvisy-runtime/src/runtime/config.rs similarity index 100% rename from crates/nvisy-runtime/src/runtime/config.rs rename to integrations/nvisy-runtime/src/runtime/config.rs diff --git a/crates/nvisy-runtime/src/runtime/mod.rs b/integrations/nvisy-runtime/src/runtime/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/runtime/mod.rs rename to integrations/nvisy-runtime/src/runtime/mod.rs diff --git a/crates/nvisy-runtime/src/runtime/service.rs b/integrations/nvisy-runtime/src/runtime/service.rs similarity index 100% rename from crates/nvisy-runtime/src/runtime/service.rs rename to integrations/nvisy-runtime/src/runtime/service.rs diff --git a/crates/nvisy-vector/Cargo.toml b/integrations/nvisy-vector/Cargo.toml similarity index 100% rename from crates/nvisy-vector/Cargo.toml rename to integrations/nvisy-vector/Cargo.toml diff --git a/crates/nvisy-vector/README.md b/integrations/nvisy-vector/README.md similarity index 100% rename from crates/nvisy-vector/README.md rename to integrations/nvisy-vector/README.md diff --git a/crates/nvisy-vector/src/config.rs b/integrations/nvisy-vector/src/config.rs similarity index 100% rename from crates/nvisy-vector/src/config.rs rename to integrations/nvisy-vector/src/config.rs diff --git a/crates/nvisy-vector/src/error.rs b/integrations/nvisy-vector/src/error.rs similarity index 100% rename from crates/nvisy-vector/src/error.rs rename to integrations/nvisy-vector/src/error.rs diff --git a/crates/nvisy-vector/src/lib.rs b/integrations/nvisy-vector/src/lib.rs similarity index 100% rename from crates/nvisy-vector/src/lib.rs rename to integrations/nvisy-vector/src/lib.rs diff --git a/crates/nvisy-vector/src/milvus/backend.rs b/integrations/nvisy-vector/src/milvus/backend.rs similarity index 100% rename from crates/nvisy-vector/src/milvus/backend.rs rename to integrations/nvisy-vector/src/milvus/backend.rs diff --git a/crates/nvisy-vector/src/milvus/config.rs b/integrations/nvisy-vector/src/milvus/config.rs similarity index 100% rename from crates/nvisy-vector/src/milvus/config.rs rename to integrations/nvisy-vector/src/milvus/config.rs diff --git a/crates/nvisy-vector/src/milvus/mod.rs b/integrations/nvisy-vector/src/milvus/mod.rs similarity index 100% rename from crates/nvisy-vector/src/milvus/mod.rs rename to integrations/nvisy-vector/src/milvus/mod.rs diff --git a/crates/nvisy-vector/src/pgvector/backend.rs b/integrations/nvisy-vector/src/pgvector/backend.rs similarity index 100% rename from 
crates/nvisy-vector/src/pgvector/backend.rs rename to integrations/nvisy-vector/src/pgvector/backend.rs diff --git a/crates/nvisy-vector/src/pgvector/config.rs b/integrations/nvisy-vector/src/pgvector/config.rs similarity index 100% rename from crates/nvisy-vector/src/pgvector/config.rs rename to integrations/nvisy-vector/src/pgvector/config.rs diff --git a/crates/nvisy-vector/src/pgvector/mod.rs b/integrations/nvisy-vector/src/pgvector/mod.rs similarity index 100% rename from crates/nvisy-vector/src/pgvector/mod.rs rename to integrations/nvisy-vector/src/pgvector/mod.rs diff --git a/crates/nvisy-vector/src/pinecone/backend.rs b/integrations/nvisy-vector/src/pinecone/backend.rs similarity index 100% rename from crates/nvisy-vector/src/pinecone/backend.rs rename to integrations/nvisy-vector/src/pinecone/backend.rs diff --git a/crates/nvisy-vector/src/pinecone/config.rs b/integrations/nvisy-vector/src/pinecone/config.rs similarity index 100% rename from crates/nvisy-vector/src/pinecone/config.rs rename to integrations/nvisy-vector/src/pinecone/config.rs diff --git a/crates/nvisy-vector/src/pinecone/mod.rs b/integrations/nvisy-vector/src/pinecone/mod.rs similarity index 100% rename from crates/nvisy-vector/src/pinecone/mod.rs rename to integrations/nvisy-vector/src/pinecone/mod.rs diff --git a/crates/nvisy-vector/src/qdrant/backend.rs b/integrations/nvisy-vector/src/qdrant/backend.rs similarity index 100% rename from crates/nvisy-vector/src/qdrant/backend.rs rename to integrations/nvisy-vector/src/qdrant/backend.rs diff --git a/crates/nvisy-vector/src/qdrant/config.rs b/integrations/nvisy-vector/src/qdrant/config.rs similarity index 100% rename from crates/nvisy-vector/src/qdrant/config.rs rename to integrations/nvisy-vector/src/qdrant/config.rs diff --git a/crates/nvisy-vector/src/qdrant/mod.rs b/integrations/nvisy-vector/src/qdrant/mod.rs similarity index 100% rename from crates/nvisy-vector/src/qdrant/mod.rs rename to integrations/nvisy-vector/src/qdrant/mod.rs diff --git a/crates/nvisy-vector/src/store.rs b/integrations/nvisy-vector/src/store.rs similarity index 100% rename from crates/nvisy-vector/src/store.rs rename to integrations/nvisy-vector/src/store.rs From f139015b7ffefc67e143d3db38b2fcf47edf6b9e Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 19 Jan 2026 12:07:00 +0100 Subject: [PATCH 07/28] feat(integrations): add nvisy-data crate, move nvisy-rig, remove pipeline workers - Add nvisy-data foundational crate with DataInput, DataOutput, VectorOutput traits - Move nvisy-rig from crates/ to integrations/ - Update nvisy-vector and nvisy-opendal to use nvisy-data traits - Remove pipeline worker folder from nvisy-server (keep handlers/models) - Remove nvisy-rig dependency from nvisy-server --- Cargo.lock | 1190 ++++++++++++++--- Cargo.toml | 6 +- crates/nvisy-cli/src/config/mod.rs | 5 - crates/nvisy-cli/src/main.rs | 12 +- crates/nvisy-server/Cargo.toml | 2 - crates/nvisy-server/src/lib.rs | 1 - .../nvisy-server/src/pipeline/job_handler.rs | 60 - crates/nvisy-server/src/pipeline/mod.rs | 176 --- .../src/pipeline/postprocessing.rs | 93 -- .../src/pipeline/preprocessing.rs | 73 - .../nvisy-server/src/pipeline/processing.rs | 93 -- crates/nvisy-server/src/pipeline/state.rs | 72 - crates/nvisy-server/src/pipeline/worker.rs | 190 --- crates/nvisy-server/src/service/config.rs | 5 - crates/nvisy-server/src/service/mod.rs | 20 - integrations/nvisy-data/Cargo.toml | 39 + integrations/nvisy-data/src/error.rs | 99 ++ integrations/nvisy-data/src/input.rs | 55 + 
integrations/nvisy-data/src/lib.rs | 20 + integrations/nvisy-data/src/output.rs | 61 + integrations/nvisy-data/src/types.rs | 52 + integrations/nvisy-data/src/vector.rs | 86 ++ integrations/nvisy-opendal/Cargo.toml | 6 +- integrations/nvisy-opendal/src/backend.rs | 297 ++-- integrations/nvisy-opendal/src/error.rs | 98 -- integrations/nvisy-opendal/src/lib.rs | 11 +- {crates => integrations}/nvisy-rig/Cargo.toml | 0 {crates => integrations}/nvisy-rig/README.md | 0 .../nvisy-rig/src/chat/agent/context.rs | 0 .../nvisy-rig/src/chat/agent/executor.rs | 0 .../nvisy-rig/src/chat/agent/mod.rs | 0 .../nvisy-rig/src/chat/agent/prompt.rs | 0 .../nvisy-rig/src/chat/event.rs | 0 .../nvisy-rig/src/chat/mod.rs | 0 .../nvisy-rig/src/chat/response.rs | 0 .../nvisy-rig/src/chat/service.rs | 0 .../nvisy-rig/src/chat/stream.rs | 0 .../nvisy-rig/src/chat/usage.rs | 0 .../nvisy-rig/src/error.rs | 0 {crates => integrations}/nvisy-rig/src/lib.rs | 0 .../nvisy-rig/src/provider/config.rs | 0 .../nvisy-rig/src/provider/embedding.rs | 0 .../nvisy-rig/src/provider/mod.rs | 0 .../nvisy-rig/src/provider/registry.rs | 0 .../nvisy-rig/src/rag/config.rs | 0 .../nvisy-rig/src/rag/indexer/indexed.rs | 0 .../nvisy-rig/src/rag/indexer/mod.rs | 0 .../nvisy-rig/src/rag/mod.rs | 0 .../nvisy-rig/src/rag/searcher/mod.rs | 0 .../nvisy-rig/src/rag/searcher/retrieved.rs | 0 .../nvisy-rig/src/rag/searcher/scope.rs | 0 .../nvisy-rig/src/rag/splitter/chunk.rs | 0 .../nvisy-rig/src/rag/splitter/metadata.rs | 0 .../nvisy-rig/src/rag/splitter/mod.rs | 0 .../nvisy-rig/src/service/config.rs | 0 .../nvisy-rig/src/service/mod.rs | 0 .../nvisy-rig/src/service/rig.rs | 0 .../nvisy-rig/src/session/message.rs | 0 .../nvisy-rig/src/session/mod.rs | 0 .../nvisy-rig/src/session/policy.rs | 0 .../nvisy-rig/src/session/store.rs | 0 .../nvisy-rig/src/tool/definition.rs | 0 .../nvisy-rig/src/tool/edit/mod.rs | 0 .../nvisy-rig/src/tool/edit/operation.rs | 0 .../nvisy-rig/src/tool/edit/proposed.rs | 0 .../nvisy-rig/src/tool/mod.rs | 0 .../nvisy-rig/src/tool/registry.rs | 0 .../nvisy-rig/src/tool/types.rs | 0 integrations/nvisy-runtime/src/error.rs | 2 +- integrations/nvisy-vector/Cargo.toml | 13 +- integrations/nvisy-vector/src/error.rs | 99 -- integrations/nvisy-vector/src/lib.rs | 16 +- .../nvisy-vector/src/milvus/backend.rs | 286 ++-- .../nvisy-vector/src/pgvector/backend.rs | 376 +++--- .../nvisy-vector/src/pinecone/backend.rs | 276 ++-- .../nvisy-vector/src/qdrant/backend.rs | 166 +-- integrations/nvisy-vector/src/store.rs | 214 +-- 77 files changed, 2314 insertions(+), 1956 deletions(-) delete mode 100644 crates/nvisy-server/src/pipeline/job_handler.rs delete mode 100644 crates/nvisy-server/src/pipeline/mod.rs delete mode 100644 crates/nvisy-server/src/pipeline/postprocessing.rs delete mode 100644 crates/nvisy-server/src/pipeline/preprocessing.rs delete mode 100644 crates/nvisy-server/src/pipeline/processing.rs delete mode 100644 crates/nvisy-server/src/pipeline/state.rs delete mode 100644 crates/nvisy-server/src/pipeline/worker.rs create mode 100644 integrations/nvisy-data/Cargo.toml create mode 100644 integrations/nvisy-data/src/error.rs create mode 100644 integrations/nvisy-data/src/input.rs create mode 100644 integrations/nvisy-data/src/lib.rs create mode 100644 integrations/nvisy-data/src/output.rs create mode 100644 integrations/nvisy-data/src/types.rs create mode 100644 integrations/nvisy-data/src/vector.rs delete mode 100644 integrations/nvisy-opendal/src/error.rs rename {crates => integrations}/nvisy-rig/Cargo.toml (100%) rename {crates => 
integrations}/nvisy-rig/README.md (100%) rename {crates => integrations}/nvisy-rig/src/chat/agent/context.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/agent/executor.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/agent/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/agent/prompt.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/event.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/response.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/service.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/stream.rs (100%) rename {crates => integrations}/nvisy-rig/src/chat/usage.rs (100%) rename {crates => integrations}/nvisy-rig/src/error.rs (100%) rename {crates => integrations}/nvisy-rig/src/lib.rs (100%) rename {crates => integrations}/nvisy-rig/src/provider/config.rs (100%) rename {crates => integrations}/nvisy-rig/src/provider/embedding.rs (100%) rename {crates => integrations}/nvisy-rig/src/provider/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/provider/registry.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/config.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/indexer/indexed.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/indexer/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/searcher/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/searcher/retrieved.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/searcher/scope.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/splitter/chunk.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/splitter/metadata.rs (100%) rename {crates => integrations}/nvisy-rig/src/rag/splitter/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/service/config.rs (100%) rename {crates => integrations}/nvisy-rig/src/service/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/service/rig.rs (100%) rename {crates => integrations}/nvisy-rig/src/session/message.rs (100%) rename {crates => integrations}/nvisy-rig/src/session/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/session/policy.rs (100%) rename {crates => integrations}/nvisy-rig/src/session/store.rs (100%) rename {crates => integrations}/nvisy-rig/src/tool/definition.rs (100%) rename {crates => integrations}/nvisy-rig/src/tool/edit/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/tool/edit/operation.rs (100%) rename {crates => integrations}/nvisy-rig/src/tool/edit/proposed.rs (100%) rename {crates => integrations}/nvisy-rig/src/tool/mod.rs (100%) rename {crates => integrations}/nvisy-rig/src/tool/registry.rs (100%) rename {crates => integrations}/nvisy-rig/src/tool/types.rs (100%) delete mode 100644 integrations/nvisy-vector/src/error.rs diff --git a/Cargo.lock b/Cargo.lock index 1e0a237..75d3f39 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -61,7 +61,7 @@ dependencies = [ "axum-extra 0.10.3", "bytes", "cfg-if", - "http", + "http 1.4.0", "indexmap 2.13.0", "schemars 0.9.0", "serde", @@ -82,7 +82,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -224,7 +224,7 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86dde77d8a733a9dbaf865a9eb65c72e09c88f3d14d3dd0d2aecf511920ee4fe" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-util", 
"memchr", @@ -235,9 +235,9 @@ dependencies = [ "portable-atomic", "rand 0.8.5", "regex", - "ring", + "ring 0.17.14", "rustls-native-certs 0.7.3", - "rustls-pemfile", + "rustls-pemfile 2.2.0", "rustls-webpki 0.102.8", "serde", "serde_json", @@ -246,7 +246,7 @@ dependencies = [ "thiserror 1.0.69", "time", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-stream", "tokio-util", "tokio-websockets", @@ -274,7 +274,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -285,7 +285,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -303,7 +303,7 @@ dependencies = [ "derive_utils", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -335,6 +335,34 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "axum" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" +dependencies = [ + "async-trait", + "axum-core 0.3.4", + "bitflags 1.3.2", + "bytes", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "itoa", + "matchit 0.7.3", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 0.1.2", + "tower 0.4.13", + "tower-layer", + "tower-service", +] + [[package]] name = "axum" version = "0.7.9" @@ -345,8 +373,8 @@ dependencies = [ "axum-core 0.4.5", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "itoa", "matchit 0.7.3", @@ -356,7 +384,7 @@ dependencies = [ "pin-project-lite", "rustversion", "serde", - "sync_wrapper", + "sync_wrapper 1.0.2", "tower 0.5.3", "tower-layer", "tower-service", @@ -373,10 +401,10 @@ dependencies = [ "bytes", "form_urlencoded", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-util", "itoa", "matchit 0.8.4", @@ -389,7 +417,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tower 0.5.3", "tower-layer", @@ -408,6 +436,23 @@ dependencies = [ "serde", ] +[[package]] +name = "axum-core" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "mime", + "rustversion", + "tower-layer", + "tower-service", +] + [[package]] name = "axum-core" version = "0.4.5" @@ -417,13 +462,13 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", "rustversion", - "sync_wrapper", + "sync_wrapper 1.0.2", "tower-layer", "tower-service", ] @@ -436,12 +481,12 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", - "sync_wrapper", + "sync_wrapper 1.0.2", "tower-layer", "tower-service", "tracing", @@ -457,8 +502,8 @@ dependencies = [ "axum-core 0.5.6", "bytes", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "mime", 
"pin-project-lite", @@ -482,8 +527,8 @@ dependencies = [ "futures-core", "futures-util", "headers", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", @@ -503,7 +548,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -515,16 +560,16 @@ dependencies = [ "arc-swap", "bytes", "fs-err", - "http", - "http-body", - "hyper", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", "hyper-util", "pin-project-lite", - "rustls", - "rustls-pemfile", + "rustls 0.23.36", + "rustls-pemfile 2.2.0", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", ] @@ -540,9 +585,9 @@ dependencies = [ "bytesize", "cookie", "expect-json", - "http", + "http 1.4.0", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-util", "mime", "pretty_assertions", @@ -583,6 +628,18 @@ dependencies = [ "windows-link", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -624,6 +681,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.10.0" @@ -821,7 +884,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -836,7 +899,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31211fc26899744f5b22521fdc971e5f3875991d8880537537470685a0e9552d" dependencies = [ - "http", + "http 1.4.0", ] [[package]] @@ -1104,7 +1167,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1138,7 +1201,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.114", ] [[package]] @@ -1152,7 +1215,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn", + "syn 2.0.114", ] [[package]] @@ -1163,7 +1226,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1174,7 +1237,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1249,7 +1312,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1259,7 +1322,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn", + "syn 2.0.114", ] [[package]] @@ -1281,7 +1344,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn", + "syn 2.0.114", "unicode-xid", ] @@ -1293,7 +1356,7 @@ checksum = 
"ccfae181bab5ab6c5478b2ccb69e4c68a02f8c3ec72f6616bfec9dbc599d2ee0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1303,7 +1366,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e130c806dccc85428c564f2dc5a96e05b6615a27c9a28776bd7761a9af4bb552" dependencies = [ "bigdecimal", - "bitflags", + "bitflags 2.10.0", "byteorder", "diesel_derives", "downcast-rs", @@ -1342,7 +1405,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1355,7 +1418,7 @@ dependencies = [ "dsl_auto_type", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1375,7 +1438,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe2444076b48641147115697648dc743c2c00b61adade0f01ce67133c7babe8c" dependencies = [ - "syn", + "syn 2.0.114", ] [[package]] @@ -1415,7 +1478,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1450,7 +1513,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1596,7 +1659,7 @@ checksum = "f464e1e518bc97a6749590758411784df7dda4f36384e1fb11a58f040c1d0459" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1645,6 +1708,12 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -1674,6 +1743,21 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -1755,7 +1839,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -1883,6 +1967,25 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap 2.13.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.13" @@ -1894,7 +1997,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.4.0", "indexmap 2.13.0", "slab", "tokio", @@ -1945,10 +2048,10 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3314d5adb5d94bcdf56771f2e50dbbc80bb4bdf88967526706205ac9eff24eb" dependencies = [ - "base64", + "base64 0.22.1", 
"bytes", "headers-core", - "http", + "http 1.4.0", "httpdate", "mime", "sha1", @@ -1960,7 +2063,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" dependencies = [ - "http", + "http 1.4.0", ] [[package]] @@ -2015,6 +2118,17 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.4.0" @@ -2025,6 +2139,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -2032,7 +2157,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.4.0", ] [[package]] @@ -2043,8 +2168,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -2075,6 +2200,30 @@ dependencies = [ "typenum", ] +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.8.1" @@ -2085,9 +2234,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", - "http", - "http-body", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", "httparse", "httpdate", "itoa", @@ -2104,44 +2253,72 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.4.0", + "hyper 1.8.1", "hyper-util", - "rustls", + "rustls 0.23.36", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", "webpki-roots 1.0.5", ] +[[package]] +name = "hyper-timeout" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" +dependencies = [ + "hyper 0.14.32", + "pin-project-lite", + "tokio", + "tokio-io-timeout", +] + [[package]] name = "hyper-timeout" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper", + "hyper 1.8.1", "hyper-util", "pin-project-lite", "tokio", "tower-service", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.8.1", + 
"hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http", - "http-body", - "hyper", + "http 1.4.0", + "http-body 1.0.1", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", @@ -2398,6 +2575,24 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.13.0" @@ -2455,7 +2650,7 @@ checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -2499,10 +2694,10 @@ version = "9.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" dependencies = [ - "base64", + "base64 0.22.1", "js-sys", "pem", - "ring", + "ring 0.17.14", "serde", "serde_json", "simple_asn1", @@ -2515,7 +2710,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c76e1c7d7df3e34443b3621b459b066a7b79644f059fc8b2db7070c825fd417e" dependencies = [ "aws-lc-rs", - "base64", + "base64 0.22.1", "getrandom 0.2.17", "js-sys", "pem", @@ -2531,7 +2726,7 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" dependencies = [ - "spin", + "spin 0.9.8", ] [[package]] @@ -2558,11 +2753,17 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ - "bitflags", + "bitflags 2.10.0", "libc", "redox_syscall 0.7.0", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -2697,6 +2898,25 @@ dependencies = [ "quote", ] +[[package]] +name = "milvus-sdk-rust" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e291050787c486091140a968f4d90759638a55f0883bcf30acc8f0470efaf0" +dependencies = [ + "anyhow", + "base64 0.21.7", + "prost 0.11.9", + "serde", + "serde_json", + "strum 0.24.1", + "strum_macros 0.24.3", + "thiserror 1.0.69", + "tokio", + "tonic 0.8.3", + "tonic-build 0.8.4", +] + [[package]] name = "mime" version = "0.3.17" @@ -2769,14 +2989,43 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http", + "http 1.4.0", "httparse", "memchr", "mime", - "spin", + "spin 0.9.8", "version_check", ] +[[package]] +name = "multimap" +version = "0.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" + +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + +[[package]] +name = "native-tls" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe 0.1.6", + "openssl-sys", + "schannel", + "security-framework 2.11.1", + "security-framework-sys", + "tempfile", +] + [[package]] name = "nkeys" version = "0.4.5" @@ -2958,13 +3207,26 @@ dependencies = [ "tracing", ] +[[package]] +name = "nvisy-data" +version = "0.1.0" +dependencies = [ + "async-trait", + "bytes", + "futures", + "serde", + "serde_json", + "thiserror 2.0.18", + "tokio", +] + [[package]] name = "nvisy-nats" version = "0.1.0" dependencies = [ "async-nats", "async-stream", - "base64", + "base64 0.22.1", "bytes", "clap", "derive_more", @@ -2976,7 +3238,7 @@ dependencies = [ "serde", "serde_json", "sha2", - "strum", + "strum 0.27.2", "thiserror 2.0.18", "tokio", "tracing", @@ -2987,14 +3249,14 @@ dependencies = [ name = "nvisy-opendal" version = "0.1.0" dependencies = [ - "derive_more", + "async-trait", + "bytes", "futures", "jiff", - "nvisy-core", + "nvisy-data", "opendal", "serde", "serde_json", - "thiserror 2.0.18", "tokio", "tracing", ] @@ -3003,7 +3265,7 @@ dependencies = [ name = "nvisy-postgres" version = "0.1.0" dependencies = [ - "base64", + "base64 0.22.1", "bigdecimal", "clap", "deadpool", @@ -3021,7 +3283,7 @@ dependencies = [ "schemars 0.9.0", "serde", "serde_json", - "strum", + "strum 0.27.2", "thiserror 2.0.18", "tokio", "tracing", @@ -3062,7 +3324,7 @@ dependencies = [ "derive_more", "flate2", "nvisy-rt-core", - "strum", + "strum 0.27.2", "tar", "tempfile", "tokio", @@ -3083,7 +3345,7 @@ dependencies = [ "jiff", "serde", "sha2", - "strum", + "strum 0.27.2", "thiserror 2.0.18", "tokio", "uuid", @@ -3095,14 +3357,14 @@ version = "0.1.0" source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "async-trait", - "base64", + "base64 0.22.1", "bytes", "derive_more", "jiff", "nvisy-rt-core", "serde", "serde_json", - "strum", + "strum 0.27.2", "thiserror 2.0.18", "tokio", "uuid", @@ -3184,7 +3446,7 @@ dependencies = [ "nvisy-opendal", "nvisy-rt-core", "nvisy-rt-engine", - "petgraph", + "petgraph 0.8.3", "semver", "serde", "serde_json", @@ -3206,7 +3468,7 @@ dependencies = [ "axum-client-ip", "axum-extra 0.12.5", "axum-test", - "base64", + "base64 0.22.1", "bigdecimal", "clap", "derive_more", @@ -3217,7 +3479,6 @@ dependencies = [ "jsonwebtoken 10.2.0", "nvisy-nats", "nvisy-postgres", - "nvisy-rig", "nvisy-webhook", "rand 0.10.0-rc.6", "regex", @@ -3225,7 +3486,7 @@ dependencies = [ "serde", "serde_json", "sha2", - "strum", + "strum 0.27.2", "tempfile", "thiserror 2.0.18", "tokio", @@ -3247,14 +3508,19 @@ name = "nvisy-vector" version = "0.1.0" dependencies = [ "async-trait", - "derive_more", + "deadpool", + "diesel", + "diesel-async", "futures", - "nvisy-core", + "milvus-sdk-rust", + "nvisy-data", + "pgvector", + "pinecone-sdk", + "prost-types 0.12.6", "qdrant-client", "reqwest", "serde", "serde_json", - "thiserror 2.0.18", "tokio", "tracing", ] @@ -3311,14 +3577,14 @@ dependencies = [ "anyhow", 
"async-trait", "backon", - "base64", + "base64 0.22.1", "bytes", "chrono", "crc32c", "futures", "getrandom 0.2.17", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "log", "md-5", "moka", @@ -3333,6 +3599,32 @@ dependencies = [ "uuid", ] +[[package]] +name = "openssl" +version = "0.10.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" +dependencies = [ + "bitflags 2.10.0", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "openssl-probe" version = "0.1.6" @@ -3345,6 +3637,18 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" +[[package]] +name = "openssl-sys" +version = "0.9.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "ordered-float" version = "5.1.0" @@ -3420,7 +3724,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64", + "base64 0.22.1", "serde_core", ] @@ -3439,13 +3743,23 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" +dependencies = [ + "fixedbitset 0.4.2", + "indexmap 2.13.0", +] + [[package]] name = "petgraph" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ - "fixedbitset", + "fixedbitset 0.5.7", "hashbrown 0.15.5", "indexmap 2.13.0", "serde", @@ -3518,7 +3832,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -3534,21 +3848,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "pkcs1" -version = "0.7.5" +name = "pinecone-sdk" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +checksum = "f571fcb86d81e70a5de2817a029fa9e52160f66f10d662584b56607ae6c5dab9" dependencies = [ - "der", - "pkcs8", - "spki", -] - -[[package]] -name = "pkcs5" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" + "anyhow", + "once_cell", + "prost 0.12.6", + "prost-types 0.12.6", + "rand 0.8.5", + "regex", + "reqwest", + "serde", + "serde_json", + "snafu", + "thiserror 1.0.69", + "tokio", + "tonic 0.11.0", + "tonic-build 0.11.0", + "url", + "uuid", +] 
+ +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" dependencies = [ "aes", "cbc", @@ -3598,7 +3936,7 @@ version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491" dependencies = [ - "base64", + "base64 0.22.1", "byteorder", "bytes", "fallible-iterator", @@ -3674,6 +4012,26 @@ dependencies = [ "yansi", ] +[[package]] +name = "prettyplease" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" +dependencies = [ + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn 2.0.114", +] + [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -3693,7 +4051,7 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -3705,6 +4063,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" +dependencies = [ + "bytes", + "prost-derive 0.11.9", +] + +[[package]] +name = "prost" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +dependencies = [ + "bytes", + "prost-derive 0.12.6", +] + [[package]] name = "prost" version = "0.13.5" @@ -3712,7 +4090,76 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.13.5", +] + +[[package]] +name = "prost-build" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" +dependencies = [ + "bytes", + "heck 0.4.1", + "itertools 0.10.5", + "lazy_static", + "log", + "multimap 0.8.3", + "petgraph 0.6.5", + "prettyplease 0.1.25", + "prost 0.11.9", + "prost-types 0.11.9", + "regex", + "syn 1.0.109", + "tempfile", + "which", +] + +[[package]] +name = "prost-build" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +dependencies = [ + "bytes", + "heck 0.5.0", + "itertools 0.12.1", + "log", + "multimap 0.10.1", + "once_cell", + "petgraph 0.6.5", + "prettyplease 0.2.37", + "prost 0.12.6", + "prost-types 0.12.6", + "regex", + "syn 2.0.114", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" +dependencies = [ + "anyhow", + "itertools 0.10.5", + "proc-macro2", + "quote", + "syn 
1.0.109", +] + +[[package]] +name = "prost-derive" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +dependencies = [ + "anyhow", + "itertools 0.12.1", + "proc-macro2", + "quote", + "syn 2.0.114", ] [[package]] @@ -3725,7 +4172,25 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn", + "syn 2.0.114", +] + +[[package]] +name = "prost-types" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" +dependencies = [ + "prost 0.11.9", +] + +[[package]] +name = "prost-types" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +dependencies = [ + "prost 0.12.6", ] [[package]] @@ -3734,7 +4199,7 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" dependencies = [ - "prost", + "prost 0.13.5", ] [[package]] @@ -3748,15 +4213,15 @@ dependencies = [ "futures", "futures-util", "parking_lot", - "prost", - "prost-types", + "prost 0.13.5", + "prost-types 0.13.5", "reqwest", "semver", "serde", "serde_json", "thiserror 1.0.69", "tokio", - "tonic", + "tonic 0.12.3", ] [[package]] @@ -3781,7 +4246,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", + "rustls 0.23.36", "socket2 0.6.1", "thiserror 2.0.18", "tokio", @@ -3799,9 +4264,9 @@ dependencies = [ "getrandom 0.3.4", "lru-slab", "rand 0.9.2", - "ring", + "ring 0.17.14", "rustc-hash", - "rustls", + "rustls 0.23.36", "rustls-pki-types", "slab", "thiserror 2.0.18", @@ -3922,7 +4387,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags", + "bitflags 2.10.0", ] [[package]] @@ -3931,7 +4396,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" dependencies = [ - "bitflags", + "bitflags 2.10.0", ] [[package]] @@ -3951,7 +4416,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -3991,14 +4456,14 @@ checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" dependencies = [ "anyhow", "async-trait", - "base64", + "base64 0.22.1", "chrono", "form_urlencoded", "getrandom 0.2.17", "hex", "hmac", "home", - "http", + "http 1.4.0", "jsonwebtoken 9.3.1", "log", "percent-encoding", @@ -4020,33 +4485,36 @@ version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-rustls", + "hyper-tls", "hyper-util", "js-sys", "log", "mime", "mime_guess", + "native-tls", "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.36", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", - "tokio-rustls", + 
"tokio-native-tls", + "tokio-rustls 0.26.4", "tokio-util", "tower 0.5.3", "tower-http", @@ -4076,14 +4544,14 @@ checksum = "5b1a48121c1ecd6f6ce59d64ec353c791aac6fc07bf4aa353380e8185659e6eb" dependencies = [ "as-any", "async-stream", - "base64", + "base64 0.22.1", "bytes", "eventsource-stream", "fastrand", "futures", "futures-timer", "glob", - "http", + "http 1.4.0", "mime", "mime_guess", "ordered-float", @@ -4099,6 +4567,21 @@ dependencies = [ "url", ] +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + [[package]] name = "ring" version = "0.17.14" @@ -4153,7 +4636,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", + "http 1.4.0", "mime", "rand 0.9.2", "thiserror 2.0.18", @@ -4180,19 +4663,58 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.10.0", + "errno", + "libc", + "linux-raw-sys 0.4.15", + "windows-sys 0.52.0", +] + [[package]] name = "rustix" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags", + "bitflags 2.10.0", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.11.0", "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.20.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" +dependencies = [ + "log", + "ring 0.16.20", + "sct", + "webpki", +] + +[[package]] +name = "rustls" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" +dependencies = [ + "log", + "ring 0.17.14", + "rustls-pki-types", + "rustls-webpki 0.102.8", + "subtle", + "zeroize", +] + [[package]] name = "rustls" version = "0.23.36" @@ -4202,13 +4724,25 @@ dependencies = [ "aws-lc-rs", "log", "once_cell", - "ring", + "ring 0.17.14", "rustls-pki-types", "rustls-webpki 0.103.9", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe 0.1.6", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.7.3" @@ -4216,7 +4750,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" dependencies = [ "openssl-probe 0.1.6", - "rustls-pemfile", + "rustls-pemfile 2.2.0", "rustls-pki-types", "schannel", "security-framework 2.11.1", @@ -4234,6 +4768,15 @@ dependencies = [ "security-framework 3.5.1", ] +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + [[package]] name = "rustls-pemfile" version = "2.2.0" @@ -4259,6 +4802,7 @@ version = 
"0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ + "ring 0.17.14", "rustls-pki-types", "untrusted 0.9.0", ] @@ -4270,7 +4814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "aws-lc-rs", - "ring", + "ring 0.17.14", "rustls-pki-types", "untrusted 0.9.0", ] @@ -4343,7 +4887,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn", + "syn 2.0.114", ] [[package]] @@ -4355,7 +4899,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn", + "syn 2.0.114", ] [[package]] @@ -4390,13 +4934,23 @@ dependencies = [ "sha2", ] +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring 0.17.14", + "untrusted 0.9.0", +] + [[package]] name = "security-framework" version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags", + "bitflags 2.10.0", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -4409,7 +4963,7 @@ version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ - "bitflags", + "bitflags 2.10.0", "core-foundation 0.10.1", "core-foundation-sys", "libc", @@ -4463,7 +5017,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -4474,7 +5028,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -4544,7 +5098,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -4673,6 +5227,27 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "snafu" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.114", +] + [[package]] name = "socket2" version = "0.5.10" @@ -4693,6 +5268,12 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.9.8" @@ -4732,13 +5313,32 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" + [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros", + "strum_macros 0.27.2", +] + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", ] [[package]] @@ -4750,7 +5350,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -4759,6 +5359,17 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.114" @@ -4770,6 +5381,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -4787,7 +5404,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -4796,7 +5413,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags", + "bitflags 2.10.0", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -4837,7 +5454,7 @@ dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix", + "rustix 1.1.3", "windows-sys 0.61.2", ] @@ -4854,7 +5471,7 @@ dependencies = [ "icu_segmenter", "itertools 0.14.0", "memchr", - "strum", + "strum 0.27.2", "thiserror 2.0.18", ] @@ -4884,7 +5501,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -4895,7 +5512,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -4982,6 +5599,7 @@ dependencies = [ "bytes", "libc", "mio", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2 0.6.1", @@ -4989,6 +5607,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tokio-io-timeout" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bd86198d9ee903fedd2f9a2e72014287c0d9167e4ae43b5853007205dda1b76" +dependencies = [ + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-macros" version = "2.6.0" @@ -4997,7 +5625,17 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", ] [[package]] @@ -5026,13 +5664,35 @@ dependencies = [ "whoami", ] +[[package]] +name = "tokio-rustls" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +dependencies = [ + "rustls 0.20.9", + "tokio", + "webpki", +] + +[[package]] +name = "tokio-rustls" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" +dependencies = [ + "rustls 0.22.4", + "rustls-pki-types", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.36", "tokio", ] @@ -5066,17 +5726,17 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-core", "futures-sink", - "http", + "http 1.4.0", "httparse", "rand 0.8.5", - "ring", + "ring 0.17.14", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "webpki-roots 0.26.11", ] @@ -5112,6 +5772,72 @@ dependencies = [ "winnow", ] +[[package]] +name = "tonic" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f219fad3b929bef19b1f86fbc0358d35daed8f2cac972037ac0dc10bbb8d5fb" +dependencies = [ + "async-stream", + "async-trait", + "axum 0.6.20", + "base64 0.13.1", + "bytes", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-timeout 0.4.1", + "percent-encoding", + "pin-project", + "prost 0.11.9", + "prost-derive 0.11.9", + "rustls-native-certs 0.6.3", + "rustls-pemfile 1.0.4", + "tokio", + "tokio-rustls 0.23.4", + "tokio-stream", + "tokio-util", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", + "tracing-futures", +] + +[[package]] +name = "tonic" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" +dependencies = [ + "async-stream", + "async-trait", + "axum 0.6.20", + "base64 0.21.7", + "bytes", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-timeout 0.4.1", + "percent-encoding", + "pin-project", + "prost 0.12.6", + "rustls-native-certs 0.7.3", + "rustls-pemfile 2.2.0", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.25.0", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tonic" version = "0.12.3" @@ -5121,24 +5847,24 @@ dependencies = [ "async-stream", "async-trait", "axum 0.7.9", - "base64", + "base64 0.22.1", "bytes", "flate2", - "h2", - "http", - "http-body", + "h2 0.4.13", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-timeout", + "hyper 1.8.1", + "hyper-timeout 0.5.2", "hyper-util", "percent-encoding", "pin-project", - "prost", + "prost 0.13.5", "rustls-native-certs 0.8.3", - "rustls-pemfile", + "rustls-pemfile 2.2.0", "socket2 0.5.10", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-stream", "tower 0.4.13", "tower-layer", @@ -5146,6 +5872,32 @@ dependencies = [ "tracing", 
] +[[package]] +name = "tonic-build" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" +dependencies = [ + "prettyplease 0.1.25", + "proc-macro2", + "prost-build 0.11.9", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "tonic-build" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4ef6dd70a610078cb4e338a0f79d06bc759ff1b22d2120c2ff02ae264ba9c2" +dependencies = [ + "prettyplease 0.2.37", + "proc-macro2", + "prost-build 0.12.6", + "quote", + "syn 2.0.114", +] + [[package]] name = "tower" version = "0.4.13" @@ -5178,7 +5930,7 @@ dependencies = [ "indexmap 2.13.0", "pin-project-lite", "slab", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tokio-util", "tower-layer", @@ -5193,13 +5945,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "async-compression", - "base64", - "bitflags", + "base64 0.22.1", + "bitflags 2.10.0", "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 1.4.0", + "http-body 1.0.1", "http-body-util", "http-range-header", "httpdate", @@ -5249,7 +6001,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -5365,7 +6117,7 @@ checksum = "27a7a9b72ba121f6f1f6c3632b85604cac41aedb5ddc70accbebb6cac83de846" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -5495,7 +6247,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -5604,7 +6356,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn", + "syn 2.0.114", "wasm-bindgen-shared", ] @@ -5650,6 +6402,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" +dependencies = [ + "ring 0.17.14", + "untrusted 0.9.0", +] + [[package]] name = "webpki-roots" version = "0.26.11" @@ -5668,6 +6430,18 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + [[package]] name = "whoami" version = "2.0.2" @@ -5679,6 +6453,28 @@ dependencies = [ "web-sys", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.62.2" @@ -5700,7 +6496,7 @@ checksum = 
"053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -5711,7 +6507,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -5940,7 +6736,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix", + "rustix 1.1.3", ] [[package]] @@ -5977,7 +6773,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", "synstructure", ] @@ -5998,7 +6794,7 @@ checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -6018,7 +6814,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", "synstructure", ] @@ -6039,7 +6835,7 @@ checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] @@ -6073,7 +6869,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.114", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 2dd5bab..03afe35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,10 +7,11 @@ members = [ "./crates/nvisy-core", "./crates/nvisy-nats", "./crates/nvisy-postgres", - "./crates/nvisy-rig", "./crates/nvisy-server", "./crates/nvisy-webhook", + "./integrations/nvisy-data", "./integrations/nvisy-opendal", + "./integrations/nvisy-rig", "./integrations/nvisy-runtime", "./integrations/nvisy-vector", ] @@ -35,10 +36,11 @@ documentation = "https://docs.rs/nvisy-server" # Internal crates nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } +nvisy-data = { path = "./integrations/nvisy-data", version = "0.1.0" } nvisy-nats = { path = "./crates/nvisy-nats", version = "0.1.0" } nvisy-opendal = { path = "./integrations/nvisy-opendal", version = "0.1.0" } nvisy-postgres = { path = "./crates/nvisy-postgres", version = "0.1.0" } -nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } +nvisy-rig = { path = "./integrations/nvisy-rig", version = "0.1.0" } nvisy-runtime = { path = "./integrations/nvisy-runtime", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } nvisy-vector = { path = "./integrations/nvisy-vector", version = "0.1.0" } diff --git a/crates/nvisy-cli/src/config/mod.rs b/crates/nvisy-cli/src/config/mod.rs index 53b8598..c00b006 100644 --- a/crates/nvisy-cli/src/config/mod.rs +++ b/crates/nvisy-cli/src/config/mod.rs @@ -30,7 +30,6 @@ mod server; use std::process; use clap::Parser; -use nvisy_server::pipeline::PipelineConfig; use nvisy_server::service::{ServiceConfig, ServiceState}; use nvisy_webhook::WebhookService; use nvisy_webhook::reqwest::{ReqwestClient, ReqwestConfig}; @@ -71,10 +70,6 @@ pub struct Cli { #[clap(flatten)] pub service: ServiceConfig, - /// Pipeline configuration for document processing workers. - #[clap(flatten)] - pub pipeline: PipelineConfig, - /// HTTP client configuration for webhook delivery. 
#[clap(flatten)] pub reqwest: ReqwestConfig, diff --git a/crates/nvisy-cli/src/main.rs b/crates/nvisy-cli/src/main.rs index 0b53e18..acb0829 100644 --- a/crates/nvisy-cli/src/main.rs +++ b/crates/nvisy-cli/src/main.rs @@ -10,7 +10,6 @@ use std::process; use axum::Router; use nvisy_server::handler::{CustomRoutes, routes}; use nvisy_server::middleware::*; -use nvisy_server::pipeline::{PipelineState, WorkerHandles}; use nvisy_server::service::ServiceState; use crate::config::{Cli, MiddlewareConfig}; @@ -45,20 +44,11 @@ async fn run() -> anyhow::Result<()> { // Initialize application state let state = cli.service_state().await?; - // Spawn pipeline workers - let pipeline_state = PipelineState::new(&state, cli.pipeline.clone()); - let workers = WorkerHandles::spawn(&pipeline_state); - // Build router let router = create_router(state, &cli.middleware); // Run the HTTP server - let result = server::serve(router, cli.server).await; - - // Shutdown workers - workers.shutdown(); - - result?; + server::serve(router, cli.server).await?; Ok(()) } diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index dfc0410..7a3bb4b 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -28,14 +28,12 @@ config = [ "dep:clap", "nvisy-nats/config", "nvisy-postgres/config", - "nvisy-rig/config", ] [dependencies] # Internal crates nvisy-nats = { workspace = true, features = ["schema"] } nvisy-postgres = { workspace = true, features = ["schema"] } -nvisy-rig = { workspace = true, features = [] } nvisy-webhook = { workspace = true, features = ["schema"] } # Async runtime diff --git a/crates/nvisy-server/src/lib.rs b/crates/nvisy-server/src/lib.rs index 54b3dff..9987e42 100644 --- a/crates/nvisy-server/src/lib.rs +++ b/crates/nvisy-server/src/lib.rs @@ -7,7 +7,6 @@ mod error; pub mod extract; pub mod handler; pub mod middleware; -pub mod pipeline; pub mod service; pub use crate::error::{BoxedError, Error, ErrorKind, Result}; diff --git a/crates/nvisy-server/src/pipeline/job_handler.rs b/crates/nvisy-server/src/pipeline/job_handler.rs deleted file mode 100644 index df42ea6..0000000 --- a/crates/nvisy-server/src/pipeline/job_handler.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! Job handler trait for stage-specific processing logic. - -use std::future::Future; - -use nvisy_nats::stream::{DocumentJob, Stage}; - -use super::PipelineState; -use crate::Result; - -/// Trait for implementing stage-specific job processing logic. -/// -/// Each processing stage implements this trait to define how jobs -/// are handled. The framework takes care of subscription, concurrency, -/// shutdown, and error handling. -/// -/// # Example -/// -/// ```ignore -/// pub struct MyHandler; -/// -/// impl JobHandler for MyHandler { -/// type Stage = MyStageData; -/// const TRACING_TARGET: &'static str = "my_worker::stage"; -/// const WORKER_NAME: &'static str = "my_stage"; -/// -/// async fn handle_job(state: &PipelineState, job: &DocumentJob) -> Result<()> { -/// // Process the job -/// Ok(()) -/// } -/// } -/// ``` -pub trait JobHandler: Send + Sync + 'static { - /// The processing stage this handler operates on. - type Stage: Stage; - - /// Tracing target for this handler's log messages. - const TRACING_TARGET: &'static str; - - /// Human-readable name for this worker (used in logs). - const WORKER_NAME: &'static str; - - /// Process a single job. - /// - /// This is the only method that stage-specific implementations need to define. 
- /// The framework handles message acknowledgment, concurrency control, and - /// error logging. - fn handle_job( - state: &PipelineState, - job: &DocumentJob, - ) -> impl Future> + Send; - - /// Optional: Log additional context when a job starts. - /// - /// Override this to add stage-specific fields to the "Processing job" log. - /// Default implementation logs nothing extra. - #[inline] - fn log_job_start(_job: &DocumentJob) { - // Default: no extra logging - } -} diff --git a/crates/nvisy-server/src/pipeline/mod.rs b/crates/nvisy-server/src/pipeline/mod.rs deleted file mode 100644 index a0d1863..0000000 --- a/crates/nvisy-server/src/pipeline/mod.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! Document processing pipeline. -//! -//! This module provides a generic worker framework for document processing stages. -//! -//! ## Architecture -//! -//! - [`JobHandler`] - Trait for implementing stage-specific job processing -//! - [`Worker`] - Generic worker that handles subscription, concurrency, and shutdown -//! - [`WorkerHandles`] - Manages all three processing workers -//! -//! ## Stages -//! -//! - **Preprocessing**: Format validation, OCR, thumbnail generation, embeddings -//! - **Processing**: VLM-based transformations, annotations, predefined tasks -//! - **Postprocessing**: Format conversion, compression, cleanup - -/// Tracing target for pipeline events. -const TRACING_TARGET: &str = "nvisy_server::pipeline"; - -mod job_handler; -mod postprocessing; -mod preprocessing; -mod processing; -mod state; -mod worker; - -pub use job_handler::JobHandler; -pub use postprocessing::PostprocessingHandler; -pub use preprocessing::PreprocessingHandler; -pub use processing::ProcessingHandler; -pub use state::{DEFAULT_MAX_CONCURRENT_JOBS, PipelineConfig, PipelineState}; -use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; -use uuid::Uuid; -pub use worker::Worker; - -use crate::{Error, Result}; - -/// Type aliases for concrete worker types. -pub type PreprocessingWorker = Worker; -pub type ProcessingWorker = Worker; -pub type PostprocessingWorker = Worker; - -/// Handles for background workers. -/// -/// Holds join handles for all spawned workers, allowing graceful shutdown -/// and status monitoring. -pub struct WorkerHandles { - preprocessing: JoinHandle>, - processing: JoinHandle>, - postprocessing: JoinHandle>, - cancel_token: CancellationToken, -} - -impl WorkerHandles { - /// Spawns all document processing workers. - /// - /// Creates preprocessing, processing, and postprocessing workers with - /// the given state and spawns them as background tasks. Each worker - /// gets a unique consumer name in the format `{uuid}-{stage}`. - /// - /// All workers share a single semaphore for global concurrency control. 
- pub fn spawn(state: &PipelineState) -> Self { - let cancel_token = CancellationToken::new(); - let instance_id = Uuid::now_v7(); - let semaphore = state.config.create_semaphore(); - - tracing::info!( - target: TRACING_TARGET, - instance_id = %instance_id, - max_concurrent_jobs = state.config.max_concurrent_jobs, - "Starting document processing workers" - ); - - let preprocessing = Worker::::new( - state.clone(), - format!("{}-preprocessing", instance_id), - cancel_token.clone(), - semaphore.clone(), - ) - .spawn(); - - let processing = Worker::::new( - state.clone(), - format!("{}-processing", instance_id), - cancel_token.clone(), - semaphore.clone(), - ) - .spawn(); - - let postprocessing = Worker::::new( - state.clone(), - format!("{}-postprocessing", instance_id), - cancel_token.clone(), - semaphore, - ) - .spawn(); - - tracing::debug!( - target: TRACING_TARGET, - "All workers spawned successfully" - ); - - Self { - preprocessing, - processing, - postprocessing, - cancel_token, - } - } - - /// Requests graceful shutdown of all workers. - /// - /// Workers will finish processing their current job before stopping. - /// Use [`abort_all`](Self::abort_all) for immediate cancellation. - pub fn shutdown(&self) { - tracing::info!( - target: TRACING_TARGET, - "Initiating graceful shutdown of document processing workers" - ); - self.cancel_token.cancel(); - } - - /// Aborts all worker tasks immediately. - /// - /// This cancels workers without waiting for graceful shutdown. - /// Prefer [`shutdown`](Self::shutdown) for clean termination. - pub fn abort_all(&self) { - tracing::warn!( - target: TRACING_TARGET, - "Aborting all document processing workers immediately" - ); - self.cancel_token.cancel(); - self.preprocessing.abort(); - self.processing.abort(); - self.postprocessing.abort(); - } - - /// Checks if all workers are still running. - pub fn all_running(&self) -> bool { - !self.preprocessing.is_finished() - && !self.processing.is_finished() - && !self.postprocessing.is_finished() - } - - /// Checks if any worker has finished (possibly due to error). - pub fn any_finished(&self) -> bool { - self.preprocessing.is_finished() - || self.processing.is_finished() - || self.postprocessing.is_finished() - } - - /// Waits for all workers to complete. - /// - /// Returns the first error encountered, if any. - pub async fn wait_all(self) -> Result<()> { - tracing::debug!( - target: TRACING_TARGET, - "Waiting for all workers to complete" - ); - - let (pre, proc, post) = - tokio::join!(self.preprocessing, self.processing, self.postprocessing); - - pre.map_err(|e| Error::internal("pipeline", e.to_string()))??; - proc.map_err(|e| Error::internal("pipeline", e.to_string()))??; - post.map_err(|e| Error::internal("pipeline", e.to_string()))??; - - tracing::info!( - target: TRACING_TARGET, - "All document processing workers stopped" - ); - - Ok(()) - } -} diff --git a/crates/nvisy-server/src/pipeline/postprocessing.rs b/crates/nvisy-server/src/pipeline/postprocessing.rs deleted file mode 100644 index 628743e..0000000 --- a/crates/nvisy-server/src/pipeline/postprocessing.rs +++ /dev/null @@ -1,93 +0,0 @@ -//! Postprocessing handler for document download pipeline. -//! -//! Handles jobs triggered by download requests: -//! - Format conversion to requested format -//! - Compression settings -//! - Annotation flattening (burning into document) -//! 
- Cleanup of temporary artifacts - -use nvisy_nats::stream::{DocumentJob, PostprocessingData}; - -use super::{JobHandler, PipelineState}; -use crate::Result; - -const TRACING_TARGET: &str = "nvisy_server::pipeline::postprocessing"; - -/// Postprocessing job handler. -pub struct PostprocessingHandler; - -impl JobHandler for PostprocessingHandler { - type Stage = PostprocessingData; - - const TRACING_TARGET: &'static str = TRACING_TARGET; - const WORKER_NAME: &'static str = "postprocessing"; - - fn log_job_start(job: &DocumentJob) { - tracing::debug!( - target: TRACING_TARGET, - target_format = ?job.data().target_format, - "Postprocessing job context" - ); - } - - async fn handle_job(_state: &PipelineState, job: &DocumentJob) -> Result<()> { - let data = job.data(); - - // TODO: Update database status to "processing" - // TODO: Fetch document from object store - - // Step 1: Flatten annotations if requested - if let Some(true) = data.flatten_annotations { - tracing::debug!( - target: TRACING_TARGET, - "Flattening annotations into document" - ); - // TODO: Burn annotations into document - // - Fetch annotations from database - // - Render them permanently into document - } - - // Step 2: Convert format if specified - if let Some(ref target_format) = data.target_format { - tracing::debug!( - target: TRACING_TARGET, - target_format = %target_format, - source_format = %job.file_extension, - "Converting document format" - ); - // TODO: Implement format conversion - // - PDF <-> DOCX, PNG, etc. - // - Use appropriate conversion libraries - } - - // Step 3: Apply compression if specified - if let Some(ref compression_level) = data.compression_level { - tracing::debug!( - target: TRACING_TARGET, - compression_level = ?compression_level, - "Applying compression" - ); - // TODO: Implement compression - // - Compress images in document - // - Optimize file size based on level - } - - // Step 4: Run cleanup tasks - if let Some(ref cleanup_tasks) = data.cleanup_tasks { - tracing::debug!( - target: TRACING_TARGET, - task_count = cleanup_tasks.len(), - "Running cleanup tasks" - ); - // TODO: Implement cleanup - // - Remove temporary files - // - Clean intermediate processing artifacts - } - - // TODO: Store processed document to object store - // TODO: Update database with final file info - // TODO: Update database status to "completed" - - Ok(()) - } -} diff --git a/crates/nvisy-server/src/pipeline/preprocessing.rs b/crates/nvisy-server/src/pipeline/preprocessing.rs deleted file mode 100644 index 65f8fc7..0000000 --- a/crates/nvisy-server/src/pipeline/preprocessing.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Preprocessing handler for document upload pipeline. -//! -//! Handles jobs triggered by file uploads: -//! - Format detection and validation -//! - Metadata extraction and fixes -//! - OCR for scanned documents -//! - Thumbnail generation -//! - Embedding generation for semantic search - -use nvisy_nats::stream::{DocumentJob, PreprocessingData}; - -use super::{JobHandler, PipelineState}; -use crate::Result; - -const TRACING_TARGET: &str = "nvisy_server::pipeline::preprocessing"; - -/// Preprocessing job handler. 
-pub struct PreprocessingHandler; - -impl JobHandler for PreprocessingHandler { - type Stage = PreprocessingData; - - const TRACING_TARGET: &'static str = TRACING_TARGET; - const WORKER_NAME: &'static str = "preprocessing"; - - async fn handle_job(_state: &PipelineState, job: &DocumentJob) -> Result<()> { - let data = job.data(); - - // TODO: Update database status to "processing" - - // Step 1: Validate metadata - if data.validate_metadata { - tracing::debug!( - target: TRACING_TARGET, - "Validating file metadata" - ); - // TODO: Implement metadata validation - // - Format detection - // - File integrity checks - // - Metadata extraction - } - - // Step 2: Run OCR - if data.run_ocr { - tracing::debug!(target: TRACING_TARGET, "Running OCR"); - // TODO: Implement OCR - // - Detect if document needs OCR (scanned vs native text) - // - Extract text using OCR service - // - Store extracted text in database - } - - // Step 3: Generate embeddings - if data.generate_embeddings { - tracing::debug!(target: TRACING_TARGET, "Generating embeddings"); - // TODO: Implement embedding generation - // - Split document into chunks - // - Generate embeddings using nvisy-rig - // - Store embeddings for semantic search - } - - // Step 4: Generate thumbnails - if let Some(true) = data.generate_thumbnails { - tracing::debug!(target: TRACING_TARGET, "Generating thumbnails"); - // TODO: Implement thumbnail generation - // - Render first page(s) as images - // - Store thumbnails in object store - } - - // TODO: Update database status to "completed" - - Ok(()) - } -} diff --git a/crates/nvisy-server/src/pipeline/processing.rs b/crates/nvisy-server/src/pipeline/processing.rs deleted file mode 100644 index 76490cb..0000000 --- a/crates/nvisy-server/src/pipeline/processing.rs +++ /dev/null @@ -1,93 +0,0 @@ -//! Processing handler for document editing pipeline. -//! -//! Handles jobs triggered by edit requests: -//! - VLM-based document transformations -//! - Annotation processing -//! - Predefined tasks (redaction, translation, summarization, etc.) - -use nvisy_nats::stream::{DocumentJob, ProcessingData}; - -use super::{JobHandler, PipelineState}; -use crate::Result; - -const TRACING_TARGET: &str = "nvisy_server::pipeline::processing"; - -/// Processing job handler. 
-pub struct ProcessingHandler; - -impl JobHandler for ProcessingHandler { - type Stage = ProcessingData; - - const TRACING_TARGET: &'static str = TRACING_TARGET; - const WORKER_NAME: &'static str = "processing"; - - fn log_job_start(job: &DocumentJob) { - tracing::debug!( - target: TRACING_TARGET, - task_count = job.data().tasks.len(), - "Processing job context" - ); - } - - async fn handle_job(_state: &PipelineState, job: &DocumentJob) -> Result<()> { - let data = job.data(); - - // TODO: Update database status to "processing" - // TODO: Fetch document from object store - - // Step 1: Process main prompt if provided - if !data.prompt.is_empty() { - tracing::debug!( - target: TRACING_TARGET, - prompt_length = data.prompt.len(), - has_context = data.context.is_some(), - "Executing VLM prompt" - ); - // TODO: Implement VLM processing - // - Send document + prompt to nvisy-rig - // - Apply transformations based on VLM output - } - - // Step 2: Process annotations if specified - if let Some(ref annotation_ids) = data.annotation_ids { - tracing::debug!( - target: TRACING_TARGET, - annotation_count = annotation_ids.len(), - "Processing annotations" - ); - // TODO: Fetch annotations from database - // TODO: Apply each annotation using VLM - } - - // Step 3: Execute predefined tasks - for task in &data.tasks { - tracing::debug!( - target: TRACING_TARGET, - task = ?task, - "Executing predefined task" - ); - // TODO: Implement task execution - // - Redact: Find and redact sensitive patterns - // - Translate: Translate document to target language - // - Summarize: Generate document summary - // - ExtractInfo: Extract structured information - // - etc. - } - - // Step 4: Handle reference files if provided - if let Some(ref reference_ids) = data.reference_file_ids { - tracing::debug!( - target: TRACING_TARGET, - reference_count = reference_ids.len(), - "Using reference files for context" - ); - // TODO: Fetch reference files - // TODO: Include in VLM context for style matching, etc. - } - - // TODO: Store processed document back to object store - // TODO: Update database status to "completed" - - Ok(()) - } -} diff --git a/crates/nvisy-server/src/pipeline/state.rs b/crates/nvisy-server/src/pipeline/state.rs deleted file mode 100644 index bac8a64..0000000 --- a/crates/nvisy-server/src/pipeline/state.rs +++ /dev/null @@ -1,72 +0,0 @@ -//! Pipeline state and configuration. - -use std::sync::Arc; - -use clap::Args; -use nvisy_nats::NatsClient; -use nvisy_postgres::PgClient; -use serde::{Deserialize, Serialize}; -use tokio::sync::Semaphore; - -use crate::service::ServiceState; - -/// Default maximum concurrent jobs. -pub const DEFAULT_MAX_CONCURRENT_JOBS: usize = 10; - -/// Configuration for the document processing pipeline. -#[derive(Debug, Clone, Serialize, Deserialize, Args)] -pub struct PipelineConfig { - /// Maximum concurrent jobs workers can process simultaneously. - #[arg(long, env = "PIPELINE_MAX_CONCURRENT_JOBS", default_value_t = DEFAULT_MAX_CONCURRENT_JOBS)] - pub max_concurrent_jobs: usize, -} - -impl Default for PipelineConfig { - fn default() -> Self { - Self { - max_concurrent_jobs: DEFAULT_MAX_CONCURRENT_JOBS, - } - } -} - -impl PipelineConfig { - /// Creates a new pipeline configuration with default values. - pub fn new() -> Self { - Self::default() - } - - /// Sets the maximum concurrent jobs. 
- pub fn with_max_concurrent_jobs(mut self, max_concurrent_jobs: usize) -> Self { - self.max_concurrent_jobs = max_concurrent_jobs; - self - } - - /// Creates a semaphore for limiting concurrent job processing. - pub fn create_semaphore(&self) -> Arc { - Arc::new(Semaphore::new(self.max_concurrent_jobs)) - } -} - -/// Application state for pipeline workers. -/// -/// Contains the services needed by document processing workers. -#[derive(Clone)] -pub struct PipelineState { - /// PostgreSQL database client. - pub postgres: PgClient, - /// NATS messaging client. - pub nats: NatsClient, - /// Pipeline configuration. - pub config: PipelineConfig, -} - -impl PipelineState { - /// Creates a new pipeline state from service state and configuration. - pub fn new(state: &ServiceState, config: PipelineConfig) -> Self { - Self { - postgres: state.postgres.clone(), - nats: state.nats.clone(), - config, - } - } -} diff --git a/crates/nvisy-server/src/pipeline/worker.rs b/crates/nvisy-server/src/pipeline/worker.rs deleted file mode 100644 index 1c4035a..0000000 --- a/crates/nvisy-server/src/pipeline/worker.rs +++ /dev/null @@ -1,190 +0,0 @@ -//! Generic document processing worker. - -use std::marker::PhantomData; -use std::sync::Arc; - -use nvisy_nats::stream::{DocumentJob, DocumentJobSubscriber, Stage, TypedMessage}; -use tokio::sync::Semaphore; -use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; - -use super::{JobHandler, PipelineState}; -use crate::Result; - -/// Tracing target for worker infrastructure. -const TRACING_TARGET: &str = "nvisy_server::pipeline"; - -/// Generic document processing worker. -/// -/// Handles all the boilerplate for subscribing to a NATS stream, -/// processing jobs concurrently with semaphore-based limiting, -/// and graceful shutdown via cancellation token. -/// -/// The actual job processing logic is delegated to the `H: JobHandler` implementation. -pub struct Worker { - state: PipelineState, - consumer_name: String, - cancel_token: CancellationToken, - semaphore: Arc, - _marker: PhantomData, -} - -impl Worker { - /// Creates a new worker with the given handler type. - pub fn new( - state: PipelineState, - consumer_name: impl Into, - cancel_token: CancellationToken, - semaphore: Arc, - ) -> Self { - Self { - state, - consumer_name: consumer_name.into(), - cancel_token, - semaphore, - _marker: PhantomData, - } - } - - /// Spawns the worker as a background task. - pub fn spawn(self) -> JoinHandle> { - tokio::spawn(async move { self.run().await }) - } - - /// Runs the worker loop, processing jobs as they arrive. - async fn run(self) -> Result<()> { - tracing::info!( - target: TRACING_TARGET, - worker = H::WORKER_NAME, - consumer = %self.consumer_name, - "Starting worker" - ); - - let subscriber: DocumentJobSubscriber = self - .state - .nats - .document_job_subscriber(&self.consumer_name) - .await?; - - tracing::info!( - target: TRACING_TARGET, - consumer = %self.consumer_name, - stage = ::NAME, - "Subscribed to jobs" - ); - - let mut stream = subscriber.subscribe().await?; - - loop { - tokio::select! { - biased; - - () = self.cancel_token.cancelled() => { - tracing::info!( - target: TRACING_TARGET, - worker = H::WORKER_NAME, - "Shutdown requested, stopping worker" - ); - break; - } - - result = stream.next() => { - if !self.handle_stream_result(result).await { - break; - } - } - } - } - - Ok(()) - } - - /// Handles a stream result, returning false if the worker should stop. 
- async fn handle_stream_result( - &self, - result: nvisy_nats::Result>>>, - ) -> bool { - let msg = match result { - Ok(Some(msg)) => msg, - Ok(None) => { - tracing::trace!(target: TRACING_TARGET, "No messages available"); - return true; - } - Err(err) => { - tracing::error!( - target: TRACING_TARGET, - error = %err, - "Failed to receive message" - ); - return true; - } - }; - - // Acquire semaphore permit for concurrency control - let permit = match self.semaphore.clone().acquire_owned().await { - Ok(permit) => permit, - Err(_) => { - tracing::error!( - target: TRACING_TARGET, - "Semaphore closed, stopping worker" - ); - return false; - } - }; - - let state = self.state.clone(); - let job = msg.payload().clone(); - let job_id = job.id; - let file_id = job.file_id; - - // Ack immediately to prevent redelivery while processing - let mut msg = msg; - if let Err(err) = msg.ack().await { - tracing::error!( - target: TRACING_TARGET, - job_id = %job_id, - error = %err, - "Failed to ack message" - ); - } - - tokio::spawn(async move { - // Hold permit until job completes - let _permit = permit; - - tracing::info!( - target: TRACING_TARGET, - job_id = %job_id, - file_id = %file_id, - stage = ::NAME, - "Processing job" - ); - - // Allow handler to log extra context - H::log_job_start(&job); - - match H::handle_job(&state, &job).await { - Ok(()) => { - tracing::info!( - target: TRACING_TARGET, - job_id = %job_id, - file_id = %file_id, - "Job completed" - ); - } - Err(err) => { - tracing::error!( - target: TRACING_TARGET, - job_id = %job_id, - file_id = %file_id, - error = %err, - "Job failed" - ); - // TODO: Implement retry logic or dead letter queue - } - } - }); - - true - } -} diff --git a/crates/nvisy-server/src/service/config.rs b/crates/nvisy-server/src/service/config.rs index 19b565c..1e14dbc 100644 --- a/crates/nvisy-server/src/service/config.rs +++ b/crates/nvisy-server/src/service/config.rs @@ -4,7 +4,6 @@ use clap::Args; use clap::Parser; use nvisy_nats::{NatsClient, NatsConfig}; use nvisy_postgres::{PgClient, PgClientMigrationExt, PgConfig}; -use nvisy_rig::RigConfig; use serde::{Deserialize, Serialize}; use crate::service::security::{SessionKeys, SessionKeysConfig}; @@ -29,10 +28,6 @@ pub struct ServiceConfig { /// Authentication key paths configuration. #[cfg_attr(any(test, feature = "config"), command(flatten))] pub session_config: SessionKeysConfig, - - /// AI services configuration. 
- #[cfg_attr(any(test, feature = "config"), command(flatten))] - pub rig_config: RigConfig, } impl ServiceConfig { diff --git a/crates/nvisy-server/src/service/mod.rs b/crates/nvisy-server/src/service/mod.rs index 6394045..c3323de 100644 --- a/crates/nvisy-server/src/service/mod.rs +++ b/crates/nvisy-server/src/service/mod.rs @@ -7,7 +7,6 @@ mod security; use nvisy_nats::NatsClient; use nvisy_postgres::PgClient; -use nvisy_rig::RigService; use nvisy_webhook::WebhookService; use crate::Result; @@ -31,9 +30,6 @@ pub struct ServiceState { pub nats: NatsClient, pub webhook: WebhookService, - // AI services: - pub rig: RigService, - // Internal services: pub health_cache: HealthCache, pub integration_provider: IntegrationProvider, @@ -54,24 +50,11 @@ impl ServiceState { let postgres = service_config.connect_postgres().await?; let nats = service_config.connect_nats().await?; - // Initialize AI services - let rig = RigService::new( - service_config.rig_config.clone(), - postgres.clone(), - nats.clone(), - ) - .await - .map_err(|e| { - crate::Error::internal("rig", "Failed to initialize rig service").with_source(e) - })?; - let service_state = Self { postgres, nats, webhook: webhook_service, - rig, - health_cache: HealthCache::new(), integration_provider: IntegrationProvider::new(), password_hasher: PasswordHasher::new(), @@ -99,9 +82,6 @@ impl_di!(postgres: PgClient); impl_di!(nats: NatsClient); impl_di!(webhook: WebhookService); -// AI services: -impl_di!(rig: RigService); - // Internal services: impl_di!(health_cache: HealthCache); impl_di!(integration_provider: IntegrationProvider); diff --git a/integrations/nvisy-data/Cargo.toml b/integrations/nvisy-data/Cargo.toml new file mode 100644 index 0000000..249baaf --- /dev/null +++ b/integrations/nvisy-data/Cargo.toml @@ -0,0 +1,39 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-data" +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } +readme = "./README.md" +description = "Foundational traits for data I/O and vector operations" + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +# Async runtime +tokio = { workspace = true, features = ["rt", "sync"] } +futures = { workspace = true, features = [] } + +# Bytes +bytes = { workspace = true, features = [] } + +# (De)serialization +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true, features = [] } + +# Derive macros & utilities +thiserror = { workspace = true, features = [] } +async-trait = { workspace = true, features = [] } + +[dev-dependencies] +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/integrations/nvisy-data/src/error.rs b/integrations/nvisy-data/src/error.rs new file mode 100644 index 0000000..235b5c5 --- /dev/null +++ b/integrations/nvisy-data/src/error.rs @@ -0,0 +1,99 @@ +//! Error types for data operations. + +use std::fmt; + +/// Result type for data operations. +pub type DataResult = Result; + +/// Error type for data operations. +#[derive(Debug)] +pub struct DataError { + kind: DataErrorKind, + message: String, + source: Option>, +} + +/// The kind of data error. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DataErrorKind { + /// Connection error (e.g., network failure). + Connection, + /// Not found error (e.g., file or collection doesn't exist). + NotFound, + /// Permission denied. + Permission, + /// Invalid input or configuration. + Invalid, + /// Serialization/deserialization error. + Serialization, + /// Backend-specific error. + Backend, + /// Unknown or unclassified error. + Unknown, +} + +impl DataError { + /// Creates a new error with the given kind and message. + pub fn new(kind: DataErrorKind, message: impl Into) -> Self { + Self { + kind, + message: message.into(), + source: None, + } + } + + /// Adds a source error. + pub fn with_source(mut self, source: impl std::error::Error + Send + Sync + 'static) -> Self { + self.source = Some(Box::new(source)); + self + } + + /// Returns the error kind. + pub fn kind(&self) -> DataErrorKind { + self.kind + } + + /// Creates a connection error. + pub fn connection(message: impl Into) -> Self { + Self::new(DataErrorKind::Connection, message) + } + + /// Creates a not found error. + pub fn not_found(message: impl Into) -> Self { + Self::new(DataErrorKind::NotFound, message) + } + + /// Creates a permission error. + pub fn permission(message: impl Into) -> Self { + Self::new(DataErrorKind::Permission, message) + } + + /// Creates an invalid input error. + pub fn invalid(message: impl Into) -> Self { + Self::new(DataErrorKind::Invalid, message) + } + + /// Creates a serialization error. + pub fn serialization(message: impl Into) -> Self { + Self::new(DataErrorKind::Serialization, message) + } + + /// Creates a backend error. + pub fn backend(message: impl Into) -> Self { + Self::new(DataErrorKind::Backend, message) + } +} + +impl fmt::Display for DataError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}: {}", self.kind, self.message) + } +} + +impl std::error::Error for DataError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.source + .as_ref() + .map(|e| e.as_ref() as &(dyn std::error::Error + 'static)) + } +} diff --git a/integrations/nvisy-data/src/input.rs b/integrations/nvisy-data/src/input.rs new file mode 100644 index 0000000..15444d3 --- /dev/null +++ b/integrations/nvisy-data/src/input.rs @@ -0,0 +1,55 @@ +//! Data input trait for reading from storage backends. + +use async_trait::async_trait; +use bytes::Bytes; +use futures::Stream; + +use crate::error::DataResult; + +/// Context for data input operations. +#[derive(Debug, Clone, Default)] +pub struct InputContext { + /// The bucket or container name (for object storage). + pub bucket: Option, + /// Additional options as key-value pairs. + pub options: std::collections::HashMap, +} + +impl InputContext { + /// Creates a new empty context. + pub fn new() -> Self { + Self::default() + } + + /// Sets the bucket/container. + pub fn with_bucket(mut self, bucket: impl Into) -> Self { + self.bucket = Some(bucket.into()); + self + } + + /// Adds an option. + pub fn with_option(mut self, key: impl Into, value: impl Into) -> Self { + self.options.insert(key.into(), value.into()); + self + } +} + +/// Trait for reading data from storage backends. +#[async_trait] +pub trait DataInput: Send + Sync { + /// Reads the entire contents at the given path. + async fn read(&self, ctx: &InputContext, path: &str) -> DataResult; + + /// Reads the contents as a stream of chunks. 
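The `DataError` constructors shown in `error.rs` follow a builder pattern; below is a minimal sketch of wrapping a lower-level failure. The message text and the choice of `Connection` as the kind are illustrative only.

```rust
use nvisy_data::{DataError, DataErrorKind};

// Wrap an I/O failure into a DataError, preserving it as the source
// so callers can still walk the error chain.
fn wrap_io_error(err: std::io::Error) -> DataError {
    DataError::new(DataErrorKind::Connection, "failed to reach storage backend")
        .with_source(err)
}
```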
+ async fn read_stream( + &self, + ctx: &InputContext, + path: &str, + ) -> DataResult> + Send + Unpin>>; + + /// Checks if a path exists. + async fn exists(&self, ctx: &InputContext, path: &str) -> DataResult; + + /// Lists paths under the given prefix. + async fn list(&self, ctx: &InputContext, prefix: &str) -> DataResult>; +} diff --git a/integrations/nvisy-data/src/lib.rs b/integrations/nvisy-data/src/lib.rs new file mode 100644 index 0000000..376591a --- /dev/null +++ b/integrations/nvisy-data/src/lib.rs @@ -0,0 +1,20 @@ +//! Foundational traits for data I/O and vector operations. +//! +//! This crate provides the core abstractions for: +//! - Data input/output operations (storage backends) +//! - Vector store operations (embeddings storage) +//! - Common types used across integrations + +#![forbid(unsafe_code)] + +mod error; +mod input; +mod output; +mod types; +mod vector; + +pub use error::{DataError, DataErrorKind, DataResult}; +pub use input::{DataInput, InputContext}; +pub use output::{DataOutput, OutputContext}; +pub use types::{Metadata, VectorData, VectorSearchResult}; +pub use vector::{VectorContext, VectorOutput, VectorSearchOptions}; diff --git a/integrations/nvisy-data/src/output.rs b/integrations/nvisy-data/src/output.rs new file mode 100644 index 0000000..672f659 --- /dev/null +++ b/integrations/nvisy-data/src/output.rs @@ -0,0 +1,61 @@ +//! Data output trait for writing to storage backends. + +use async_trait::async_trait; +use bytes::Bytes; +use futures::Stream; + +use crate::error::DataResult; + +/// Context for data output operations. +#[derive(Debug, Clone, Default)] +pub struct OutputContext { + /// The bucket or container name (for object storage). + pub bucket: Option, + /// Content type for the data being written. + pub content_type: Option, + /// Additional options as key-value pairs. + pub options: std::collections::HashMap, +} + +impl OutputContext { + /// Creates a new empty context. + pub fn new() -> Self { + Self::default() + } + + /// Sets the bucket/container. + pub fn with_bucket(mut self, bucket: impl Into) -> Self { + self.bucket = Some(bucket.into()); + self + } + + /// Sets the content type. + pub fn with_content_type(mut self, content_type: impl Into) -> Self { + self.content_type = Some(content_type.into()); + self + } + + /// Adds an option. + pub fn with_option(mut self, key: impl Into, value: impl Into) -> Self { + self.options.insert(key.into(), value.into()); + self + } +} + +/// Trait for writing data to storage backends. +#[async_trait] +pub trait DataOutput: Send + Sync { + /// Writes data to the given path. + async fn write(&self, ctx: &OutputContext, path: &str, data: Bytes) -> DataResult<()>; + + /// Writes data from a stream to the given path. + async fn write_stream( + &self, + ctx: &OutputContext, + path: &str, + stream: Box> + Send + Unpin>, + ) -> DataResult<()>; + + /// Deletes the data at the given path. + async fn delete(&self, ctx: &OutputContext, path: &str) -> DataResult<()>; +} diff --git a/integrations/nvisy-data/src/types.rs b/integrations/nvisy-data/src/types.rs new file mode 100644 index 0000000..0410f2a --- /dev/null +++ b/integrations/nvisy-data/src/types.rs @@ -0,0 +1,52 @@ +//! Common types used across integrations. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +/// Metadata associated with data or vectors. +pub type Metadata = HashMap; + +/// A vector with its ID and metadata. 
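Because `DataInput` and `DataOutput` are `#[async_trait]` traits, callers can work against trait objects without naming a concrete backend. A minimal sketch of a whole-object copy between two backends; the bucket names, content type, and helper name are placeholders.

```rust
use nvisy_data::{DataInput, DataOutput, DataResult, InputContext, OutputContext};

// Read an object from one backend and write it to another.
// Both contexts use the builder helpers defined in nvisy-data.
async fn copy_object(
    source: &dyn DataInput,
    sink: &dyn DataOutput,
    path: &str,
) -> DataResult<()> {
    let input_ctx = InputContext::new().with_bucket("source-bucket");
    let output_ctx = OutputContext::new()
        .with_bucket("target-bucket")
        .with_content_type("application/octet-stream");

    let data = source.read(&input_ctx, path).await?;
    sink.write(&output_ctx, path, data).await
}
```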
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VectorData { + /// Unique identifier for this vector. + pub id: String, + /// The embedding vector. + pub vector: Vec, + /// Optional metadata associated with this vector. + #[serde(default)] + pub metadata: Metadata, +} + +impl VectorData { + /// Creates a new vector data with the given ID and vector. + pub fn new(id: impl Into, vector: Vec) -> Self { + Self { + id: id.into(), + vector, + metadata: Metadata::new(), + } + } + + /// Adds metadata to this vector. + pub fn with_metadata(mut self, metadata: Metadata) -> Self { + self.metadata = metadata; + self + } +} + +/// Result from a vector similarity search. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VectorSearchResult { + /// The ID of the matched vector. + pub id: String, + /// Similarity score (interpretation depends on distance metric). + pub score: f32, + /// The vector data, if requested. + #[serde(skip_serializing_if = "Option::is_none")] + pub vector: Option>, + /// Metadata associated with this vector. + #[serde(default)] + pub metadata: Metadata, +} diff --git a/integrations/nvisy-data/src/vector.rs b/integrations/nvisy-data/src/vector.rs new file mode 100644 index 0000000..b268338 --- /dev/null +++ b/integrations/nvisy-data/src/vector.rs @@ -0,0 +1,86 @@ +//! Vector output trait for inserting into vector stores. + +use async_trait::async_trait; + +use crate::error::DataResult; +use crate::types::{VectorData, VectorSearchResult}; + +/// Context for vector operations. +#[derive(Debug, Clone, Default)] +pub struct VectorContext { + /// The collection/index/namespace to operate on. + pub collection: String, + /// Additional options as key-value pairs. + pub options: std::collections::HashMap, +} + +impl VectorContext { + /// Creates a new context with the given collection name. + pub fn new(collection: impl Into) -> Self { + Self { + collection: collection.into(), + options: std::collections::HashMap::new(), + } + } + + /// Adds an option. + pub fn with_option(mut self, key: impl Into, value: impl Into) -> Self { + self.options.insert(key.into(), value.into()); + self + } +} + +/// Options for vector search operations. +#[derive(Debug, Clone, Default)] +pub struct VectorSearchOptions { + /// Whether to include the vector data in results. + pub include_vectors: bool, + /// Whether to include metadata in results. + pub include_metadata: bool, + /// Optional filter (backend-specific format). + pub filter: Option, +} + +impl VectorSearchOptions { + /// Creates new search options. + pub fn new() -> Self { + Self::default() + } + + /// Include vectors in the results. + pub fn with_vectors(mut self) -> Self { + self.include_vectors = true; + self + } + + /// Include metadata in the results. + pub fn with_metadata(mut self) -> Self { + self.include_metadata = true; + self + } + + /// Set a filter for the search. + pub fn with_filter(mut self, filter: serde_json::Value) -> Self { + self.filter = Some(filter); + self + } +} + +/// Trait for inserting vectors into vector stores. +#[async_trait] +pub trait VectorOutput: Send + Sync { + /// Inserts vectors into the specified collection. + /// + /// If vectors with the same IDs already exist, they may be overwritten + /// (behavior depends on the backend). + async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()>; + + /// Searches for similar vectors. 
+ async fn search( + &self, + ctx: &VectorContext, + query: Vec, + limit: usize, + options: VectorSearchOptions, + ) -> DataResult>; +} diff --git a/integrations/nvisy-opendal/Cargo.toml b/integrations/nvisy-opendal/Cargo.toml index 17b9ff4..47406aa 100644 --- a/integrations/nvisy-opendal/Cargo.toml +++ b/integrations/nvisy-opendal/Cargo.toml @@ -20,7 +20,7 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] # Internal crates -nvisy-core = { workspace = true } +nvisy-data = { workspace = true } # Async runtime tokio = { workspace = true, features = ["rt", "sync", "io-util"] } @@ -44,8 +44,8 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } # Derive macros & utilities -thiserror = { workspace = true, features = [] } -derive_more = { workspace = true, features = ["debug", "display", "from", "into"] } +async-trait = { workspace = true, features = [] } +bytes = { workspace = true, features = [] } # Data types jiff = { workspace = true, features = ["serde"] } diff --git a/integrations/nvisy-opendal/src/backend.rs b/integrations/nvisy-opendal/src/backend.rs index 0a810e6..80d54c6 100644 --- a/integrations/nvisy-opendal/src/backend.rs +++ b/integrations/nvisy-opendal/src/backend.rs @@ -1,12 +1,15 @@ //! Storage backend implementation. +use async_trait::async_trait; +use bytes::Bytes; +use futures::Stream; +use nvisy_data::{DataError, DataInput, DataOutput, DataResult, InputContext, OutputContext}; use opendal::{Operator, services}; use crate::TRACING_TARGET; use crate::azblob::AzureBlobConfig; use crate::config::StorageConfig; use crate::dropbox::DropboxConfig; -use crate::error::{StorageError, StorageResult}; use crate::gcs::GcsConfig; use crate::gdrive::GoogleDriveConfig; use crate::onedrive::OneDriveConfig; @@ -21,7 +24,7 @@ pub struct StorageBackend { impl StorageBackend { /// Creates a new storage backend from configuration. - pub async fn new(config: StorageConfig) -> StorageResult { + pub async fn new(config: StorageConfig) -> DataResult { let operator = Self::create_operator(&config)?; tracing::info!( @@ -43,75 +46,14 @@ impl StorageBackend { self.config.backend_name() } - /// Reads a file from storage. - pub async fn read(&self, path: &str) -> StorageResult> { - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "Reading file" - ); - - let data = self.operator.read(path).await?.to_vec(); - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - size = data.len(), - "File read complete" - ); - - Ok(data) - } - - /// Writes data to a file in storage. - pub async fn write(&self, path: &str, data: &[u8]) -> StorageResult<()> { - tracing::debug!( - target: TRACING_TARGET, - path = %path, - size = data.len(), - "Writing file" - ); - - self.operator.write(path, data.to_vec()).await?; - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "File write complete" - ); - - Ok(()) - } - - /// Deletes a file from storage. - pub async fn delete(&self, path: &str) -> StorageResult<()> { - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "Deleting file" - ); - - self.operator.delete(path).await?; - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "File deleted" - ); - - Ok(()) - } - - /// Checks if a file exists. - pub async fn exists(&self, path: &str) -> StorageResult { - Ok(self.operator.exists(path).await?) - } - /// Gets metadata for a file. 
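A short sketch of driving the `VectorOutput` trait end to end: insert one embedding, then query it back. The collection name, document ID, and result limit are illustrative, and the element types passed to `insert` and `search` (`VectorData`, `f32`, `VectorSearchResult`) are inferred from the surrounding definitions.

```rust
use nvisy_data::{DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions};

// Index a single embedding and run a similarity search against it.
async fn index_and_search(store: &dyn VectorOutput, embedding: Vec<f32>) -> DataResult<()> {
    let ctx = VectorContext::new("documents");

    let vector = VectorData::new("doc-1", embedding.clone());
    store.insert(&ctx, vec![vector]).await?;

    let options = VectorSearchOptions::new().with_metadata();
    let hits = store.search(&ctx, embedding, 5, options).await?;
    for hit in hits {
        println!("{} scored {:.3}", hit.id, hit.score);
    }
    Ok(())
}
```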
- pub async fn stat(&self, path: &str) -> StorageResult { - let meta = self.operator.stat(path).await?; + pub async fn stat(&self, path: &str) -> DataResult { + let meta = self + .operator + .stat(path) + .await + .map_err(|e| DataError::backend(e.to_string()))?; - // Convert chrono DateTime to jiff Timestamp let last_modified = meta .last_modified() .and_then(|dt| jiff::Timestamp::from_second(dt.timestamp()).ok()); @@ -123,17 +65,8 @@ impl StorageBackend { }) } - /// Lists files in a directory. - pub async fn list(&self, path: &str) -> StorageResult> { - use futures::TryStreamExt; - - let entries: Vec<_> = self.operator.lister(path).await?.try_collect().await?; - - Ok(entries.into_iter().map(|e| e.path().to_string()).collect()) - } - /// Copies a file from one path to another. - pub async fn copy(&self, from: &str, to: &str) -> StorageResult<()> { + pub async fn copy(&self, from: &str, to: &str) -> DataResult<()> { tracing::debug!( target: TRACING_TARGET, from = %from, @@ -141,13 +74,16 @@ impl StorageBackend { "Copying file" ); - self.operator.copy(from, to).await?; + self.operator + .copy(from, to) + .await + .map_err(|e| DataError::backend(e.to_string()))?; Ok(()) } /// Moves a file from one path to another. - pub async fn rename(&self, from: &str, to: &str) -> StorageResult<()> { + pub async fn rename(&self, from: &str, to: &str) -> DataResult<()> { tracing::debug!( target: TRACING_TARGET, from = %from, @@ -155,13 +91,16 @@ impl StorageBackend { "Moving file" ); - self.operator.rename(from, to).await?; + self.operator + .rename(from, to) + .await + .map_err(|e| DataError::backend(e.to_string()))?; Ok(()) } /// Creates an OpenDAL operator based on configuration. - fn create_operator(config: &StorageConfig) -> StorageResult { + fn create_operator(config: &StorageConfig) -> DataResult { match config { StorageConfig::S3(cfg) => Self::create_s3_operator(cfg), StorageConfig::Gcs(cfg) => Self::create_gcs_operator(cfg), @@ -172,7 +111,7 @@ impl StorageBackend { } } - fn create_s3_operator(cfg: &S3Config) -> StorageResult { + fn create_s3_operator(cfg: &S3Config) -> DataResult { let mut builder = services::S3::default() .bucket(&cfg.bucket) .region(&cfg.region); @@ -189,34 +128,32 @@ impl StorageBackend { builder = builder.secret_access_key(secret_access_key); } - // Apply prefix as root path if let Some(ref prefix) = cfg.prefix { builder = builder.root(prefix); } Operator::new(builder) .map(|op| op.finish()) - .map_err(|e| StorageError::init(e.to_string())) + .map_err(|e| DataError::backend(e.to_string())) } - fn create_gcs_operator(cfg: &GcsConfig) -> StorageResult { + fn create_gcs_operator(cfg: &GcsConfig) -> DataResult { let mut builder = services::Gcs::default().bucket(&cfg.bucket); if let Some(ref credentials) = cfg.credentials { builder = builder.credential(credentials); } - // Apply prefix as root path if let Some(ref prefix) = cfg.prefix { builder = builder.root(prefix); } Operator::new(builder) .map(|op| op.finish()) - .map_err(|e| StorageError::init(e.to_string())) + .map_err(|e| DataError::backend(e.to_string())) } - fn create_azblob_operator(cfg: &AzureBlobConfig) -> StorageResult { + fn create_azblob_operator(cfg: &AzureBlobConfig) -> DataResult { let mut builder = services::Azblob::default() .container(&cfg.container) .account_name(&cfg.account_name); @@ -225,17 +162,16 @@ impl StorageBackend { builder = builder.account_key(account_key); } - // Apply prefix as root path if let Some(ref prefix) = cfg.prefix { builder = builder.root(prefix); } Operator::new(builder) .map(|op| 
op.finish()) - .map_err(|e| StorageError::init(e.to_string())) + .map_err(|e| DataError::backend(e.to_string())) } - fn create_gdrive_operator(cfg: &GoogleDriveConfig) -> StorageResult { + fn create_gdrive_operator(cfg: &GoogleDriveConfig) -> DataResult { let mut builder = services::Gdrive::default().root(&cfg.root); if let Some(ref access_token) = cfg.access_token { @@ -244,10 +180,10 @@ impl StorageBackend { Operator::new(builder) .map(|op| op.finish()) - .map_err(|e| StorageError::init(e.to_string())) + .map_err(|e| DataError::backend(e.to_string())) } - fn create_dropbox_operator(cfg: &DropboxConfig) -> StorageResult { + fn create_dropbox_operator(cfg: &DropboxConfig) -> DataResult { let mut builder = services::Dropbox::default().root(&cfg.root); if let Some(ref access_token) = cfg.access_token { @@ -268,10 +204,10 @@ impl StorageBackend { Operator::new(builder) .map(|op| op.finish()) - .map_err(|e| StorageError::init(e.to_string())) + .map_err(|e| DataError::backend(e.to_string())) } - fn create_onedrive_operator(cfg: &OneDriveConfig) -> StorageResult { + fn create_onedrive_operator(cfg: &OneDriveConfig) -> DataResult { let mut builder = services::Onedrive::default().root(&cfg.root); if let Some(ref access_token) = cfg.access_token { @@ -280,7 +216,172 @@ impl StorageBackend { Operator::new(builder) .map(|op| op.finish()) - .map_err(|e| StorageError::init(e.to_string())) + .map_err(|e| DataError::backend(e.to_string())) + } +} + +#[async_trait] +impl DataInput for StorageBackend { + async fn read(&self, _ctx: &InputContext, path: &str) -> DataResult { + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "Reading file" + ); + + let data = self + .operator + .read(path) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + size = data.len(), + "File read complete" + ); + + Ok(data.to_bytes()) + } + + async fn read_stream( + &self, + _ctx: &InputContext, + path: &str, + ) -> DataResult> + Send + Unpin>> { + use futures::StreamExt; + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "Reading file as stream" + ); + + let reader = self + .operator + .reader(path) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + let stream = reader + .into_bytes_stream(0..u64::MAX) + .await + .map_err(|e| DataError::backend(e.to_string()))? + .map(|result| result.map_err(|e| DataError::backend(e.to_string()))); + + Ok(Box::new(stream)) + } + + async fn exists(&self, _ctx: &InputContext, path: &str) -> DataResult { + self.operator + .exists(path) + .await + .map_err(|e| DataError::backend(e.to_string())) + } + + async fn list(&self, _ctx: &InputContext, prefix: &str) -> DataResult> { + use futures::TryStreamExt; + + let entries: Vec<_> = self + .operator + .lister(prefix) + .await + .map_err(|e| DataError::backend(e.to_string()))? 
+ .try_collect() + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + Ok(entries.into_iter().map(|e| e.path().to_string()).collect()) + } +} + +#[async_trait] +impl DataOutput for StorageBackend { + async fn write(&self, _ctx: &OutputContext, path: &str, data: Bytes) -> DataResult<()> { + tracing::debug!( + target: TRACING_TARGET, + path = %path, + size = data.len(), + "Writing file" + ); + + self.operator + .write(path, data) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "File write complete" + ); + + Ok(()) + } + + async fn write_stream( + &self, + _ctx: &OutputContext, + path: &str, + stream: Box> + Send + Unpin>, + ) -> DataResult<()> { + use futures::StreamExt; + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "Writing file from stream" + ); + + let mut writer = self + .operator + .writer(path) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + let mut stream = stream; + while let Some(result) = stream.next().await { + let chunk = result?; + writer + .write(chunk) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + } + + writer + .close() + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "File stream write complete" + ); + + Ok(()) + } + + async fn delete(&self, _ctx: &OutputContext, path: &str) -> DataResult<()> { + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "Deleting file" + ); + + self.operator + .delete(path) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET, + path = %path, + "File deleted" + ); + + Ok(()) } } diff --git a/integrations/nvisy-opendal/src/error.rs b/integrations/nvisy-opendal/src/error.rs deleted file mode 100644 index b9ff0b4..0000000 --- a/integrations/nvisy-opendal/src/error.rs +++ /dev/null @@ -1,98 +0,0 @@ -//! Storage error types. - -/// Result type for storage operations. -pub type StorageResult = Result; - -/// Errors that can occur during storage operations. -#[derive(Debug, thiserror::Error)] -pub enum StorageError { - /// Failed to initialize the storage backend. - #[error("storage initialization failed: {0}")] - Init(String), - - /// File or object not found. - #[error("not found: {0}")] - NotFound(String), - - /// Permission denied. - #[error("permission denied: {0}")] - PermissionDenied(String), - - /// Read operation failed. - #[error("read failed: {0}")] - Read(String), - - /// Write operation failed. - #[error("write failed: {0}")] - Write(String), - - /// Delete operation failed. - #[error("delete failed: {0}")] - Delete(String), - - /// List operation failed. - #[error("list failed: {0}")] - List(String), - - /// Invalid path or URI. - #[error("invalid path: {0}")] - InvalidPath(String), - - /// Backend-specific error. - #[error("backend error: {0}")] - Backend(opendal::Error), -} - -impl StorageError { - /// Creates a new initialization error. - pub fn init(msg: impl Into) -> Self { - Self::Init(msg.into()) - } - - /// Creates a new not found error. - pub fn not_found(path: impl Into) -> Self { - Self::NotFound(path.into()) - } - - /// Creates a new permission denied error. - pub fn permission_denied(msg: impl Into) -> Self { - Self::PermissionDenied(msg.into()) - } - - /// Creates a new read error. - pub fn read(msg: impl Into) -> Self { - Self::Read(msg.into()) - } - - /// Creates a new write error. 
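With `StorageBackend` now implementing both traits, large payloads can be relayed without buffering them in memory by chaining `read_stream` into `write_stream`. A minimal sketch; the paths are placeholders and the stream item type (`DataResult<Bytes>`) is inferred from the implementation above.

```rust
use nvisy_data::{DataInput, DataOutput, DataResult, InputContext, OutputContext};
use nvisy_opendal::StorageBackend;

// Stream an object from one path to another on the same backend.
async fn stream_copy(backend: &StorageBackend, from: &str, to: &str) -> DataResult<()> {
    let stream = backend.read_stream(&InputContext::new(), from).await?;
    backend.write_stream(&OutputContext::new(), to, stream).await
}
```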
- pub fn write(msg: impl Into) -> Self { - Self::Write(msg.into()) - } - - /// Creates a new delete error. - pub fn delete(msg: impl Into) -> Self { - Self::Delete(msg.into()) - } - - /// Creates a new list error. - pub fn list(msg: impl Into) -> Self { - Self::List(msg.into()) - } - - /// Creates a new invalid path error. - pub fn invalid_path(msg: impl Into) -> Self { - Self::InvalidPath(msg.into()) - } -} - -impl From for StorageError { - fn from(err: opendal::Error) -> Self { - use opendal::ErrorKind; - - match err.kind() { - ErrorKind::NotFound => Self::NotFound(err.to_string()), - ErrorKind::PermissionDenied => Self::PermissionDenied(err.to_string()), - _ => Self::Backend(err), - } - } -} diff --git a/integrations/nvisy-opendal/src/lib.rs b/integrations/nvisy-opendal/src/lib.rs index 808c9c2..783080d 100644 --- a/integrations/nvisy-opendal/src/lib.rs +++ b/integrations/nvisy-opendal/src/lib.rs @@ -1,6 +1,10 @@ +//! Storage backends using OpenDAL. +//! +//! This crate provides storage implementations that implement the +//! [`DataInput`] and [`DataOutput`] traits from `nvisy-data`. + #![forbid(unsafe_code)] #![cfg_attr(docsrs, feature(doc_cfg))] -#![doc = include_str!("../README.md")] pub mod azblob; pub mod dropbox; @@ -11,14 +15,15 @@ pub mod s3; mod backend; mod config; -mod error; pub use backend::{FileMetadata, StorageBackend}; pub use config::{ AzureBlobConfig, DropboxConfig, GcsConfig, GoogleDriveConfig, OneDriveConfig, S3Config, StorageConfig, }; -pub use error::{StorageError, StorageResult}; + +// Re-export types from nvisy-data for convenience +pub use nvisy_data::{DataError, DataInput, DataOutput, DataResult, InputContext, OutputContext}; /// Tracing target for storage operations. pub const TRACING_TARGET: &str = "nvisy_opendal"; diff --git a/crates/nvisy-rig/Cargo.toml b/integrations/nvisy-rig/Cargo.toml similarity index 100% rename from crates/nvisy-rig/Cargo.toml rename to integrations/nvisy-rig/Cargo.toml diff --git a/crates/nvisy-rig/README.md b/integrations/nvisy-rig/README.md similarity index 100% rename from crates/nvisy-rig/README.md rename to integrations/nvisy-rig/README.md diff --git a/crates/nvisy-rig/src/chat/agent/context.rs b/integrations/nvisy-rig/src/chat/agent/context.rs similarity index 100% rename from crates/nvisy-rig/src/chat/agent/context.rs rename to integrations/nvisy-rig/src/chat/agent/context.rs diff --git a/crates/nvisy-rig/src/chat/agent/executor.rs b/integrations/nvisy-rig/src/chat/agent/executor.rs similarity index 100% rename from crates/nvisy-rig/src/chat/agent/executor.rs rename to integrations/nvisy-rig/src/chat/agent/executor.rs diff --git a/crates/nvisy-rig/src/chat/agent/mod.rs b/integrations/nvisy-rig/src/chat/agent/mod.rs similarity index 100% rename from crates/nvisy-rig/src/chat/agent/mod.rs rename to integrations/nvisy-rig/src/chat/agent/mod.rs diff --git a/crates/nvisy-rig/src/chat/agent/prompt.rs b/integrations/nvisy-rig/src/chat/agent/prompt.rs similarity index 100% rename from crates/nvisy-rig/src/chat/agent/prompt.rs rename to integrations/nvisy-rig/src/chat/agent/prompt.rs diff --git a/crates/nvisy-rig/src/chat/event.rs b/integrations/nvisy-rig/src/chat/event.rs similarity index 100% rename from crates/nvisy-rig/src/chat/event.rs rename to integrations/nvisy-rig/src/chat/event.rs diff --git a/crates/nvisy-rig/src/chat/mod.rs b/integrations/nvisy-rig/src/chat/mod.rs similarity index 100% rename from crates/nvisy-rig/src/chat/mod.rs rename to integrations/nvisy-rig/src/chat/mod.rs diff --git 
a/crates/nvisy-rig/src/chat/response.rs b/integrations/nvisy-rig/src/chat/response.rs similarity index 100% rename from crates/nvisy-rig/src/chat/response.rs rename to integrations/nvisy-rig/src/chat/response.rs diff --git a/crates/nvisy-rig/src/chat/service.rs b/integrations/nvisy-rig/src/chat/service.rs similarity index 100% rename from crates/nvisy-rig/src/chat/service.rs rename to integrations/nvisy-rig/src/chat/service.rs diff --git a/crates/nvisy-rig/src/chat/stream.rs b/integrations/nvisy-rig/src/chat/stream.rs similarity index 100% rename from crates/nvisy-rig/src/chat/stream.rs rename to integrations/nvisy-rig/src/chat/stream.rs diff --git a/crates/nvisy-rig/src/chat/usage.rs b/integrations/nvisy-rig/src/chat/usage.rs similarity index 100% rename from crates/nvisy-rig/src/chat/usage.rs rename to integrations/nvisy-rig/src/chat/usage.rs diff --git a/crates/nvisy-rig/src/error.rs b/integrations/nvisy-rig/src/error.rs similarity index 100% rename from crates/nvisy-rig/src/error.rs rename to integrations/nvisy-rig/src/error.rs diff --git a/crates/nvisy-rig/src/lib.rs b/integrations/nvisy-rig/src/lib.rs similarity index 100% rename from crates/nvisy-rig/src/lib.rs rename to integrations/nvisy-rig/src/lib.rs diff --git a/crates/nvisy-rig/src/provider/config.rs b/integrations/nvisy-rig/src/provider/config.rs similarity index 100% rename from crates/nvisy-rig/src/provider/config.rs rename to integrations/nvisy-rig/src/provider/config.rs diff --git a/crates/nvisy-rig/src/provider/embedding.rs b/integrations/nvisy-rig/src/provider/embedding.rs similarity index 100% rename from crates/nvisy-rig/src/provider/embedding.rs rename to integrations/nvisy-rig/src/provider/embedding.rs diff --git a/crates/nvisy-rig/src/provider/mod.rs b/integrations/nvisy-rig/src/provider/mod.rs similarity index 100% rename from crates/nvisy-rig/src/provider/mod.rs rename to integrations/nvisy-rig/src/provider/mod.rs diff --git a/crates/nvisy-rig/src/provider/registry.rs b/integrations/nvisy-rig/src/provider/registry.rs similarity index 100% rename from crates/nvisy-rig/src/provider/registry.rs rename to integrations/nvisy-rig/src/provider/registry.rs diff --git a/crates/nvisy-rig/src/rag/config.rs b/integrations/nvisy-rig/src/rag/config.rs similarity index 100% rename from crates/nvisy-rig/src/rag/config.rs rename to integrations/nvisy-rig/src/rag/config.rs diff --git a/crates/nvisy-rig/src/rag/indexer/indexed.rs b/integrations/nvisy-rig/src/rag/indexer/indexed.rs similarity index 100% rename from crates/nvisy-rig/src/rag/indexer/indexed.rs rename to integrations/nvisy-rig/src/rag/indexer/indexed.rs diff --git a/crates/nvisy-rig/src/rag/indexer/mod.rs b/integrations/nvisy-rig/src/rag/indexer/mod.rs similarity index 100% rename from crates/nvisy-rig/src/rag/indexer/mod.rs rename to integrations/nvisy-rig/src/rag/indexer/mod.rs diff --git a/crates/nvisy-rig/src/rag/mod.rs b/integrations/nvisy-rig/src/rag/mod.rs similarity index 100% rename from crates/nvisy-rig/src/rag/mod.rs rename to integrations/nvisy-rig/src/rag/mod.rs diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/integrations/nvisy-rig/src/rag/searcher/mod.rs similarity index 100% rename from crates/nvisy-rig/src/rag/searcher/mod.rs rename to integrations/nvisy-rig/src/rag/searcher/mod.rs diff --git a/crates/nvisy-rig/src/rag/searcher/retrieved.rs b/integrations/nvisy-rig/src/rag/searcher/retrieved.rs similarity index 100% rename from crates/nvisy-rig/src/rag/searcher/retrieved.rs rename to integrations/nvisy-rig/src/rag/searcher/retrieved.rs diff 
--git a/crates/nvisy-rig/src/rag/searcher/scope.rs b/integrations/nvisy-rig/src/rag/searcher/scope.rs similarity index 100% rename from crates/nvisy-rig/src/rag/searcher/scope.rs rename to integrations/nvisy-rig/src/rag/searcher/scope.rs diff --git a/crates/nvisy-rig/src/rag/splitter/chunk.rs b/integrations/nvisy-rig/src/rag/splitter/chunk.rs similarity index 100% rename from crates/nvisy-rig/src/rag/splitter/chunk.rs rename to integrations/nvisy-rig/src/rag/splitter/chunk.rs diff --git a/crates/nvisy-rig/src/rag/splitter/metadata.rs b/integrations/nvisy-rig/src/rag/splitter/metadata.rs similarity index 100% rename from crates/nvisy-rig/src/rag/splitter/metadata.rs rename to integrations/nvisy-rig/src/rag/splitter/metadata.rs diff --git a/crates/nvisy-rig/src/rag/splitter/mod.rs b/integrations/nvisy-rig/src/rag/splitter/mod.rs similarity index 100% rename from crates/nvisy-rig/src/rag/splitter/mod.rs rename to integrations/nvisy-rig/src/rag/splitter/mod.rs diff --git a/crates/nvisy-rig/src/service/config.rs b/integrations/nvisy-rig/src/service/config.rs similarity index 100% rename from crates/nvisy-rig/src/service/config.rs rename to integrations/nvisy-rig/src/service/config.rs diff --git a/crates/nvisy-rig/src/service/mod.rs b/integrations/nvisy-rig/src/service/mod.rs similarity index 100% rename from crates/nvisy-rig/src/service/mod.rs rename to integrations/nvisy-rig/src/service/mod.rs diff --git a/crates/nvisy-rig/src/service/rig.rs b/integrations/nvisy-rig/src/service/rig.rs similarity index 100% rename from crates/nvisy-rig/src/service/rig.rs rename to integrations/nvisy-rig/src/service/rig.rs diff --git a/crates/nvisy-rig/src/session/message.rs b/integrations/nvisy-rig/src/session/message.rs similarity index 100% rename from crates/nvisy-rig/src/session/message.rs rename to integrations/nvisy-rig/src/session/message.rs diff --git a/crates/nvisy-rig/src/session/mod.rs b/integrations/nvisy-rig/src/session/mod.rs similarity index 100% rename from crates/nvisy-rig/src/session/mod.rs rename to integrations/nvisy-rig/src/session/mod.rs diff --git a/crates/nvisy-rig/src/session/policy.rs b/integrations/nvisy-rig/src/session/policy.rs similarity index 100% rename from crates/nvisy-rig/src/session/policy.rs rename to integrations/nvisy-rig/src/session/policy.rs diff --git a/crates/nvisy-rig/src/session/store.rs b/integrations/nvisy-rig/src/session/store.rs similarity index 100% rename from crates/nvisy-rig/src/session/store.rs rename to integrations/nvisy-rig/src/session/store.rs diff --git a/crates/nvisy-rig/src/tool/definition.rs b/integrations/nvisy-rig/src/tool/definition.rs similarity index 100% rename from crates/nvisy-rig/src/tool/definition.rs rename to integrations/nvisy-rig/src/tool/definition.rs diff --git a/crates/nvisy-rig/src/tool/edit/mod.rs b/integrations/nvisy-rig/src/tool/edit/mod.rs similarity index 100% rename from crates/nvisy-rig/src/tool/edit/mod.rs rename to integrations/nvisy-rig/src/tool/edit/mod.rs diff --git a/crates/nvisy-rig/src/tool/edit/operation.rs b/integrations/nvisy-rig/src/tool/edit/operation.rs similarity index 100% rename from crates/nvisy-rig/src/tool/edit/operation.rs rename to integrations/nvisy-rig/src/tool/edit/operation.rs diff --git a/crates/nvisy-rig/src/tool/edit/proposed.rs b/integrations/nvisy-rig/src/tool/edit/proposed.rs similarity index 100% rename from crates/nvisy-rig/src/tool/edit/proposed.rs rename to integrations/nvisy-rig/src/tool/edit/proposed.rs diff --git a/crates/nvisy-rig/src/tool/mod.rs b/integrations/nvisy-rig/src/tool/mod.rs 
similarity index 100% rename from crates/nvisy-rig/src/tool/mod.rs rename to integrations/nvisy-rig/src/tool/mod.rs diff --git a/crates/nvisy-rig/src/tool/registry.rs b/integrations/nvisy-rig/src/tool/registry.rs similarity index 100% rename from crates/nvisy-rig/src/tool/registry.rs rename to integrations/nvisy-rig/src/tool/registry.rs diff --git a/crates/nvisy-rig/src/tool/types.rs b/integrations/nvisy-rig/src/tool/types.rs similarity index 100% rename from crates/nvisy-rig/src/tool/types.rs rename to integrations/nvisy-rig/src/tool/types.rs diff --git a/integrations/nvisy-runtime/src/error.rs b/integrations/nvisy-runtime/src/error.rs index 5b6b0dc..b809d69 100644 --- a/integrations/nvisy-runtime/src/error.rs +++ b/integrations/nvisy-runtime/src/error.rs @@ -42,7 +42,7 @@ pub enum WorkflowError { /// Storage operation failed. #[error("storage error: {0}")] - Storage(#[from] nvisy_opendal::StorageError), + Storage(#[from] nvisy_opendal::DataError), /// Serialization/deserialization error. #[error("serialization error: {0}")] diff --git a/integrations/nvisy-vector/Cargo.toml b/integrations/nvisy-vector/Cargo.toml index 281d574..7f5971e 100644 --- a/integrations/nvisy-vector/Cargo.toml +++ b/integrations/nvisy-vector/Cargo.toml @@ -20,7 +20,7 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] # Internal crates -nvisy-core = { workspace = true } +nvisy-data = { workspace = true } # Async runtime tokio = { workspace = true, features = ["rt", "sync"] } @@ -37,12 +37,19 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } # Derive macros & utilities -thiserror = { workspace = true, features = [] } -derive_more = { workspace = true, features = ["debug", "display", "from", "into"] } async-trait = { workspace = true, features = [] } # Vector store clients qdrant-client = "1.13" +pinecone-sdk = "0.1" +milvus-sdk-rust = "0.1" +prost-types = "0.12" + +# Database (for pgvector) +diesel = { workspace = true, features = ["postgres"] } +diesel-async = { workspace = true, features = ["postgres", "deadpool"] } +pgvector = { workspace = true, features = ["diesel"] } +deadpool = { workspace = true, features = [] } [dev-dependencies] tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/integrations/nvisy-vector/src/error.rs b/integrations/nvisy-vector/src/error.rs deleted file mode 100644 index 883168c..0000000 --- a/integrations/nvisy-vector/src/error.rs +++ /dev/null @@ -1,99 +0,0 @@ -//! Vector store error types. - -use thiserror::Error; - -/// Result type for vector store operations. -pub type VectorResult = Result; - -/// Vector store errors. -#[derive(Debug, Error)] -pub enum VectorError { - /// Connection error. - #[error("connection error: {0}")] - Connection(String), - - /// Collection not found. - #[error("collection not found: {0}")] - CollectionNotFound(String), - - /// Invalid configuration. - #[error("invalid configuration: {0}")] - InvalidConfig(String), - - /// Authentication error. - #[error("authentication error: {0}")] - Authentication(String), - - /// Operation timeout. - #[error("operation timed out: {0}")] - Timeout(String), - - /// Vector dimension mismatch. - #[error("dimension mismatch: expected {expected}, got {actual}")] - DimensionMismatch { expected: usize, actual: usize }, - - /// Backend-specific error. - #[error("backend error: {0}")] - Backend(String), - - /// Serialization/deserialization error. - #[error("serialization error: {0}")] - Serialization(String), - - /// Feature not enabled. 
- #[error("feature not enabled: {0}")] - FeatureNotEnabled(String), -} - -impl VectorError { - /// Creates a connection error. - pub fn connection(msg: impl Into) -> Self { - Self::Connection(msg.into()) - } - - /// Creates a collection not found error. - pub fn collection_not_found(name: impl Into) -> Self { - Self::CollectionNotFound(name.into()) - } - - /// Creates an invalid config error. - pub fn invalid_config(msg: impl Into) -> Self { - Self::InvalidConfig(msg.into()) - } - - /// Creates an authentication error. - pub fn authentication(msg: impl Into) -> Self { - Self::Authentication(msg.into()) - } - - /// Creates a timeout error. - pub fn timeout(msg: impl Into) -> Self { - Self::Timeout(msg.into()) - } - - /// Creates a dimension mismatch error. - pub fn dimension_mismatch(expected: usize, actual: usize) -> Self { - Self::DimensionMismatch { expected, actual } - } - - /// Creates a backend error. - pub fn backend(msg: impl Into) -> Self { - Self::Backend(msg.into()) - } - - /// Creates a serialization error. - pub fn serialization(msg: impl Into) -> Self { - Self::Serialization(msg.into()) - } - - /// Creates a feature not enabled error. - pub fn feature_not_enabled(feature: impl Into) -> Self { - Self::FeatureNotEnabled(feature.into()) - } -} - -impl From for VectorError { - fn from(err: serde_json::Error) -> Self { - Self::serialization(err.to_string()) - } -} diff --git a/integrations/nvisy-vector/src/lib.rs b/integrations/nvisy-vector/src/lib.rs index d39e619..2d6260e 100644 --- a/integrations/nvisy-vector/src/lib.rs +++ b/integrations/nvisy-vector/src/lib.rs @@ -1,6 +1,10 @@ +//! Vector store backends for nvisy. +//! +//! This crate provides vector store implementations that implement the +//! [`VectorOutput`] trait from `nvisy-data`. + #![forbid(unsafe_code)] #![cfg_attr(docsrs, feature(doc_cfg))] -#![doc = include_str!("../README.md")] pub mod milvus; pub mod pgvector; @@ -8,15 +12,19 @@ pub mod pinecone; pub mod qdrant; mod config; -mod error; mod store; pub use config::{ MilvusConfig, PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType, PineconeConfig, QdrantConfig, VectorStoreConfig, }; -pub use error::{VectorError, VectorResult}; -pub use store::{SearchOptions, SearchResult, VectorData, VectorStore, VectorStoreBackend}; +pub use store::VectorStore; + +// Re-export types from nvisy-data for convenience +pub use nvisy_data::{ + DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, + VectorSearchResult, +}; /// Tracing target for vector store operations. pub const TRACING_TARGET: &str = "nvisy_vector"; diff --git a/integrations/nvisy-vector/src/milvus/backend.rs b/integrations/nvisy-vector/src/milvus/backend.rs index 06aab94..8222732 100644 --- a/integrations/nvisy-vector/src/milvus/backend.rs +++ b/integrations/nvisy-vector/src/milvus/backend.rs @@ -1,124 +1,246 @@ //! Milvus backend implementation. -//! -//! This is a stub implementation. The Milvus SDK API differs significantly -//! from the interface we designed. A full implementation would require -//! adapting to the actual milvus-sdk-rust API. 
+ +use std::borrow::Cow; +use std::collections::HashMap; use async_trait::async_trait; +use milvus::client::Client; +use milvus::collection::SearchOption; +use milvus::data::FieldColumn; +use milvus::index::{IndexParams, IndexType, MetricType}; +use milvus::schema::{CollectionSchemaBuilder, FieldSchema}; +use milvus::value::{Value, ValueVec}; +use nvisy_data::{ + DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, + VectorSearchResult, +}; use super::MilvusConfig; use crate::TRACING_TARGET; -use crate::error::{VectorError, VectorResult}; -use crate::store::{SearchOptions, SearchResult, VectorData, VectorStoreBackend}; /// Milvus backend implementation. pub struct MilvusBackend { + client: Client, #[allow(dead_code)] config: MilvusConfig, } impl MilvusBackend { /// Creates a new Milvus backend. - pub async fn new(config: &MilvusConfig) -> VectorResult { + pub async fn new(config: &MilvusConfig) -> DataResult { + let url = format!("http://{}:{}", config.host, config.port); + + let client = Client::new(url) + .await + .map_err(|e| DataError::connection(e.to_string()))?; + tracing::debug!( target: TRACING_TARGET, host = %config.host, port = %config.port, - "Milvus backend initialized (stub implementation)" + "Connected to Milvus" ); Ok(Self { + client, config: config.clone(), }) } -} -#[async_trait] -impl VectorStoreBackend for MilvusBackend { - async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { - tracing::warn!( - target: TRACING_TARGET, - collection = %name, - dimensions = %dimensions, - "Milvus create_collection is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Milvus backend is not yet implemented", - )) - } + /// Ensures a collection exists, creating it if necessary. 
+ async fn ensure_collection(&self, name: &str, dimensions: usize) -> DataResult<()> { + let exists = self + .client + .has_collection(name) + .await + .map_err(|e| DataError::backend(e.to_string()))?; - async fn delete_collection(&self, name: &str) -> VectorResult<()> { - tracing::warn!( - target: TRACING_TARGET, - collection = %name, - "Milvus delete_collection is a stub - not yet implemented" + if exists { + return Ok(()); + } + + // Build the collection schema + let mut builder = CollectionSchemaBuilder::new(name, "Vector collection"); + builder.add_field(FieldSchema::new_primary_int64("_id", "primary key", true)); + builder.add_field(FieldSchema::new_varchar("id", "string id", 256)); + builder.add_field(FieldSchema::new_float_vector( + "vector", + "embedding vector", + dimensions as i64, + )); + builder.add_field(FieldSchema::new_varchar("metadata", "json metadata", 65535)); + + let schema = builder + .build() + .map_err(|e| DataError::backend(e.to_string()))?; + + // Create the collection + self.client + .create_collection(schema, None) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + // Create index on vector field + let index_params = IndexParams::new( + "vector_index".to_string(), + IndexType::IvfFlat, + MetricType::L2, + HashMap::from([("nlist".to_string(), "128".to_string())]), ); - Err(VectorError::backend( - "Milvus backend is not yet implemented", - )) - } - async fn collection_exists(&self, name: &str) -> VectorResult { - tracing::warn!( + let collection = self + .client + .get_collection(name) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + collection + .create_index("vector", index_params) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + // Load collection into memory + collection + .load(1) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + tracing::info!( target: TRACING_TARGET, collection = %name, - "Milvus collection_exists is a stub - not yet implemented" + dimensions = %dimensions, + "Created Milvus collection" ); - Err(VectorError::backend( - "Milvus backend is not yet implemented", - )) + + Ok(()) } +} - async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { - tracing::warn!( - target: TRACING_TARGET, - collection = %collection, - count = %vectors.len(), - "Milvus upsert is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Milvus backend is not yet implemented", - )) +#[async_trait] +impl VectorOutput for MilvusBackend { + async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()> { + if vectors.is_empty() { + return Ok(()); + } + + // Get the dimension from the first vector + let dim = vectors.first().map(|v| v.vector.len()).unwrap_or(0); + + // Ensure collection exists + self.ensure_collection(&ctx.collection, dim).await?; + + let coll = self + .client + .get_collection(&ctx.collection) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + let ids: Vec = vectors.iter().map(|v| v.id.clone()).collect(); + let embeddings: Vec = vectors + .iter() + .flat_map(|v| v.vector.iter().copied()) + .collect(); + let metadata: Vec = vectors + .iter() + .map(|v| serde_json::to_string(&v.metadata).unwrap_or_default()) + .collect(); + + // Create field schemas for columns + let id_schema = FieldSchema::new_varchar("id", "string id", 256); + let vector_schema = FieldSchema::new_float_vector("vector", "embedding vector", dim as i64); + let metadata_schema = FieldSchema::new_varchar("metadata", "json metadata", 65535); + + let columns = vec![ + 
FieldColumn::new(&id_schema, ValueVec::String(ids)), + FieldColumn::new(&vector_schema, ValueVec::Float(embeddings)), + FieldColumn::new(&metadata_schema, ValueVec::String(metadata)), + ]; + + coll.insert(columns, None) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + Ok(()) } async fn search( &self, - collection: &str, - _query: Vec, - _limit: usize, - _options: SearchOptions, - ) -> VectorResult> { - tracing::warn!( - target: TRACING_TARGET, - collection = %collection, - "Milvus search is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Milvus backend is not yet implemented", - )) - } + ctx: &VectorContext, + query: Vec, + limit: usize, + _options: VectorSearchOptions, + ) -> DataResult> { + let coll = self + .client + .get_collection(&ctx.collection) + .await + .map_err(|e| DataError::backend(e.to_string()))?; - async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { - tracing::warn!( - target: TRACING_TARGET, - collection = %collection, - count = %ids.len(), - "Milvus delete is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Milvus backend is not yet implemented", - )) - } + let mut search_option = SearchOption::new(); + search_option.add_param("nprobe", serde_json::json!(16)); - async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { - tracing::warn!( - target: TRACING_TARGET, - collection = %collection, - count = %ids.len(), - "Milvus get is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Milvus backend is not yet implemented", - )) + let query_value = Value::FloatArray(Cow::Owned(query)); + + let results = coll + .search( + vec![query_value], + "vector", + limit as i32, + MetricType::L2, + vec!["id", "metadata"], + &search_option, + ) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + let mut search_results = Vec::new(); + + for result in results { + for i in 0..result.size as usize { + let id = match result.id.get(i) { + Some(Value::String(s)) => s.to_string(), + Some(Value::Long(l)) => l.to_string(), + _ => continue, + }; + + let score = result.score.get(i).copied().unwrap_or(0.0); + + // Extract metadata from fields + let metadata_str = result + .field + .iter() + .find(|f| f.name == "metadata") + .and_then(|f| f.get(i)) + .and_then(|v| match v { + Value::String(s) => Some(s.to_string()), + _ => None, + }); + + let metadata: HashMap = metadata_str + .and_then(|s| serde_json::from_str(&s).ok()) + .unwrap_or_default(); + + // Get string id if available + let string_id = result + .field + .iter() + .find(|f| f.name == "id") + .and_then(|f| f.get(i)) + .and_then(|v| match v { + Value::String(s) => Some(s.to_string()), + _ => None, + }) + .unwrap_or(id); + + search_results.push(VectorSearchResult { + id: string_id, + score, + vector: None, + metadata, + }); + } + } + + Ok(search_results) } } diff --git a/integrations/nvisy-vector/src/pgvector/backend.rs b/integrations/nvisy-vector/src/pgvector/backend.rs index 66262a7..f04088f 100644 --- a/integrations/nvisy-vector/src/pgvector/backend.rs +++ b/integrations/nvisy-vector/src/pgvector/backend.rs @@ -1,33 +1,51 @@ -//! pgvector backend implementation. +//! PostgreSQL pgvector backend implementation. 
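Before the pgvector diff continues, a note on scoring: its rewritten `search` maps each pgvector distance operator to a similarity-style score directly in SQL. The sketch below restates that conversion in plain Rust for clarity; the `Metric` enum is local to this illustration and stands in for `PgVectorDistanceMetric`.

```rust
// Local illustration only: mirrors how the pgvector `search` further down
// turns the raw distance operators into the score it returns.
#[derive(Clone, Copy)]
enum Metric {
    L2,           // `<->` operator
    InnerProduct, // `<#>` operator
    Cosine,       // `<=>` operator
}

fn score_from_distance(metric: Metric, distance: f32) -> f32 {
    match metric {
        // L2 distance is reported as-is (smaller means closer).
        Metric::L2 => distance,
        // pgvector's `<#>` returns the *negated* inner product, so negate
        // it back to get a larger-is-better similarity.
        Metric::InnerProduct => -distance,
        // `<=>` is the cosine distance; 1 - d gives the cosine similarity.
        Metric::Cosine => 1.0 - distance,
    }
}

fn main() {
    // A cosine distance of 0.25 corresponds to a similarity of 0.75.
    assert_eq!(score_from_distance(Metric::Cosine, 0.25), 0.75);
}
```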
use std::collections::HashMap; use async_trait::async_trait; +use diesel::prelude::*; +use diesel::sql_types::{Float, Integer, Text}; +use diesel_async::pooled_connection::AsyncDieselConnectionManager; +use diesel_async::pooled_connection::deadpool::Pool; +use diesel_async::{AsyncPgConnection, RunQueryDsl}; +use nvisy_data::{ + DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, + VectorSearchResult, +}; use super::{PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType}; use crate::TRACING_TARGET; -use crate::error::{VectorError, VectorResult}; -use crate::store::{SearchOptions, SearchResult, VectorData, VectorStoreBackend}; -/// pgvector backend implementation. -/// -/// This backend uses raw SQL queries via the pgvector extension. -/// It's designed to work with any PostgreSQL async driver. +/// pgvector backend implementation using Diesel. pub struct PgVectorBackend { + pool: Pool, config: PgVectorConfig, - // In a real implementation, this would hold a connection pool - // For now, we store the connection URL for documentation purposes - #[allow(dead_code)] - connection_url: String, } impl PgVectorBackend { /// Creates a new pgvector backend. - pub async fn new(config: &PgVectorConfig) -> VectorResult { - // In a real implementation, we would: - // 1. Create a connection pool - // 2. Verify pgvector extension is installed - // 3. Test the connection + pub async fn new(config: &PgVectorConfig) -> DataResult { + let manager = + AsyncDieselConnectionManager::::new(&config.connection_url); + + let pool = Pool::builder(manager) + .build() + .map_err(|e| DataError::connection(e.to_string()))?; + + // Test connection and ensure pgvector extension exists + { + let mut conn = pool + .get() + .await + .map_err(|e| DataError::connection(e.to_string()))?; + + diesel::sql_query("CREATE EXTENSION IF NOT EXISTS vector") + .execute(&mut conn) + .await + .map_err(|e| { + DataError::backend(format!("Failed to create vector extension: {}", e)) + })?; + } tracing::debug!( target: TRACING_TARGET, @@ -37,14 +55,31 @@ impl PgVectorBackend { ); Ok(Self { + pool, config: config.clone(), - connection_url: config.connection_url.clone(), }) } - /// Generates SQL for creating the vectors table. - pub fn create_table_sql(&self, name: &str, dimensions: usize) -> String { - format!( + async fn get_conn( + &self, + ) -> DataResult>> + { + self.pool + .get() + .await + .map_err(|e| DataError::connection(e.to_string())) + } + + fn distance_operator(&self) -> &'static str { + self.config.distance_metric.operator() + } + + /// Ensures a collection (table) exists, creating it if necessary. + async fn ensure_collection(&self, name: &str, dimensions: usize) -> DataResult<()> { + let mut conn = self.get_conn().await?; + + // Create the table + let create_table = format!( r#" CREATE TABLE IF NOT EXISTS {} ( id VARCHAR(256) PRIMARY KEY, @@ -54,15 +89,18 @@ impl PgVectorBackend { ) "#, name, dimensions - ) - } + ); - /// Generates SQL for creating the vector index. 
- pub fn create_index_sql(&self, name: &str) -> String { + diesel::sql_query(&create_table) + .execute(&mut conn) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + // Create the index let index_name = format!("{}_vector_idx", name); - let operator = self.config.distance_metric.operator(); + let operator = self.distance_operator(); - match self.config.index_type { + let create_index = match self.config.index_type { PgVectorIndexType::IvfFlat => { format!( r#" @@ -83,196 +121,172 @@ impl PgVectorBackend { index_name, name, operator ) } - } - } - - /// Generates SQL for upserting vectors. - pub fn upsert_sql(&self, name: &str) -> String { - format!( - r#" - INSERT INTO {} (id, vector, metadata) - VALUES ($1, $2, $3) - ON CONFLICT (id) DO UPDATE SET - vector = EXCLUDED.vector, - metadata = EXCLUDED.metadata - "#, - name - ) - } - - /// Generates SQL for searching vectors. - pub fn search_sql(&self, name: &str, include_vector: bool) -> String { - let operator = self.config.distance_metric.operator(); - let vector_column = if include_vector { ", vector" } else { "" }; - - let distance_expr = match self.config.distance_metric { - PgVectorDistanceMetric::L2 => format!("vector {} $1", operator), - PgVectorDistanceMetric::InnerProduct => { - // Inner product returns negative, so we negate for similarity - format!("-(vector {} $1)", operator) - } - PgVectorDistanceMetric::Cosine => { - // Cosine distance, convert to similarity - format!("1 - (vector {} $1)", operator) - } }; - format!( - r#" - SELECT id, {} as score{}, metadata - FROM {} - ORDER BY vector {} $1 - LIMIT $2 - "#, - distance_expr, vector_column, name, operator - ) - } - - /// Generates SQL for deleting vectors. - pub fn delete_sql(&self, name: &str) -> String { - format!("DELETE FROM {} WHERE id = ANY($1)", name) - } - - /// Generates SQL for getting vectors by ID. - pub fn get_sql(&self, name: &str) -> String { - format!( - "SELECT id, vector, metadata FROM {} WHERE id = ANY($1)", - name - ) - } -} + diesel::sql_query(&create_index) + .execute(&mut conn) + .await + .map_err(|e| DataError::backend(e.to_string()))?; -#[async_trait] -impl VectorStoreBackend for PgVectorBackend { - async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { - // In a real implementation, execute: - // 1. CREATE EXTENSION IF NOT EXISTS vector; - // 2. self.create_table_sql(name, dimensions) - // 3. 
self.create_index_sql(name) - - tracing::info!( + tracing::debug!( target: TRACING_TARGET, collection = %name, dimensions = %dimensions, - index_type = ?self.config.index_type, - "Would create pgvector table: {}", - self.create_table_sql(name, dimensions) + "Ensured pgvector table exists" ); Ok(()) } +} - async fn delete_collection(&self, name: &str) -> VectorResult<()> { - // In a real implementation, execute: - // DROP TABLE IF EXISTS {name} - - tracing::info!( - target: TRACING_TARGET, - collection = %name, - "Would drop pgvector table" - ); - - Ok(()) - } - - async fn collection_exists(&self, _name: &str) -> VectorResult { - // In a real implementation, query information_schema.tables - Ok(true) - } - - async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { - // In a real implementation, execute batched upserts - let sql = self.upsert_sql(collection); +#[async_trait] +impl VectorOutput for PgVectorBackend { + async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()> { + if vectors.is_empty() { + return Ok(()); + } - tracing::debug!( - target: TRACING_TARGET, - collection = %collection, - count = %vectors.len(), - "Would upsert with SQL: {}", - sql - ); + // Get dimensions from the first vector + let dimensions = vectors + .get(0) + .map(|v| v.vector.len()) + .ok_or_else(|| DataError::invalid("No vectors provided"))?; + + // Ensure collection exists + self.ensure_collection(&ctx.collection, dimensions).await?; + + let mut conn = self.get_conn().await?; + + for v in vectors { + let vector_str = format!( + "[{}]", + v.vector + .iter() + .map(|f| f.to_string()) + .collect::>() + .join(",") + ); + let metadata_json = + serde_json::to_string(&v.metadata).unwrap_or_else(|_| "{}".to_string()); + + let upsert_query = format!( + r#" + INSERT INTO {} (id, vector, metadata) + VALUES ($1, $2::vector, $3::jsonb) + ON CONFLICT (id) DO UPDATE SET + vector = EXCLUDED.vector, + metadata = EXCLUDED.metadata + "#, + ctx.collection + ); + + diesel::sql_query(&upsert_query) + .bind::(&v.id) + .bind::(&vector_str) + .bind::(&metadata_json) + .execute(&mut conn) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + } Ok(()) } async fn search( &self, - collection: &str, - _query: Vec, - _limit: usize, - options: SearchOptions, - ) -> VectorResult> { - let sql = self.search_sql(collection, options.include_vectors); - - tracing::debug!( - target: TRACING_TARGET, - collection = %collection, - "Would search with SQL: {}", - sql + ctx: &VectorContext, + query: Vec, + limit: usize, + options: VectorSearchOptions, + ) -> DataResult> { + let mut conn = self.get_conn().await?; + + let operator = self.distance_operator(); + let vector_str = format!( + "[{}]", + query + .iter() + .map(|f| f.to_string()) + .collect::>() + .join(",") ); - // In a real implementation, execute the query and parse results - Ok(vec![]) - } - - async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { - let sql = self.delete_sql(collection); - - tracing::debug!( - target: TRACING_TARGET, - collection = %collection, - count = %ids.len(), - "Would delete with SQL: {}", - sql - ); - - Ok(()) - } + let vector_column = if options.include_vectors { + ", vector::text as vector_data" + } else { + "" + }; - async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { - let sql = self.get_sql(collection); + // For cosine and inner product, we need to convert distance to similarity + let score_expr = match self.config.distance_metric { + PgVectorDistanceMetric::L2 => format!("vector {} 
$1::vector", operator), + PgVectorDistanceMetric::InnerProduct => format!("-(vector {} $1::vector)", operator), + PgVectorDistanceMetric::Cosine => format!("1 - (vector {} $1::vector)", operator), + }; - tracing::debug!( - target: TRACING_TARGET, - collection = %collection, - count = %ids.len(), - "Would get with SQL: {}", - sql + let search_query = format!( + r#" + SELECT id, {} as score{}, metadata::text as metadata_json + FROM {} + ORDER BY vector {} $1::vector + LIMIT $2 + "#, + score_expr, vector_column, ctx.collection, operator ); - // In a real implementation, execute the query and parse results - Ok(vec![]) + let results: Vec = diesel::sql_query(&search_query) + .bind::(&vector_str) + .bind::(limit as i32) + .load(&mut conn) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + let search_results = results + .into_iter() + .map(|row| { + let metadata: HashMap = + serde_json::from_str(&row.metadata_json).unwrap_or_default(); + + let vector = if options.include_vectors { + row.vector_data.and_then(|v| parse_vector(&v).ok()) + } else { + None + }; + + VectorSearchResult { + id: row.id, + score: row.score, + vector, + metadata, + } + }) + .collect(); + + Ok(search_results) } } -/// Helper to format a vector for PostgreSQL. -#[allow(dead_code)] -pub fn format_vector(v: &[f32]) -> String { - format!( - "[{}]", - v.iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ) -} - -/// Helper to parse a vector from PostgreSQL. -#[allow(dead_code)] -pub fn parse_vector(s: &str) -> VectorResult> { +/// Parse a vector string from PostgreSQL format. +fn parse_vector(s: &str) -> DataResult> { let trimmed = s.trim_start_matches('[').trim_end_matches(']'); trimmed .split(',') .map(|s| { s.trim() .parse::() - .map_err(|e| VectorError::serialization(e.to_string())) + .map_err(|e| DataError::serialization(e.to_string())) }) .collect() } -/// Helper to convert metadata to JSONB. -#[allow(dead_code)] -pub fn metadata_to_jsonb(metadata: &HashMap) -> String { - serde_json::to_string(metadata).unwrap_or_else(|_| "{}".to_string()) +#[derive(QueryableByName)] +struct SearchRow { + #[diesel(sql_type = Text)] + id: String, + #[diesel(sql_type = Float)] + score: f32, + #[diesel(sql_type = Text)] + metadata_json: String, + #[diesel(sql_type = diesel::sql_types::Nullable)] + vector_data: Option, } diff --git a/integrations/nvisy-vector/src/pinecone/backend.rs b/integrations/nvisy-vector/src/pinecone/backend.rs index f69cfb4..c764a4d 100644 --- a/integrations/nvisy-vector/src/pinecone/backend.rs +++ b/integrations/nvisy-vector/src/pinecone/backend.rs @@ -1,124 +1,230 @@ //! Pinecone backend implementation. -//! -//! This is a stub implementation. The Pinecone SDK API differs significantly -//! from the interface we designed. A full implementation would require -//! adapting to the actual pinecone-sdk API. + +use std::collections::{BTreeMap, HashMap}; use async_trait::async_trait; +use nvisy_data::{ + DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, + VectorSearchResult, +}; +use pinecone_sdk::models::{Kind, Metadata, Namespace, Value as PineconeValue, Vector}; +use pinecone_sdk::pinecone::PineconeClientConfig; +use pinecone_sdk::pinecone::data::Index; +use tokio::sync::Mutex; use super::PineconeConfig; use crate::TRACING_TARGET; -use crate::error::{VectorError, VectorResult}; -use crate::store::{SearchOptions, SearchResult, VectorData, VectorStoreBackend}; /// Pinecone backend implementation. 
pub struct PineconeBackend { - #[allow(dead_code)] + index: Mutex, config: PineconeConfig, } impl PineconeBackend { /// Creates a new Pinecone backend. - pub async fn new(config: &PineconeConfig) -> VectorResult { + pub async fn new(config: &PineconeConfig) -> DataResult { + let client_config = PineconeClientConfig { + api_key: Some(config.api_key.clone()), + ..Default::default() + }; + + let client = client_config + .client() + .map_err(|e| DataError::connection(e.to_string()))?; + + // Describe the index to get its host + let index_description = client + .describe_index(&config.index) + .await + .map_err(|e| DataError::connection(format!("Failed to describe index: {}", e)))?; + + // host is a String, not Option + let host = &index_description.host; + + // Connect to the index + let index = client + .index(host) + .await + .map_err(|e| DataError::connection(format!("Failed to connect to index: {}", e)))?; + tracing::debug!( target: TRACING_TARGET, - environment = %config.environment, index = %config.index, - "Pinecone backend initialized (stub implementation)" + "Connected to Pinecone" ); Ok(Self { + index: Mutex::new(index), config: config.clone(), }) } -} -#[async_trait] -impl VectorStoreBackend for PineconeBackend { - async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { - tracing::warn!( - target: TRACING_TARGET, - collection = %name, - dimensions = %dimensions, - "Pinecone create_collection is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Pinecone backend is not yet implemented", - )) + fn get_namespace(&self, collection: &str) -> Namespace { + if collection.is_empty() { + self.config + .namespace + .as_ref() + .map(|ns| Namespace::from(ns.as_str())) + .unwrap_or_default() + } else { + Namespace::from(collection) + } } - async fn delete_collection(&self, name: &str) -> VectorResult<()> { - tracing::warn!( - target: TRACING_TARGET, - collection = %name, - "Pinecone delete_collection is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Pinecone backend is not yet implemented", - )) + /// Convert Pinecone Metadata (prost_types::Struct) to HashMap + fn metadata_to_hashmap(metadata: Metadata) -> HashMap { + metadata + .fields + .into_iter() + .map(|(k, v)| (k, pinecone_value_to_json(v))) + .collect() } - async fn collection_exists(&self, name: &str) -> VectorResult { - tracing::warn!( - target: TRACING_TARGET, - collection = %name, - "Pinecone collection_exists is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Pinecone backend is not yet implemented", - )) + /// Convert HashMap to Pinecone Metadata (prost_types::Struct) + fn hashmap_to_metadata(map: HashMap) -> Metadata { + let fields: BTreeMap = map + .into_iter() + .map(|(k, v)| (k, json_to_pinecone_value(v))) + .collect(); + + Metadata { fields } } +} - async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { - tracing::warn!( - target: TRACING_TARGET, - collection = %collection, - count = %vectors.len(), - "Pinecone upsert is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Pinecone backend is not yet implemented", - )) +#[async_trait] +impl VectorOutput for PineconeBackend { + async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()> { + let namespace = self.get_namespace(&ctx.collection); + + let pinecone_vectors: Vec = vectors + .into_iter() + .map(|v| { + let metadata = if v.metadata.is_empty() { + None + } else { + Some(Self::hashmap_to_metadata(v.metadata)) + }; + + Vector { + id: v.id, + 
values: v.vector, + sparse_values: None, + metadata, + } + }) + .collect(); + + let mut index = self.index.lock().await; + index + .upsert(&pinecone_vectors, &namespace) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + Ok(()) } async fn search( &self, - collection: &str, - _query: Vec, - _limit: usize, - _options: SearchOptions, - ) -> VectorResult> { - tracing::warn!( - target: TRACING_TARGET, - collection = %collection, - "Pinecone search is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Pinecone backend is not yet implemented", - )) - } + ctx: &VectorContext, + query: Vec, + limit: usize, + options: VectorSearchOptions, + ) -> DataResult> { + let namespace = self.get_namespace(&ctx.collection); - async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { - tracing::warn!( - target: TRACING_TARGET, - collection = %collection, - count = %ids.len(), - "Pinecone delete is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Pinecone backend is not yet implemented", - )) + let filter: Option = options.filter.and_then(|f| { + if let serde_json::Value::Object(obj) = f { + let map: HashMap = obj.into_iter().collect(); + Some(Self::hashmap_to_metadata(map)) + } else { + None + } + }); + + let mut index = self.index.lock().await; + let response = index + .query_by_value( + query, + None, // sparse values + limit as u32, + &namespace, + filter, + Some(options.include_vectors), + Some(options.include_metadata), + ) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + let results = response + .matches + .into_iter() + .map(|m| { + let metadata = m + .metadata + .map(Self::metadata_to_hashmap) + .unwrap_or_default(); + + VectorSearchResult { + id: m.id, + score: m.score, + vector: Some(m.values), + metadata, + } + }) + .collect(); + + Ok(results) } +} - async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { - tracing::warn!( - target: TRACING_TARGET, - collection = %collection, - count = %ids.len(), - "Pinecone get is a stub - not yet implemented" - ); - Err(VectorError::backend( - "Pinecone backend is not yet implemented", - )) +/// Convert Pinecone Value (prost_types::Value) to serde_json::Value +fn pinecone_value_to_json(value: PineconeValue) -> serde_json::Value { + match value.kind { + Some(Kind::NullValue(_)) => serde_json::Value::Null, + Some(Kind::NumberValue(n)) => serde_json::Value::Number( + serde_json::Number::from_f64(n).unwrap_or(serde_json::Number::from(0)), + ), + Some(Kind::StringValue(s)) => serde_json::Value::String(s), + Some(Kind::BoolValue(b)) => serde_json::Value::Bool(b), + Some(Kind::StructValue(s)) => { + let map: serde_json::Map = s + .fields + .into_iter() + .map(|(k, v)| (k, pinecone_value_to_json(v))) + .collect(); + serde_json::Value::Object(map) + } + Some(Kind::ListValue(list)) => { + let arr: Vec = list + .values + .into_iter() + .map(pinecone_value_to_json) + .collect(); + serde_json::Value::Array(arr) + } + None => serde_json::Value::Null, } } + +/// Convert serde_json::Value to Pinecone Value (prost_types::Value) +fn json_to_pinecone_value(value: serde_json::Value) -> PineconeValue { + let kind = match value { + serde_json::Value::Null => Some(Kind::NullValue(0)), + serde_json::Value::Bool(b) => Some(Kind::BoolValue(b)), + serde_json::Value::Number(n) => Some(Kind::NumberValue(n.as_f64().unwrap_or(0.0))), + serde_json::Value::String(s) => Some(Kind::StringValue(s)), + serde_json::Value::Array(arr) => Some(Kind::ListValue(prost_types::ListValue { + values: 
arr.into_iter().map(json_to_pinecone_value).collect(), + })), + serde_json::Value::Object(obj) => { + let fields: BTreeMap = obj + .into_iter() + .map(|(k, v)| (k, json_to_pinecone_value(v))) + .collect(); + Some(Kind::StructValue(prost_types::Struct { fields })) + } + }; + + PineconeValue { kind } +} diff --git a/integrations/nvisy-vector/src/qdrant/backend.rs b/integrations/nvisy-vector/src/qdrant/backend.rs index cb92445..2dcd8c3 100644 --- a/integrations/nvisy-vector/src/qdrant/backend.rs +++ b/integrations/nvisy-vector/src/qdrant/backend.rs @@ -3,19 +3,21 @@ use std::collections::HashMap; use async_trait::async_trait; +use nvisy_data::{ + DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, + VectorSearchResult, +}; use qdrant_client::Qdrant; use qdrant_client::qdrant::vectors_config::Config as VectorsConfig; use qdrant_client::qdrant::with_payload_selector::SelectorOptions; use qdrant_client::qdrant::with_vectors_selector::SelectorOptions as VectorsSelectorOptions; use qdrant_client::qdrant::{ - Condition, CreateCollectionBuilder, DeletePointsBuilder, Distance, Filter, GetPointsBuilder, - PointId, PointStruct, SearchPointsBuilder, UpsertPointsBuilder, VectorParamsBuilder, + Condition, CreateCollectionBuilder, Distance, Filter, PointId, PointStruct, + SearchPointsBuilder, UpsertPointsBuilder, VectorParamsBuilder, }; use super::QdrantConfig; use crate::TRACING_TARGET; -use crate::error::{VectorError, VectorResult}; -use crate::store::{SearchOptions, SearchResult, VectorData, VectorStoreBackend}; /// Qdrant backend implementation. pub struct QdrantBackend { @@ -26,11 +28,11 @@ pub struct QdrantBackend { impl QdrantBackend { /// Creates a new Qdrant backend. - pub async fn new(config: &QdrantConfig) -> VectorResult { + pub async fn new(config: &QdrantConfig) -> DataResult { let client = Qdrant::from_url(&config.url) .api_key(config.api_key.clone()) .build() - .map_err(|e| VectorError::connection(e.to_string()))?; + .map_err(|e| DataError::connection(e.to_string()))?; tracing::debug!( target: TRACING_TARGET, @@ -44,6 +46,37 @@ impl QdrantBackend { }) } + /// Ensures a collection exists, creating it if necessary. + async fn ensure_collection(&self, name: &str, dimensions: usize) -> DataResult<()> { + let exists = self + .client + .collection_exists(name) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + if !exists { + let vectors_config = VectorsConfig::Params( + VectorParamsBuilder::new(dimensions as u64, Distance::Cosine).build(), + ); + + self.client + .create_collection( + CreateCollectionBuilder::new(name).vectors_config(vectors_config), + ) + .await + .map_err(|e| DataError::backend(e.to_string()))?; + + tracing::info!( + target: TRACING_TARGET, + collection = %name, + dimensions = %dimensions, + "Created Qdrant collection" + ); + } + + Ok(()) + } + /// Extracts vector data from Qdrant's VectorsOutput. 
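For a concrete feel of the Pinecone metadata helpers defined just above (`json_to_pinecone_value` / `pinecone_value_to_json`), here is a hypothetical round-trip check; it assumes both functions are in scope and relies only on behaviour visible in this patch.

```rust
// Hypothetical round-trip check for the Pinecone metadata helpers above.
// Integers are widened to f64 by the conversion; everything else round-trips.
fn main() {
    let original = serde_json::json!({
        "source": "example.pdf",
        "page": 3,
        "tags": ["draft", "internal"],
    });

    let as_pinecone = json_to_pinecone_value(original.clone());
    let back = pinecone_value_to_json(as_pinecone);

    assert_eq!(back["source"], original["source"]);
    assert_eq!(back["tags"], original["tags"]);
    // NumberValue only carries f64, so the integer comes back as 3.0.
    assert_eq!(back["page"], serde_json::json!(3.0));
}
```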
fn extract_vector(vectors: Option) -> Option> { use qdrant_client::qdrant::vectors_output::VectorsOptions; @@ -72,53 +105,21 @@ impl QdrantBackend { } #[async_trait] -impl VectorStoreBackend for QdrantBackend { - async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { - let vectors_config = VectorsConfig::Params( - VectorParamsBuilder::new(dimensions as u64, Distance::Cosine).build(), - ); - - self.client - .create_collection(CreateCollectionBuilder::new(name).vectors_config(vectors_config)) - .await - .map_err(|e| VectorError::backend(e.to_string()))?; - - tracing::info!( - target: TRACING_TARGET, - collection = %name, - dimensions = %dimensions, - "Created Qdrant collection" - ); - - Ok(()) - } - - async fn delete_collection(&self, name: &str) -> VectorResult<()> { - self.client - .delete_collection(name) - .await - .map_err(|e| VectorError::backend(e.to_string()))?; - - tracing::info!( - target: TRACING_TARGET, - collection = %name, - "Deleted Qdrant collection" - ); - - Ok(()) - } +impl VectorOutput for QdrantBackend { + async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()> { + if vectors.is_empty() { + return Ok(()); + } - async fn collection_exists(&self, name: &str) -> VectorResult { - let exists = self - .client - .collection_exists(name) - .await - .map_err(|e| VectorError::backend(e.to_string()))?; + // Get dimensions from the first vector + let dimensions = vectors + .first() + .map(|v| v.vector.len()) + .ok_or_else(|| DataError::invalid("No vectors provided"))?; - Ok(exists) - } + // Ensure collection exists + self.ensure_collection(&ctx.collection, dimensions).await?; - async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { let points: Vec = vectors .into_iter() .map(|v| { @@ -133,21 +134,21 @@ impl VectorStoreBackend for QdrantBackend { .collect(); self.client - .upsert_points(UpsertPointsBuilder::new(collection, points)) + .upsert_points(UpsertPointsBuilder::new(&ctx.collection, points)) .await - .map_err(|e| VectorError::backend(e.to_string()))?; + .map_err(|e| DataError::backend(e.to_string()))?; Ok(()) } async fn search( &self, - collection: &str, + ctx: &VectorContext, query: Vec, limit: usize, - options: SearchOptions, - ) -> VectorResult> { - let mut search = SearchPointsBuilder::new(collection, query, limit as u64); + options: VectorSearchOptions, + ) -> DataResult> { + let mut search = SearchPointsBuilder::new(&ctx.collection, query, limit as u64); if options.include_vectors { search = search.with_vectors(VectorsSelectorOptions::Enable(true)); @@ -167,7 +168,7 @@ impl VectorStoreBackend for QdrantBackend { .client .search_points(search) .await - .map_err(|e| VectorError::backend(e.to_string()))?; + .map_err(|e| DataError::backend(e.to_string()))?; let results = response .result @@ -182,7 +183,7 @@ impl VectorStoreBackend for QdrantBackend { .map(|(k, v)| (k, qdrant_value_to_json(v))) .collect(); - SearchResult { + VectorSearchResult { id, score: point.score, vector, @@ -193,54 +194,6 @@ impl VectorStoreBackend for QdrantBackend { Ok(results) } - - async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { - let point_ids: Vec = ids.into_iter().map(PointId::from).collect(); - - self.client - .delete_points(DeletePointsBuilder::new(collection).points(point_ids)) - .await - .map_err(|e| VectorError::backend(e.to_string()))?; - - Ok(()) - } - - async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { - let point_ids: Vec = ids.into_iter().map(PointId::from).collect(); - - 
let response = self - .client - .get_points( - GetPointsBuilder::new(collection, point_ids) - .with_vectors(VectorsSelectorOptions::Enable(true)) - .with_payload(SelectorOptions::Enable(true)), - ) - .await - .map_err(|e| VectorError::backend(e.to_string()))?; - - let results = response - .result - .into_iter() - .filter_map(|point| { - let id = Self::extract_point_id(point.id)?; - let vector = Self::extract_vector(point.vectors)?; - - let metadata: HashMap = point - .payload - .into_iter() - .map(|(k, v)| (k, qdrant_value_to_json(v))) - .collect(); - - Some(VectorData { - id, - vector, - metadata, - }) - }) - .collect(); - - Ok(results) - } } /// Converts JSON value to Qdrant value. @@ -306,7 +259,6 @@ fn qdrant_value_to_json(value: qdrant_client::qdrant::Value) -> serde_json::Valu /// Parses a JSON filter into Qdrant conditions. fn parse_filter(filter: &serde_json::Value) -> Option> { - // Simple filter parsing - can be extended for more complex queries if let serde_json::Value::Object(obj) = filter { let conditions: Vec = obj .iter() diff --git a/integrations/nvisy-vector/src/store.rs b/integrations/nvisy-vector/src/store.rs index 84772b9..b136f5a 100644 --- a/integrations/nvisy-vector/src/store.rs +++ b/integrations/nvisy-vector/src/store.rs @@ -1,158 +1,27 @@ -//! Vector store trait and implementations. +//! Vector store wrapper and unified API. -use std::collections::HashMap; - -use async_trait::async_trait; -use serde::{Deserialize, Serialize}; +use nvisy_data::{ + DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, VectorSearchResult, +}; use crate::TRACING_TARGET; use crate::config::VectorStoreConfig; -use crate::error::VectorResult; use crate::milvus::MilvusBackend; use crate::pgvector::PgVectorBackend; use crate::pinecone::PineconeBackend; use crate::qdrant::QdrantBackend; -/// Vector data to be stored. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct VectorData { - /// Unique identifier for the vector. - pub id: String, - /// The embedding vector. - pub vector: Vec, - /// Optional metadata. - #[serde(default, skip_serializing_if = "HashMap::is_empty")] - pub metadata: HashMap, -} - -impl VectorData { - /// Creates a new vector data with an ID and embedding. - pub fn new(id: impl Into, vector: Vec) -> Self { - Self { - id: id.into(), - vector, - metadata: HashMap::new(), - } - } - - /// Adds metadata to the vector. - pub fn with_metadata( - mut self, - metadata: impl IntoIterator, serde_json::Value)>, - ) -> Self { - self.metadata = metadata.into_iter().map(|(k, v)| (k.into(), v)).collect(); - self - } - - /// Adds a single metadata field. - pub fn with_field(mut self, key: impl Into, value: serde_json::Value) -> Self { - self.metadata.insert(key.into(), value); - self - } -} - -/// Search result from a vector query. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SearchResult { - /// Vector ID. - pub id: String, - /// Similarity score. - pub score: f32, - /// The vector (if requested). - #[serde(skip_serializing_if = "Option::is_none")] - pub vector: Option>, - /// Associated metadata. - #[serde(default, skip_serializing_if = "HashMap::is_empty")] - pub metadata: HashMap, -} - -/// Search options. -#[derive(Debug, Clone, Default)] -pub struct SearchOptions { - /// Include vectors in results. - pub include_vectors: bool, - /// Include metadata in results. - pub include_metadata: bool, - /// Metadata filter (backend-specific JSON). - pub filter: Option, - /// Namespace/partition (for backends that support it). 
- pub namespace: Option, -} - -impl SearchOptions { - /// Creates default search options. - pub fn new() -> Self { - Self::default() - } - - /// Include vectors in results. - pub fn with_vectors(mut self) -> Self { - self.include_vectors = true; - self - } - - /// Include metadata in results. - pub fn with_metadata(mut self) -> Self { - self.include_metadata = true; - self - } - - /// Set a metadata filter. - pub fn with_filter(mut self, filter: serde_json::Value) -> Self { - self.filter = Some(filter); - self - } - - /// Set the namespace. - pub fn with_namespace(mut self, namespace: impl Into) -> Self { - self.namespace = Some(namespace.into()); - self - } -} - -/// Trait for vector store backends. -#[async_trait] -pub trait VectorStoreBackend: Send + Sync { - /// Creates or ensures a collection exists. - async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()>; - - /// Deletes a collection. - async fn delete_collection(&self, name: &str) -> VectorResult<()>; - - /// Checks if a collection exists. - async fn collection_exists(&self, name: &str) -> VectorResult; - - /// Upserts vectors into a collection. - async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()>; - - /// Searches for similar vectors. - async fn search( - &self, - collection: &str, - query: Vec, - limit: usize, - options: SearchOptions, - ) -> VectorResult>; - - /// Deletes vectors by their IDs. - async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()>; - - /// Gets vectors by their IDs. - async fn get(&self, collection: &str, ids: Vec) -> VectorResult>; -} - /// Unified vector store that wraps backend implementations. pub struct VectorStore { #[allow(dead_code)] config: VectorStoreConfig, - #[allow(dead_code)] - backend: Box, + backend: Box, } impl VectorStore { /// Creates a new vector store from configuration. - pub async fn new(config: VectorStoreConfig) -> VectorResult { - let backend: Box = match &config { + pub async fn new(config: VectorStoreConfig) -> DataResult { + let backend: Box = match &config { VectorStoreConfig::Qdrant(cfg) => Box::new(QdrantBackend::new(cfg).await?), VectorStoreConfig::Milvus(cfg) => Box::new(MilvusBackend::new(cfg).await?), VectorStoreConfig::Pinecone(cfg) => Box::new(PineconeBackend::new(cfg).await?), @@ -168,41 +37,17 @@ impl VectorStore { Ok(Self { config, backend }) } - /// Creates or ensures a collection exists. - pub async fn create_collection(&self, name: &str, dimensions: usize) -> VectorResult<()> { - tracing::debug!( - target: TRACING_TARGET, - collection = %name, - dimensions = %dimensions, - "Creating collection" - ); - self.backend.create_collection(name, dimensions).await - } - - /// Deletes a collection. - pub async fn delete_collection(&self, name: &str) -> VectorResult<()> { - tracing::debug!( - target: TRACING_TARGET, - collection = %name, - "Deleting collection" - ); - self.backend.delete_collection(name).await - } - - /// Checks if a collection exists. - pub async fn collection_exists(&self, name: &str) -> VectorResult { - self.backend.collection_exists(name).await - } - - /// Upserts vectors into a collection. - pub async fn upsert(&self, collection: &str, vectors: Vec) -> VectorResult<()> { + /// Inserts vectors into a collection. 
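A hypothetical caller of the slimmed-down `VectorStore` wrapper shown in this hunk: the collection name, ids, and metadata are invented, and it assumes `VectorData` can be built with a plain struct literal (its fields are accessed that way throughout this patch).

```rust
// Hypothetical usage of the unified VectorStore API in this diff.
use std::collections::HashMap;

async fn index_and_query(store: &VectorStore) -> DataResult<()> {
    let vectors = vec![VectorData {
        id: "doc-1#0".to_string(),
        vector: vec![0.12, 0.83, 0.44],
        metadata: HashMap::from([("source".to_string(), serde_json::json!("example.pdf"))]),
    }];

    // Most backends in this patch create the target collection lazily on insert.
    store.insert("documents", vectors).await?;

    // Nearest neighbours with default search options.
    let hits = store.search("documents", vec![0.1, 0.8, 0.4], 5).await?;
    for hit in hits {
        println!("{} scored {}", hit.id, hit.score);
    }

    Ok(())
}
```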
+ pub async fn insert(&self, collection: &str, vectors: Vec) -> DataResult<()> { tracing::debug!( target: TRACING_TARGET, collection = %collection, count = %vectors.len(), - "Upserting vectors" + "Inserting vectors" ); - self.backend.upsert(collection, vectors).await + + let ctx = VectorContext::new(collection); + self.backend.insert(&ctx, vectors).await } /// Searches for similar vectors. @@ -211,8 +56,8 @@ impl VectorStore { collection: &str, query: Vec, limit: usize, - ) -> VectorResult> { - self.search_with_options(collection, query, limit, SearchOptions::default()) + ) -> DataResult> { + self.search_with_options(collection, query, limit, VectorSearchOptions::default()) .await } @@ -222,37 +67,22 @@ impl VectorStore { collection: &str, query: Vec, limit: usize, - options: SearchOptions, - ) -> VectorResult> { + options: VectorSearchOptions, + ) -> DataResult> { tracing::debug!( target: TRACING_TARGET, collection = %collection, limit = %limit, "Searching vectors" ); - self.backend.search(collection, query, limit, options).await - } - /// Deletes vectors by their IDs. - pub async fn delete(&self, collection: &str, ids: Vec) -> VectorResult<()> { - tracing::debug!( - target: TRACING_TARGET, - collection = %collection, - count = %ids.len(), - "Deleting vectors" - ); - self.backend.delete(collection, ids).await + let ctx = VectorContext::new(collection); + self.backend.search(&ctx, query, limit, options).await } - /// Gets vectors by their IDs. - pub async fn get(&self, collection: &str, ids: Vec) -> VectorResult> { - tracing::debug!( - target: TRACING_TARGET, - collection = %collection, - count = %ids.len(), - "Getting vectors" - ); - self.backend.get(collection, ids).await + /// Returns a reference to the underlying backend. + pub fn backend(&self) -> &dyn VectorOutput { + self.backend.as_ref() } } From c490e92769570af42d03e61d4e82eaf8a04c5633 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 20 Jan 2026 07:21:51 +0100 Subject: [PATCH 08/28] feat(nats): refactor kv, object, stream modules with marker trait patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stream module: - Rename publisher.rs → stream_pub.rs, subscriber.rs → stream_sub.rs - Merge file_job.rs into event.rs - Create EventStream trait with FileStream and WebhookStream implementations - Make EventPublisher and EventSubscriber generic over stream type - Remove EventPriority enum and priority field from FileJob - Remove with_consumer_name and with_filter from EventSubscriber - Add WebhookStream for webhook delivery (24h TTL) Object module: - Create ObjectBucket trait with NAME, DESCRIPTION, MAX_SIZE constants - Create ObjectKey trait with PREFIX constant - Make ObjectStore generic over bucket and key types - Consolidate FileKey and AccountKey into object_key.rs - Add convenience methods: file_store, intermediates_store, thumbnail_store, avatar_store KV module: - Create KvBucket trait with NAME, DESCRIPTION, TTL constants - Create KvKey trait for key types (SessionKey, TokenKey) - Make KvStore generic over key, value, and bucket - Delete ApiTokenStore, ChatHistoryStore, CacheStore wrappers - Add with_ttl constructor for runtime TTL override NatsClient: - Group methods into impl blocks: core, kv, object, stream - Add typed convenience getters for all store types - Remove unused dependencies: async-stream, strum, bytes Webhook implementation: - Add WebhookWorker for consuming and delivering webhooks - Add WebhookEmitter service for publishing webhook events - Update 
nvisy-server to use new webhook infrastructure --- Cargo.lock | 4 +- crates/nvisy-cli/Cargo.toml | 1 + crates/nvisy-cli/src/main.rs | 30 +- crates/nvisy-nats/Cargo.toml | 20 +- crates/nvisy-nats/src/client/mod.rs | 2 +- crates/nvisy-nats/src/client/nats_client.rs | 273 +++++----- crates/nvisy-nats/src/kv/api_token.rs | 46 +- crates/nvisy-nats/src/kv/api_token_store.rs | 355 ------------- crates/nvisy-nats/src/kv/cache.rs | 324 ------------ crates/nvisy-nats/src/kv/chat_history.rs | 156 ------ crates/nvisy-nats/src/kv/kv_bucket.rs | 59 +++ crates/nvisy-nats/src/kv/kv_key.rs | 92 ++++ crates/nvisy-nats/src/kv/kv_store.rs | 337 +++++++++++++ crates/nvisy-nats/src/kv/mod.rs | 41 +- crates/nvisy-nats/src/kv/store.rs | 433 ---------------- crates/nvisy-nats/src/lib.rs | 2 +- crates/nvisy-nats/src/object/avatar_bucket.rs | 9 - crates/nvisy-nats/src/object/avatar_key.rs | 91 ---- crates/nvisy-nats/src/object/avatar_store.rs | 59 --- .../nvisy-nats/src/object/document_bucket.rs | 58 --- crates/nvisy-nats/src/object/document_key.rs | 182 ------- .../nvisy-nats/src/object/document_store.rs | 73 --- crates/nvisy-nats/src/object/mod.rs | 42 +- crates/nvisy-nats/src/object/object_bucket.rs | 84 ++++ crates/nvisy-nats/src/object/object_data.rs | 18 +- crates/nvisy-nats/src/object/object_key.rs | 282 +++++++++++ crates/nvisy-nats/src/object/object_store.rs | 122 ++--- .../nvisy-nats/src/object/thumbnail_bucket.rs | 9 - .../nvisy-nats/src/object/thumbnail_store.rs | 59 --- crates/nvisy-nats/src/stream/document_job.rs | 470 ------------------ .../nvisy-nats/src/stream/document_job_pub.rs | 57 --- .../nvisy-nats/src/stream/document_job_sub.rs | 66 --- crates/nvisy-nats/src/stream/document_task.rs | 261 ---------- crates/nvisy-nats/src/stream/event.rs | 130 +++-- crates/nvisy-nats/src/stream/event_pub.rs | 76 +++ crates/nvisy-nats/src/stream/event_stream.rs | 74 +++ crates/nvisy-nats/src/stream/event_sub.rs | 63 +++ crates/nvisy-nats/src/stream/mod.rs | 33 +- .../stream/{publisher.rs => stream_pub.rs} | 2 +- .../stream/{subscriber.rs => stream_sub.rs} | 38 +- .../src/model/workspace_webhook.rs | 2 + .../src/query/workspace_webhook.rs | 47 +- crates/nvisy-postgres/src/schema.rs | 1 + .../src/types/enums/webhook_event.rs | 22 + crates/nvisy-server/src/error.rs | 6 + crates/nvisy-server/src/handler/files.rs | 131 +++-- crates/nvisy-server/src/handler/members.rs | 48 +- .../src/handler/response/webhooks.rs | 29 ++ crates/nvisy-server/src/handler/webhooks.rs | 20 +- crates/nvisy-server/src/lib.rs | 1 + crates/nvisy-server/src/service/mod.rs | 7 + .../src/service/webhook/emitter.rs | 440 ++++++++++++++++ .../nvisy-server/src/service/webhook/mod.rs | 7 + crates/nvisy-server/src/worker/mod.rs | 5 + crates/nvisy-server/src/worker/webhook.rs | 178 +++++++ crates/nvisy-webhook/src/request.rs | 16 +- crates/nvisy-webhook/src/reqwest/client.rs | 7 + docker/README.md | 52 -- docker/docker-compose.dev.yml | 58 --- integrations/nvisy-rig/src/rag/mod.rs | 8 +- .../nvisy-rig/src/rag/searcher/mod.rs | 8 +- integrations/nvisy-rig/src/session/store.rs | 37 +- .../nvisy-vector/src/pgvector/backend.rs | 3 +- 63 files changed, 2405 insertions(+), 3261 deletions(-) delete mode 100644 crates/nvisy-nats/src/kv/api_token_store.rs delete mode 100644 crates/nvisy-nats/src/kv/cache.rs delete mode 100644 crates/nvisy-nats/src/kv/chat_history.rs create mode 100644 crates/nvisy-nats/src/kv/kv_bucket.rs create mode 100644 crates/nvisy-nats/src/kv/kv_key.rs create mode 100644 crates/nvisy-nats/src/kv/kv_store.rs delete mode 100644 
crates/nvisy-nats/src/kv/store.rs delete mode 100644 crates/nvisy-nats/src/object/avatar_bucket.rs delete mode 100644 crates/nvisy-nats/src/object/avatar_key.rs delete mode 100644 crates/nvisy-nats/src/object/avatar_store.rs delete mode 100644 crates/nvisy-nats/src/object/document_bucket.rs delete mode 100644 crates/nvisy-nats/src/object/document_key.rs delete mode 100644 crates/nvisy-nats/src/object/document_store.rs create mode 100644 crates/nvisy-nats/src/object/object_bucket.rs create mode 100644 crates/nvisy-nats/src/object/object_key.rs delete mode 100644 crates/nvisy-nats/src/object/thumbnail_bucket.rs delete mode 100644 crates/nvisy-nats/src/object/thumbnail_store.rs delete mode 100644 crates/nvisy-nats/src/stream/document_job.rs delete mode 100644 crates/nvisy-nats/src/stream/document_job_pub.rs delete mode 100644 crates/nvisy-nats/src/stream/document_job_sub.rs delete mode 100644 crates/nvisy-nats/src/stream/document_task.rs create mode 100644 crates/nvisy-nats/src/stream/event_pub.rs create mode 100644 crates/nvisy-nats/src/stream/event_stream.rs create mode 100644 crates/nvisy-nats/src/stream/event_sub.rs rename crates/nvisy-nats/src/stream/{publisher.rs => stream_pub.rs} (98%) rename crates/nvisy-nats/src/stream/{subscriber.rs => stream_sub.rs} (93%) create mode 100644 crates/nvisy-server/src/service/webhook/emitter.rs create mode 100644 crates/nvisy-server/src/service/webhook/mod.rs create mode 100644 crates/nvisy-server/src/worker/mod.rs create mode 100644 crates/nvisy-server/src/worker/webhook.rs diff --git a/Cargo.lock b/Cargo.lock index 75d3f39..27da2ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3191,6 +3191,7 @@ dependencies = [ "serde", "thiserror 2.0.18", "tokio", + "tokio-util", "tracing", "tracing-subscriber", ] @@ -3225,9 +3226,7 @@ name = "nvisy-nats" version = "0.1.0" dependencies = [ "async-nats", - "async-stream", "base64 0.22.1", - "bytes", "clap", "derive_more", "futures", @@ -3238,7 +3237,6 @@ dependencies = [ "serde", "serde_json", "sha2", - "strum 0.27.2", "thiserror 2.0.18", "tokio", "tracing", diff --git a/crates/nvisy-cli/Cargo.toml b/crates/nvisy-cli/Cargo.toml index cac19eb..94afff5 100644 --- a/crates/nvisy-cli/Cargo.toml +++ b/crates/nvisy-cli/Cargo.toml @@ -43,6 +43,7 @@ nvisy-server = { workspace = true, features = ["config"] } # Async runtime, environment and CLI. 
tokio = { workspace = true, features = ["rt-multi-thread", "macros", "signal"] } +tokio-util = { workspace = true, features = [] } clap = { workspace = true, features = ["derive", "env"] } dotenvy = { workspace = true, features = [], optional = true } diff --git a/crates/nvisy-cli/src/main.rs b/crates/nvisy-cli/src/main.rs index acb0829..10c0824 100644 --- a/crates/nvisy-cli/src/main.rs +++ b/crates/nvisy-cli/src/main.rs @@ -11,6 +11,8 @@ use axum::Router; use nvisy_server::handler::{CustomRoutes, routes}; use nvisy_server::middleware::*; use nvisy_server::service::ServiceState; +use nvisy_server::worker::WebhookWorker; +use tokio_util::sync::CancellationToken; use crate::config::{Cli, MiddlewareConfig}; use crate::server::TRACING_TARGET_SHUTDOWN; @@ -45,10 +47,34 @@ async fn run() -> anyhow::Result<()> { let state = cli.service_state().await?; // Build router - let router = create_router(state, &cli.middleware); + let router = create_router(state.clone(), &cli.middleware); + + // Create cancellation token for graceful shutdown of workers + let cancel = CancellationToken::new(); + + // Spawn webhook worker (logs lifecycle events internally) + let webhook_worker = WebhookWorker::new(state.nats.clone(), state.webhook.clone()); + let worker_cancel = cancel.clone(); + let worker_handle = tokio::spawn(async move { + let _ = webhook_worker.run(worker_cancel).await; + }); // Run the HTTP server - server::serve(router, cli.server).await?; + let server_result = server::serve(router, cli.server).await; + + // Signal workers to stop + cancel.cancel(); + + // Wait for worker to finish + if let Err(err) = worker_handle.await { + tracing::error!( + target: TRACING_TARGET_SHUTDOWN, + error = %err, + "Webhook worker task panicked" + ); + } + + server_result?; Ok(()) } diff --git a/crates/nvisy-nats/Cargo.toml b/crates/nvisy-nats/Cargo.toml index d68c982..7e24721 100644 --- a/crates/nvisy-nats/Cargo.toml +++ b/crates/nvisy-nats/Cargo.toml @@ -22,6 +22,9 @@ rustdoc-args = ["--cfg", "docsrs"] # Default feature set (none for minimal dependencies) default = [] +# JSON Schema support: enables JsonSchema derives on Nats types +# This allows Nats types to be used directly in API documentation without +# manual schema implementations, while keeping the dependency optional schema = ["dep:schemars"] # CLI configuration support: enables clap derives for config types @@ -34,8 +37,8 @@ async-nats = { workspace = true } # Async runtime tokio = { workspace = true, features = ["rt-multi-thread", "macros", "time"] } +pin-project-lite = { workspace = true, features = [] } futures = { workspace = true, features = [] } -async-stream = { workspace = true, features = [] } # Observability tracing = { workspace = true, features = [] } @@ -44,28 +47,21 @@ tracing = { workspace = true, features = [] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } -# Error handling -thiserror = { workspace = true, features = [] } - # Derive macros & utilities +thiserror = { workspace = true, features = [] } derive_more = { workspace = true, features = ["deref", "deref_mut", "from", "into", "display"] } -strum = { workspace = true, features = [] } # Data types uuid = { workspace = true, features = ["serde", "v4", "v7"] } jiff = { workspace = true, features = ["serde"] } -bytes = { workspace = true, features = [] } # Cryptography sha2 = { workspace = true, features = [] } hex = { workspace = true, features = [] } +base64 = { workspace = true, features = [] } -# Utilities -pin-project-lite = { 
workspace = true, features = [] } - -# Optional: JSON schema generation +# JSON schema generation (Optional) schemars = { workspace = true, features = [], optional = true } -# CLI (optional) +# CLI (Optional) clap = { workspace = true, features = ["derive", "env"], optional = true } -base64.workspace = true diff --git a/crates/nvisy-nats/src/client/mod.rs b/crates/nvisy-nats/src/client/mod.rs index 05526a5..0d2c51b 100644 --- a/crates/nvisy-nats/src/client/mod.rs +++ b/crates/nvisy-nats/src/client/mod.rs @@ -3,5 +3,5 @@ mod nats_client; mod nats_config; -pub use nats_client::{NatsClient, NatsConnection}; +pub use nats_client::NatsClient; pub use nats_config::NatsConfig; diff --git a/crates/nvisy-nats/src/client/nats_client.rs b/crates/nvisy-nats/src/client/nats_client.rs index 0493f1c..64f3e58 100644 --- a/crates/nvisy-nats/src/client/nats_client.rs +++ b/crates/nvisy-nats/src/client/nats_client.rs @@ -33,13 +33,19 @@ use std::sync::Arc; use std::time::Duration; use async_nats::{Client, ConnectOptions, jetstream}; -use bytes::Bytes; +use serde::Serialize; +use serde::de::DeserializeOwned; use tokio::time::timeout; use super::nats_config::NatsConfig; -use crate::kv::{ApiTokenStore, CacheStore, ChatHistoryStore}; -use crate::object::{DocumentBucket, DocumentStore}; -use crate::stream::{DocumentJobPublisher, DocumentJobSubscriber, Stage}; +use crate::kv::{ + ApiToken, ApiTokensBucket, ChatHistoryBucket, KvBucket, KvKey, KvStore, SessionKey, TokenKey, +}; +use crate::object::{ + AccountKey, AvatarsBucket, FileKey, FilesBucket, IntermediatesBucket, ObjectBucket, ObjectKey, + ObjectStore, ThumbnailsBucket, +}; +use crate::stream::{EventPublisher, EventStream, EventSubscriber, FileStream, WebhookStream}; use crate::{Error, Result, TRACING_TARGET_CLIENT, TRACING_TARGET_CONNECTION}; /// NATS client wrapper with connection management. @@ -157,196 +163,155 @@ impl NatsClient { async_nats::connection::State::Connected ) } +} - /// Get or create an ApiTokenStore +// Key-value store getters +impl NatsClient { + /// Get or create a KV store for the specified key, value, and bucket types. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn api_token_store(&self, ttl: Option) -> Result { - ApiTokenStore::new(&self.inner.jetstream, ttl).await + pub async fn kv_store(&self) -> Result> + where + K: KvKey, + V: Serialize + DeserializeOwned + Send + Sync + 'static, + B: KvBucket, + { + KvStore::new(&self.inner.jetstream).await } - /// Get or create a document store for the specified bucket type. + /// Get or create a KV store with custom TTL. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn document_store(&self) -> Result> { - DocumentStore::new(&self.inner.jetstream).await + pub async fn kv_store_with_ttl(&self, ttl: Duration) -> Result> + where + K: KvKey, + V: Serialize + DeserializeOwned + Send + Sync + 'static, + B: KvBucket, + { + KvStore::with_ttl(&self.inner.jetstream, ttl).await } - /// Create a document job publisher for a specific stage. + /// Get or create an API token store. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn document_job_publisher(&self) -> Result> { - DocumentJobPublisher::new(&self.inner.jetstream).await + pub async fn api_token_store( + &self, + ttl: Duration, + ) -> Result> { + self.kv_store_with_ttl(ttl).await } - /// Create a document job subscriber for a specific stage. + /// Get or create a chat history store with default TTL. 
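+    ///
+    /// A minimal usage sketch; `ChatMessage`, `session_id`, and `message`
+    /// are placeholder names rather than types defined by this crate:
+    ///
+    /// ```ignore
+    /// let store = client.chat_history_store::<ChatMessage>().await?;
+    /// store.put(&SessionKey::from(session_id), &message).await?;
+    /// ```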
#[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn document_job_subscriber( - &self, - consumer_name: &str, - ) -> Result> { - DocumentJobSubscriber::new(&self.inner.jetstream, consumer_name).await + pub async fn chat_history_store(&self) -> Result> + where + V: Serialize + DeserializeOwned + Send + Sync + 'static, + { + self.kv_store().await } - /// Get or create a CacheStore for a specific namespace + /// Get or create a chat history store with custom TTL. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn cache_store( + pub async fn chat_history_store_with_ttl( &self, - namespace: &str, - ttl: Option, - ) -> Result> + ttl: Duration, + ) -> Result> where - T: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone + Send + Sync + 'static, + V: Serialize + DeserializeOwned + Send + Sync + 'static, { - CacheStore::new(&self.inner.jetstream, namespace, ttl).await + self.kv_store_with_ttl(ttl).await } +} - /// Get or create a ChatHistoryStore for ephemeral sessions. +// Object store getters +impl NatsClient { + /// Get or create an object store for the specified bucket and key types. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn chat_history_store(&self, ttl: Option) -> Result> + pub async fn object_store(&self) -> Result> where - T: serde::Serialize + for<'de> serde::Deserialize<'de> + Send + Sync + 'static, + B: ObjectBucket, + K: ObjectKey, { - match ttl { - Some(ttl) => ChatHistoryStore::with_ttl(&self.inner.jetstream, ttl).await, - None => ChatHistoryStore::new(&self.inner.jetstream).await, - } + ObjectStore::new(&self.inner.jetstream).await } -} -/// A NATS connection wrapper for basic pub/sub operations -#[derive(Debug, Clone)] -pub struct NatsConnection { - client: Client, - request_timeout: Duration, -} - -impl NatsConnection { - /// Publish a message to a subject - #[tracing::instrument(skip(self, payload))] - pub async fn publish(&self, subject: &str, payload: impl Into) -> Result<()> { - timeout( - self.request_timeout, - self.client.publish(subject.to_string(), payload.into()), - ) - .await - .map_err(|_| Error::Timeout { - timeout: self.request_timeout, - })? - .map_err(|e| Error::delivery_failed(subject, e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - "Published message" - ); - Ok(()) + /// Get or create a file store for primary file storage. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn file_store(&self) -> Result> { + self.object_store().await } - /// Publish a message with a reply subject - #[tracing::instrument(skip(self, payload), target = TRACING_TARGET_CLIENT)] - pub async fn publish_with_reply( - &self, - subject: &str, - reply: &str, - payload: impl Into, - ) -> Result<()> { - timeout( - self.request_timeout, - self.client - .publish_with_reply(subject.to_string(), reply.to_string(), payload.into()), - ) - .await - .map_err(|_| Error::Timeout { - timeout: self.request_timeout, - })? - .map_err(|e| Error::delivery_failed(subject, e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - reply = %reply, - "Published message with reply" - ); - Ok(()) + /// Get or create an intermediates store for temporary processing artifacts. 
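+    ///
+    /// A minimal sketch; subsequent reads and writes go through the
+    /// returned typed `ObjectStore`:
+    ///
+    /// ```ignore
+    /// let intermediates = client.intermediates_store().await?;
+    /// ```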
+ #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn intermediates_store(&self) -> Result> { + self.object_store().await } - /// Send a request and wait for a response - #[tracing::instrument(skip(self, payload), target = TRACING_TARGET_CLIENT)] - pub async fn request( - &self, - subject: &str, - payload: impl Into, - ) -> Result { - let response = timeout( - self.request_timeout, - self.client.request(subject.to_string(), payload.into()), - ) - .await - .map_err(|_| Error::Timeout { - timeout: self.request_timeout, - })? - .map_err(|e| Error::delivery_failed(subject, e.to_string()))?; + /// Get or create a thumbnail store for document thumbnails. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn thumbnail_store(&self) -> Result> { + self.object_store().await + } - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - payload_size = response.payload.len(), - "Received response for request" - ); - Ok(response) + /// Get or create an avatar store for account avatars. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn avatar_store(&self) -> Result> { + self.object_store().await } +} - /// Subscribe to a subject +// Stream getters +impl NatsClient { + /// Create an event publisher for the specified stream type. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn subscribe(&self, subject: &str) -> Result { - let subscriber = self - .client - .subscribe(subject.to_string()) - .await - .map_err(|e| Error::Connection(Box::new(e)))?; + pub async fn event_publisher(&self) -> Result> + where + T: Serialize + Send + Sync + 'static, + S: EventStream, + { + EventPublisher::new(&self.inner.jetstream).await + } - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - "Subscribed to subject" - ); - Ok(subscriber) + /// Create an event subscriber for the specified stream type. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn event_subscriber(&self) -> Result> + where + T: DeserializeOwned + Send + Sync + 'static, + S: EventStream, + { + EventSubscriber::new(&self.inner.jetstream).await } - /// Subscribe to a subject with a queue group + /// Create a file job publisher. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn queue_subscribe( - &self, - subject: &str, - queue: &str, - ) -> Result { - let subscriber = self - .client - .queue_subscribe(subject.to_string(), queue.to_string()) - .await - .map_err(|e| Error::Connection(Box::new(e)))?; + pub async fn file_publisher(&self) -> Result> + where + T: Serialize + Send + Sync + 'static, + { + self.event_publisher().await + } - tracing::debug!( - target: TRACING_TARGET_CLIENT, - subject = %subject, - queue = %queue, - "Subscribed to subject with queue group" - ); - Ok(subscriber) + /// Create a file job subscriber. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn file_subscriber(&self) -> Result> + where + T: DeserializeOwned + Send + Sync + 'static, + { + self.event_subscriber().await } - /// Flush pending messages + /// Create a webhook publisher. #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] - pub async fn flush(&self) -> Result<()> { - timeout(self.request_timeout, self.client.flush()) - .await - .map_err(|_| Error::Timeout { - timeout: self.request_timeout, - })? 
- .map_err(|e| Error::Connection(Box::new(e)))?; + pub async fn webhook_publisher(&self) -> Result> + where + T: Serialize + Send + Sync + 'static, + { + self.event_publisher().await + } - tracing::debug!( - target: TRACING_TARGET_CLIENT, - "Flushed pending messages" - ); - Ok(()) + /// Create a webhook subscriber. + #[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)] + pub async fn webhook_subscriber(&self) -> Result> + where + T: DeserializeOwned + Send + Sync + 'static, + { + self.event_subscriber().await } } diff --git a/crates/nvisy-nats/src/kv/api_token.rs b/crates/nvisy-nats/src/kv/api_token.rs index 6d848dd..6c82dd0 100644 --- a/crates/nvisy-nats/src/kv/api_token.rs +++ b/crates/nvisy-nats/src/kv/api_token.rs @@ -1,4 +1,4 @@ -//! API authentication token data structure. +//! API authentication token type. use std::time::Duration; @@ -20,8 +20,6 @@ pub enum ApiTokenType { } /// API authentication token data structure. -/// -/// Simplified token model for session management. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct ApiToken { /// Unique token identifier used for authentication @@ -166,8 +164,6 @@ mod tests { fn test_token_expiry() { let now = Timestamp::now(); let mut token = create_test_token(); - - // Set token to expire in the past token.expired_at = now .checked_sub(jiff::SignedDuration::from_secs(3600)) .unwrap(); @@ -184,7 +180,6 @@ mod tests { assert!(!token.is_valid()); assert!(token.is_deleted()); - assert!(token.deleted_at.is_some()); } #[test] @@ -197,41 +192,6 @@ mod tests { assert!(!token.can_be_refreshed()); } - #[test] - fn test_token_touch() { - let mut token = create_test_token(); - let original_last_used = token.last_used_at; - - // Small delay to ensure timestamp difference - std::thread::sleep(std::time::Duration::from_millis(10)); - - token.touch(); - assert!(token.last_used_at > original_last_used); - } - - #[test] - fn test_token_short_display() { - let token = create_test_token(); - let short_access = token.access_seq_short(); - - assert_eq!(short_access.len(), 11); // 8 chars + "..." - assert!(short_access.ends_with("...")); - } - - #[test] - fn test_is_expiring_soon() { - let now = Timestamp::now(); - let mut token = create_test_token(); - - // Set expiry to 10 minutes from now - token.expired_at = now - .checked_add(jiff::SignedDuration::from_secs(600)) - .unwrap(); - - assert!(token.is_expiring_soon(15)); // Within 15 minutes - assert!(!token.is_expiring_soon(5)); // Not within 5 minutes - } - #[test] fn test_api_token_type_serialization() { let web = ApiTokenType::Web; @@ -241,9 +201,5 @@ mod tests { let api = ApiTokenType::Api; let serialized = serde_json::to_string(&api).unwrap(); assert_eq!(serialized, "\"api\""); - - let cli = ApiTokenType::Cli; - let serialized = serde_json::to_string(&cli).unwrap(); - assert_eq!(serialized, "\"cli\""); } } diff --git a/crates/nvisy-nats/src/kv/api_token_store.rs b/crates/nvisy-nats/src/kv/api_token_store.rs deleted file mode 100644 index b00e715..0000000 --- a/crates/nvisy-nats/src/kv/api_token_store.rs +++ /dev/null @@ -1,355 +0,0 @@ -//! API token store operations using NATS KV. - -use std::time::Duration; - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; -use jiff::Timestamp; -use uuid::Uuid; - -use crate::kv::KvStore; -use crate::kv::api_token::{ApiToken, ApiTokenType}; -use crate::{Result, TRACING_TARGET_KV}; - -/// API token store for authentication token management. 
-/// -/// Provides operations for creating, retrieving, updating, and managing -/// API authentication tokens with automatic expiry handling. -#[derive(Deref, DerefMut)] -pub struct ApiTokenStore { - #[deref] - #[deref_mut] - store: KvStore, - default_ttl: Duration, -} - -impl ApiTokenStore { - /// Create a new API token store. - /// - /// # Arguments - /// * `jetstream` - JetStream context for NATS operations - /// * `ttl` - Default time-to-live for tokens (defaults to 24 hours) - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn new(jetstream: &jetstream::Context, ttl: Option) -> Result { - let default_ttl = ttl.unwrap_or(Duration::from_secs(86400)); // 24 hours default - - let store = KvStore::new( - jetstream, - "api_tokens", - Some("API authentication tokens"), - Some(default_ttl), - ) - .await?; - - tracing::info!( - target: TRACING_TARGET_KV, - ttl_hours = default_ttl.as_secs() / 3600, - bucket = %store.bucket_name(), - "Created API token store" - ); - - Ok(Self { store, default_ttl }) - } - - /// Create and store a new API token. - /// - /// # Arguments - /// * `account_id` - Account UUID this token belongs to - /// * `token_type` - Type of token (web, mobile, api) - /// * `ip_address` - IP address where token originated - /// * `user_agent` - User agent string from client - /// * `ttl` - Token lifetime (uses default if None) - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn create_token( - &self, - account_id: Uuid, - token_type: ApiTokenType, - ip_address: String, - user_agent: String, - ttl: Option, - ) -> Result { - let token_ttl = ttl.unwrap_or(self.default_ttl); - let now = Timestamp::now(); - let expires_at = now - .checked_add(jiff::SignedDuration::from_secs(token_ttl.as_secs() as i64)) - .unwrap_or( - now.checked_add(jiff::SignedDuration::from_secs(86400)) - .unwrap_or(now), - ); - - let token = ApiToken { - access_seq: Uuid::new_v4(), - account_id, - ip_address, - user_agent, - token_type, - is_suspicious: false, - issued_at: now, - expired_at: expires_at, - last_used_at: Some(now), - deleted_at: None, - }; - - let token_key = token.access_seq.to_string(); - self.store.put(&token_key, &token).await?; - - tracing::info!( - target: TRACING_TARGET_KV, - access_seq = %token.access_seq, - account_id = %token.account_id, - token_type = ?token.token_type, - expires_at = %token.expired_at, - ip_address = %token.ip_address, - "Created new API token" - ); - - Ok(token) - } - - /// Retrieve and validate an API token by access sequence. - /// - /// Returns None if token doesn't exist, is expired, or is deleted. - /// Does NOT automatically update last_used_at to avoid write amplification. - /// Use `touch_token()` separately if you need to update the timestamp. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get_token(&self, access_seq: &Uuid) -> Result> { - let token_key = access_seq.to_string(); - - match self.store.get(&token_key).await? 
{ - Some(kv_token) => { - let token = kv_token.value; - - // Check if token is deleted - if token.is_deleted() { - tracing::debug!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - deleted_at = ?token.deleted_at, - "Token is soft-deleted" - ); - return Ok(None); - } - - // Check if token is expired - if token.is_expired() { - tracing::warn!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - expired_at = %token.expired_at, - "Token has expired" - ); - - // Soft delete expired token - self.delete_token(access_seq).await?; - return Ok(None); - } - - tracing::debug!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - account_id = %token.account_id, - last_used_at = ?token.last_used_at, - "Retrieved API token" - ); - - Ok(Some(token)) - } - None => { - tracing::debug!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - "Token not found" - ); - Ok(None) - } - } - } - - /// Update the last_used_at timestamp for a token. - /// - /// Call this periodically (e.g., every 5 minutes) instead of on every access - /// to avoid write amplification while still tracking activity. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn touch_token(&self, access_seq: &Uuid) -> Result { - let token_key = access_seq.to_string(); - - if let Some(kv_token) = self.store.get(&token_key).await? { - let mut token = kv_token.value; - - if token.is_valid() { - token.touch(); - self.store.put(&token_key, &token).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - last_used_at = ?token.last_used_at, - "Updated token last_used_at" - ); - - return Ok(true); - } - } - - Ok(false) - } - - /// Mark a token as deleted (soft delete). - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete_token(&self, access_seq: &Uuid) -> Result<()> { - let token_key = access_seq.to_string(); - - if let Some(kv_token) = self.store.get(&token_key).await? { - let mut token = kv_token.value; - token.mark_deleted(); - - self.store.put(&token_key, &token).await?; - - tracing::info!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - account_id = %token.account_id, - deleted_at = ?token.deleted_at, - "Soft-deleted API token" - ); - } - - Ok(()) - } - - /// Delete all tokens for a specific account. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete_account_tokens(&self, account_id: &Uuid) -> Result { - let all_keys = self.store.keys().await?; - let mut deleted_count = 0; - - for key in all_keys { - if let Ok(Some(kv_token)) = self.store.get(&key).await - && kv_token.value.account_id == *account_id - && !kv_token.value.is_deleted() - && let Ok(access_seq) = Uuid::parse_str(&key) - { - self.delete_token(&access_seq).await?; - deleted_count += 1; - } - } - - tracing::info!( - target: TRACING_TARGET_KV, - account_id = %account_id, - deleted_count = deleted_count, - "Deleted all account tokens" - ); - - Ok(deleted_count) - } - - /// Get all active tokens for a specific account. 
- #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get_account_tokens(&self, account_id: &Uuid) -> Result> { - let all_keys = self.store.keys().await?; - let mut tokens = Vec::new(); - - for key in all_keys { - if let Ok(Some(kv_token)) = self.store.get(&key).await { - let token = kv_token.value; - if token.account_id == *account_id && token.is_valid() { - tokens.push(token); - } - } - } - - // Sort by most recently used - tokens.sort_by(|a, b| { - b.last_used_at - .unwrap_or(b.issued_at) - .cmp(&a.last_used_at.unwrap_or(a.issued_at)) - }); - - tracing::debug!( - target: TRACING_TARGET_KV, - account_id = %account_id, - active_tokens = tokens.len(), - "Retrieved account tokens" - ); - - Ok(tokens) - } - - /// Mark a token as suspicious. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn mark_suspicious(&self, access_seq: &Uuid) -> Result { - let token_key = access_seq.to_string(); - - if let Some(kv_token) = self.store.get(&token_key).await? { - let mut token = kv_token.value; - - if !token.is_suspicious { - token.mark_suspicious(); - self.store.put(&token_key, &token).await?; - - tracing::warn!( - target: TRACING_TARGET_KV, - access_seq = %access_seq, - account_id = %token.account_id, - "Marked token as suspicious" - ); - - return Ok(true); - } - } - - Ok(false) - } - - /// Clean up expired and deleted tokens (maintenance operation). - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn cleanup_expired(&self) -> Result { - let all_keys = self.store.keys().await?; - let mut cleaned_count = 0; - let now = Timestamp::now(); - - // Define cleanup threshold (delete tokens that have been soft-deleted for more than 7 days) - let cleanup_threshold = now - .checked_sub(jiff::SignedDuration::from_secs(7 * 24 * 3600)) - .unwrap_or(now); - - for key in all_keys { - if let Ok(Some(kv_token)) = self.store.get(&key).await { - let token = kv_token.value; - - // Hard delete tokens that have been soft-deleted for more than the threshold - if let Some(deleted_at) = token.deleted_at - && deleted_at < cleanup_threshold - { - self.store.delete(&key).await?; - cleaned_count += 1; - continue; - } - - // Hard delete expired tokens that haven't been accessed in the threshold period - if token.is_expired() { - let last_activity = token.last_used_at.unwrap_or(token.issued_at); - if last_activity < cleanup_threshold { - self.store.delete(&key).await?; - cleaned_count += 1; - } - } - } - } - - tracing::info!( - target: TRACING_TARGET_KV, - cleaned_count = cleaned_count, - cleanup_threshold = %cleanup_threshold, - "Cleaned up expired tokens" - ); - - Ok(cleaned_count) - } - - /// Get the default TTL for tokens. - pub fn default_ttl(&self) -> Duration { - self.default_ttl - } -} diff --git a/crates/nvisy-nats/src/kv/cache.rs b/crates/nvisy-nats/src/kv/cache.rs deleted file mode 100644 index 29247fc..0000000 --- a/crates/nvisy-nats/src/kv/cache.rs +++ /dev/null @@ -1,324 +0,0 @@ -//! Type-safe generic caching using NATS KV store. - -use std::marker::PhantomData; -use std::time::Duration; - -use async_nats::jetstream; -use serde::Serialize; -use serde::de::DeserializeOwned; - -use super::KvStore; -use crate::{Result, TRACING_TARGET_KV}; - -/// Type-safe generic cache store wrapper around KvStore. -/// -/// Provides cache-specific semantics and operations while maintaining -/// compile-time type safety for cached values of type T. 
-#[derive(Clone)] -pub struct CacheStore { - store: KvStore, - namespace: String, - _marker: PhantomData, -} - -impl CacheStore -where - T: Serialize + DeserializeOwned + Clone + Send + Sync + 'static, -{ - /// Create a new type-safe cache store for the given namespace. - /// - /// # Arguments - /// * `jetstream` - JetStream context for NATS operations - /// * `namespace` - Cache namespace (becomes part of bucket name) - /// * `ttl` - Optional time-to-live for cache entries - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn new( - jetstream: &jetstream::Context, - namespace: &str, - ttl: Option, - ) -> Result { - let bucket_name = format!("cache_{}", namespace); - let description = format!("Type-safe cache for {}", namespace); - - let store = KvStore::new(jetstream, &bucket_name, Some(&description), ttl).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - namespace = %namespace, - bucket = %bucket_name, - ttl_secs = ttl.map(|d| d.as_secs()), - type_name = std::any::type_name::(), - "Created type-safe cache store" - ); - - Ok(Self { - store, - namespace: namespace.to_string(), - _marker: PhantomData, - }) - } - - /// Set a value in the cache. - #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] - pub async fn set(&self, key: &str, value: &T) -> Result<()> { - self.store.set(key, value).await?; - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Cached value" - ); - Ok(()) - } - - /// Get a value from the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get(&self, key: &str) -> Result> { - let result = self.store.get_value(key).await?; - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - cache_hit = result.is_some(), - "Retrieved cached value" - ); - Ok(result) - } - - /// Delete a value from the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete(&self, key: &str) -> Result<()> { - self.store.delete(key).await?; - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Deleted cached value" - ); - Ok(()) - } - - /// Check if a key exists in the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn exists(&self, key: &str) -> Result { - self.store.exists(key).await - } - - /// Get or compute a value using the cache-aside pattern. - /// - /// If the key exists in cache, returns the cached value. - /// If not, computes the value using the provided function, - /// stores it in cache, and returns it. - #[tracing::instrument(skip(self, compute_fn), target = TRACING_TARGET_KV)] - pub async fn get_or_compute(&self, key: &str, compute_fn: F) -> Result - where - F: FnOnce() -> Fut + Send, - Fut: std::future::Future> + Send, - { - // Check cache first - if let Some(cached) = self.get(key).await? { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Cache hit" - ); - return Ok(cached); - } - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Cache miss, computing value" - ); - - // Compute new value - let value = compute_fn().await?; - - // Store in cache - self.set(key, &value).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - namespace = %self.namespace, - "Computed and cached new value" - ); - - Ok(value) - } - - /// Set multiple values in the cache as a batch operation. 
- #[tracing::instrument(skip(self, items), target = TRACING_TARGET_KV)] - pub async fn set_batch(&self, items: &[(&str, &T)]) -> Result<()> { - self.store.put_batch(items).await?; - tracing::debug!( - target: TRACING_TARGET_KV, - count = items.len(), - namespace = %self.namespace, - "Batch cached values" - ); - Ok(()) - } - - /// Get multiple values from the cache as a batch operation. - #[tracing::instrument(skip(self, keys), target = TRACING_TARGET_KV)] - pub async fn get_batch(&self, keys: &[&str]) -> Result>> { - let kv_results = self.store.get_batch(keys).await?; - let mut results = Vec::with_capacity(keys.len()); - - for key in keys { - if let Some(kv_value) = kv_results.get(*key) { - results.push(Some(kv_value.value.clone())); - } else { - results.push(None); - } - } - - let hit_count = results.iter().filter(|r| r.is_some()).count(); - tracing::debug!( - target: TRACING_TARGET_KV, - requested = keys.len(), - found = hit_count, - hit_rate = format!("{:.1}%", (hit_count as f64 / keys.len() as f64) * 100.0), - namespace = %self.namespace, - "Batch retrieved cached values" - ); - - Ok(results) - } - - /// Clear all entries from the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn clear(&self) -> Result<()> { - self.store.purge_all().await?; - tracing::info!( - target: TRACING_TARGET_KV, - namespace = %self.namespace, - bucket = %self.store.bucket_name(), - "Cleared all cache entries" - ); - Ok(()) - } - - /// Get all keys currently in the cache. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn keys(&self) -> Result> { - self.store.keys().await - } - - /// Get cache statistics. - pub async fn stats(&self) -> Result { - let keys = self.store.keys().await?; - - let stats = CacheStats { - entry_count: keys.len(), - bucket_name: self.store.bucket_name().to_string(), - namespace: self.namespace.clone(), - type_name: std::any::type_name::().to_string(), - }; - - tracing::debug!( - target: TRACING_TARGET_KV, - namespace = %self.namespace, - entry_count = stats.entry_count, - type_name = %stats.type_name, - "Retrieved cache statistics" - ); - - Ok(stats) - } - - /// Get the cache namespace. - pub fn namespace(&self) -> &str { - &self.namespace - } - - /// Get the underlying KV store. - pub fn inner(&self) -> &KvStore { - &self.store - } - - /// Get the bucket name used by this cache. - pub fn bucket_name(&self) -> &str { - self.store.bucket_name() - } -} - -/// Cache statistics and metadata. -#[derive(Debug, Clone)] -pub struct CacheStats { - /// Number of entries currently in cache - pub entry_count: usize, - /// NATS KV bucket name - pub bucket_name: String, - /// Cache namespace - pub namespace: String, - /// Rust type name of cached values - pub type_name: String, -} - -impl CacheStats { - /// Check if cache is empty. - pub fn is_empty(&self) -> bool { - self.entry_count == 0 - } - - /// Get a human-readable summary of cache stats. 
- pub fn summary(&self) -> String { - format!( - "Cache '{}' contains {} {} entries in bucket '{}'", - self.namespace, self.entry_count, self.type_name, self.bucket_name - ) - } -} - -#[cfg(test)] -mod tests { - use serde::{Deserialize, Serialize}; - - use super::*; - - #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] - #[allow(dead_code)] - struct TestData { - id: u64, - name: String, - } - - #[test] - fn test_cache_stats() { - let stats = CacheStats { - entry_count: 5, - bucket_name: "cache_test".to_string(), - namespace: "test".to_string(), - type_name: "TestData".to_string(), - }; - - assert!(!stats.is_empty()); - assert!(stats.summary().contains("5 TestData entries")); - - let empty_stats = CacheStats { - entry_count: 0, - bucket_name: "cache_empty".to_string(), - namespace: "empty".to_string(), - type_name: "TestData".to_string(), - }; - - assert!(empty_stats.is_empty()); - assert!(empty_stats.summary().contains("0 TestData entries")); - } - - #[test] - fn test_cache_namespace_formatting() { - // Test that namespace is correctly formatted into bucket name - let namespace = "user_sessions"; - let expected_bucket = "cache_user_sessions"; - let actual_bucket = format!("cache_{}", namespace); - assert_eq!(actual_bucket, expected_bucket); - } - - // Note: Integration tests requiring NATS server would go in a separate test module - // or be marked with #[ignore] attribute for optional execution -} diff --git a/crates/nvisy-nats/src/kv/chat_history.rs b/crates/nvisy-nats/src/kv/chat_history.rs deleted file mode 100644 index 79ce7f3..0000000 --- a/crates/nvisy-nats/src/kv/chat_history.rs +++ /dev/null @@ -1,156 +0,0 @@ -//! Chat history store for ephemeral sessions with TTL. - -use std::time::Duration; - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; -use serde::Serialize; -use serde::de::DeserializeOwned; -use uuid::Uuid; - -use super::KvStore; -use crate::{Result, TRACING_TARGET_KV}; - -/// Default session TTL (30 minutes). -pub const DEFAULT_SESSION_TTL: Duration = Duration::from_secs(30 * 60); - -/// NATS KV bucket name for chat history. -const CHAT_HISTORY_BUCKET: &str = "chat_history"; - -/// Chat history store backed by NATS KV. -/// -/// Provides ephemeral session storage with automatic TTL expiration. -#[derive(Clone, Deref, DerefMut)] -pub struct ChatHistoryStore -where - T: Serialize + DeserializeOwned + Send + Sync + 'static, -{ - #[deref] - #[deref_mut] - store: KvStore, - ttl: Duration, -} - -impl ChatHistoryStore -where - T: Serialize + DeserializeOwned + Send + Sync + 'static, -{ - /// Creates a new chat history store with default TTL (30 minutes). - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn new(jetstream: &jetstream::Context) -> Result { - Self::with_ttl(jetstream, DEFAULT_SESSION_TTL).await - } - - /// Creates a new chat history store with custom TTL. - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn with_ttl(jetstream: &jetstream::Context, ttl: Duration) -> Result { - let store = KvStore::new( - jetstream, - CHAT_HISTORY_BUCKET, - Some("Ephemeral chat sessions"), - Some(ttl), - ) - .await?; - - tracing::info!( - target: TRACING_TARGET_KV, - ttl_secs = ttl.as_secs(), - bucket = %store.bucket_name(), - "Created chat history store" - ); - - Ok(Self { store, ttl }) - } - - /// Returns the configured TTL. - pub fn ttl(&self) -> Duration { - self.ttl - } - - /// Creates a new session. 
- #[tracing::instrument(skip(self, session), target = TRACING_TARGET_KV)] - pub async fn create(&self, session_id: Uuid, session: &T) -> Result<()> { - let key = session_key(session_id); - - if self.store.exists(&key).await? { - return Err(crate::Error::operation( - "chat_history_create", - format!("session already exists: {session_id}"), - )); - } - - self.store.put(&key, session).await?; - - tracing::info!( - target: TRACING_TARGET_KV, - session_id = %session_id, - "Created chat session" - ); - - Ok(()) - } - - /// Gets a session by ID. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get(&self, session_id: Uuid) -> Result> { - let key = session_key(session_id); - self.store.get_value(&key).await - } - - /// Updates an existing session (also resets TTL). - #[tracing::instrument(skip(self, session), target = TRACING_TARGET_KV)] - pub async fn update(&self, session_id: Uuid, session: &T) -> Result<()> { - let key = session_key(session_id); - - if !self.store.exists(&key).await? { - return Err(crate::Error::operation( - "chat_history_update", - format!("session not found: {session_id}"), - )); - } - - self.store.put(&key, session).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - session_id = %session_id, - "Updated chat session" - ); - - Ok(()) - } - - /// Touches a session to reset its TTL. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn touch(&self, session_id: Uuid) -> Result<()> { - let key = session_key(session_id); - self.store.touch(&key).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - session_id = %session_id, - "Touched chat session" - ); - - Ok(()) - } - - /// Deletes a session. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete(&self, session_id: Uuid) -> Result<()> { - let key = session_key(session_id); - self.store.delete(&key).await?; - - tracing::info!( - target: TRACING_TARGET_KV, - session_id = %session_id, - "Deleted chat session" - ); - - Ok(()) - } -} - -fn session_key(session_id: Uuid) -> String { - format!("session.{session_id}") -} diff --git a/crates/nvisy-nats/src/kv/kv_bucket.rs b/crates/nvisy-nats/src/kv/kv_bucket.rs new file mode 100644 index 0000000..881d67a --- /dev/null +++ b/crates/nvisy-nats/src/kv/kv_bucket.rs @@ -0,0 +1,59 @@ +//! Key-value bucket configuration traits. + +use std::time::Duration; + +/// Marker trait for KV bucket configuration. +/// +/// This trait defines the configuration for a NATS KV bucket, +/// similar to `ObjectBucket` for object stores. +pub trait KvBucket: Clone + Send + Sync + 'static { + /// Bucket name used in NATS KV. + const NAME: &'static str; + + /// Human-readable description for the bucket. + const DESCRIPTION: &'static str; + + /// Default TTL for entries in this bucket. + /// Returns `None` for buckets where entries should not expire. + const TTL: Option; +} + +/// Bucket for API authentication tokens. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct ApiTokensBucket; + +impl KvBucket for ApiTokensBucket { + const NAME: &'static str = "api_tokens"; + const DESCRIPTION: &'static str = "API authentication tokens"; + const TTL: Option = Some(Duration::from_secs(24 * 60 * 60)); // 24 hours +} + +/// Bucket for ephemeral chat history sessions. 
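+///
+/// Bucket metadata is exposed through the `KvBucket` associated constants,
+/// for example:
+///
+/// ```ignore
+/// assert_eq!(ChatHistoryBucket::NAME, "chat_history");
+/// assert_eq!(ChatHistoryBucket::TTL, Some(Duration::from_secs(30 * 60)));
+/// ```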
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct ChatHistoryBucket; + +impl KvBucket for ChatHistoryBucket { + const NAME: &'static str = "chat_history"; + const DESCRIPTION: &'static str = "Ephemeral chat sessions"; + const TTL: Option = Some(Duration::from_secs(30 * 60)); // 30 minutes +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_api_tokens_bucket() { + assert_eq!(ApiTokensBucket::NAME, "api_tokens"); + assert_eq!( + ApiTokensBucket::TTL, + Some(Duration::from_secs(24 * 60 * 60)) + ); + } + + #[test] + fn test_chat_history_bucket() { + assert_eq!(ChatHistoryBucket::NAME, "chat_history"); + assert_eq!(ChatHistoryBucket::TTL, Some(Duration::from_secs(30 * 60))); + } +} diff --git a/crates/nvisy-nats/src/kv/kv_key.rs b/crates/nvisy-nats/src/kv/kv_key.rs new file mode 100644 index 0000000..c063c54 --- /dev/null +++ b/crates/nvisy-nats/src/kv/kv_key.rs @@ -0,0 +1,92 @@ +//! Key-value key types and traits. + +use std::fmt; +use std::str::FromStr; + +use uuid::Uuid; + +use crate::Error; + +/// Marker trait for KV key types. +/// +/// This trait defines how keys are formatted for storage in NATS KV. +pub trait KvKey: fmt::Debug + fmt::Display + FromStr + Clone + Send + Sync + 'static {} + +/// Key for chat history sessions. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct SessionKey(pub Uuid); + +impl KvKey for SessionKey {} + +impl fmt::Display for SessionKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for SessionKey { + type Err = Error; + + fn from_str(s: &str) -> Result { + let id = + Uuid::parse_str(s).map_err(|e| Error::operation("parse_session_key", e.to_string()))?; + Ok(Self(id)) + } +} + +impl From for SessionKey { + fn from(id: Uuid) -> Self { + Self(id) + } +} + +/// Key for API tokens. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct TokenKey(pub Uuid); + +impl KvKey for TokenKey {} + +impl fmt::Display for TokenKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl FromStr for TokenKey { + type Err = Error; + + fn from_str(s: &str) -> Result { + let id = + Uuid::parse_str(s).map_err(|e| Error::operation("parse_token_key", e.to_string()))?; + Ok(Self(id)) + } +} + +impl From for TokenKey { + fn from(id: Uuid) -> Self { + Self(id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_session_key_roundtrip() { + let id = Uuid::nil(); + let key = SessionKey(id); + let s = key.to_string(); + let parsed: SessionKey = s.parse().unwrap(); + assert_eq!(key, parsed); + } + + #[test] + fn test_token_key_roundtrip() { + let id = Uuid::nil(); + let key = TokenKey(id); + let s = key.to_string(); + let parsed: TokenKey = s.parse().unwrap(); + assert_eq!(key, parsed); + } +} diff --git a/crates/nvisy-nats/src/kv/kv_store.rs b/crates/nvisy-nats/src/kv/kv_store.rs new file mode 100644 index 0000000..f61dc1b --- /dev/null +++ b/crates/nvisy-nats/src/kv/kv_store.rs @@ -0,0 +1,337 @@ +//! Type-safe NATS KV store wrapper. + +use std::marker::PhantomData; +use std::time::Duration; + +use async_nats::jetstream::{self, kv}; +use futures::StreamExt; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; + +use super::{KvBucket, KvKey}; +use crate::{Error, Result, TRACING_TARGET_KV}; + +/// Type-safe NATS KV store wrapper. 
+/// +/// This store is generic over: +/// - `K`: The key type (determines prefix) +/// - `V`: The value type to store (must be serializable) +/// - `B`: The bucket configuration (determines name, description, TTL) +#[derive(Clone)] +pub struct KvStore +where + K: KvKey, + V: Serialize + DeserializeOwned + Send + Sync + 'static, + B: KvBucket, +{ + store: kv::Store, + _key: PhantomData, + _value: PhantomData, + _bucket: PhantomData, +} + +impl KvStore +where + K: KvKey, + V: Serialize + DeserializeOwned + Send + Sync + 'static, + B: KvBucket, +{ + /// Create or get a KV bucket using the bucket configuration. + #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] + pub(crate) async fn new(jetstream: &jetstream::Context) -> Result { + Self::with_ttl(jetstream, B::TTL.unwrap_or_default()).await + } + + /// Create or get a KV bucket with custom TTL. + #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] + pub(crate) async fn with_ttl(jetstream: &jetstream::Context, ttl: Duration) -> Result { + let config = kv::Config { + bucket: B::NAME.to_string(), + description: B::DESCRIPTION.to_string(), + max_age: ttl, + ..Default::default() + }; + + let store = match jetstream.get_key_value(B::NAME).await { + Ok(store) => { + tracing::debug!( + target: TRACING_TARGET_KV, + bucket = %B::NAME, + "Using existing KV bucket" + ); + store + } + Err(_) => { + tracing::debug!( + target: TRACING_TARGET_KV, + bucket = %B::NAME, + ttl_secs = ttl.as_secs(), + "Creating new KV bucket" + ); + jetstream + .create_key_value(config) + .await + .map_err(|e| Error::operation("kv_create", e.to_string()))? + } + }; + + Ok(Self { + store, + _key: PhantomData, + _value: PhantomData, + _bucket: PhantomData, + }) + } + + /// Returns the bucket name. + #[inline] + pub fn bucket_name(&self) -> &'static str { + B::NAME + } + + /// Put a value into the store. + #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] + pub async fn put(&self, key: &K, value: &V) -> Result { + let key_str = key.to_string(); + let json = serde_json::to_vec(value)?; + let size = json.len(); + let revision = self + .store + .put(&key_str, json.into()) + .await + .map_err(|e| Error::operation("kv_put", e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + revision = revision, + size_bytes = size, + "Put value to KV store" + ); + + Ok(KvEntry { + key: key_str, + revision, + size: size as u64, + }) + } + + /// Get a value from the store. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn get(&self, key: &K) -> Result>> { + let key_str = key.to_string(); + match self.store.entry(&key_str).await { + Ok(Some(entry)) => { + let size = entry.value.len(); + let deserialized = serde_json::from_slice(&entry.value)?; + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + size_bytes = size, + revision = entry.revision, + "Retrieved value from KV store" + ); + Ok(Some(KvValue { + key: key_str, + value: deserialized, + revision: entry.revision, + size: size as u64, + created: entry.created.into(), + })) + } + Ok(None) => { + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + "Key not found in KV store" + ); + Ok(None) + } + Err(e) => Err(Error::operation("kv_get", e.to_string())), + } + } + + /// Get a value, returning just the data. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn get_value(&self, key: &K) -> Result> { + Ok(self.get(key).await?.map(|kv| kv.value)) + } + + /// Delete a key from the store. 
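+    ///
+    /// Deletion is implemented as a purge, so the key's revision history is
+    /// removed rather than tombstoned. A minimal sketch, assuming a
+    /// previously stored `key`:
+    ///
+    /// ```ignore
+    /// store.delete(&key).await?;
+    /// ```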
+ #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn delete(&self, key: &K) -> Result<()> { + let key_str = key.to_string(); + self.store + .purge(&key_str) + .await + .map_err(|e| Error::operation("kv_delete", e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + "Deleted key from KV store" + ); + Ok(()) + } + + /// Check if a key exists in the store. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn exists(&self, key: &K) -> Result { + let key_str = key.to_string(); + match self.store.get(&key_str).await { + Ok(Some(_)) => Ok(true), + Ok(None) => Ok(false), + Err(e) => Err(Error::operation("kv_exists", e.to_string())), + } + } + + /// Touches a key to reset its TTL by re-putting the same value. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn touch(&self, key: &K) -> Result { + let kv_value = self + .get(key) + .await? + .ok_or_else(|| Error::operation("kv_touch", format!("key not found: {key}")))?; + + self.put(key, &kv_value.value).await + } + + /// Get all keys in the bucket with the expected prefix. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn keys(&self) -> Result> { + let mut keys = Vec::new(); + let mut key_stream = self + .store + .keys() + .await + .map_err(|e| Error::operation("kv_keys", e.to_string()))?; + + while let Some(key_result) = key_stream.next().await { + match key_result { + Ok(key_str) => { + if let Ok(key) = key_str.parse::() { + keys.push(key); + } + } + Err(e) => { + tracing::warn!( + target: TRACING_TARGET_KV, + error = %e, + "Error reading key from bucket" + ); + } + } + } + + tracing::debug!( + target: TRACING_TARGET_KV, + count = keys.len(), + bucket = %B::NAME, + "Retrieved keys from bucket" + ); + Ok(keys) + } + + /// Purge all keys in the bucket. + #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] + pub async fn purge_all(&self) -> Result<()> { + let keys = self.keys().await?; + let count = keys.len(); + for key in keys { + self.delete(&key).await?; + } + tracing::debug!( + target: TRACING_TARGET_KV, + count = count, + bucket = %B::NAME, + "Purged all keys from bucket" + ); + Ok(()) + } + + /// Update a value only if the revision matches (optimistic concurrency). + #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] + pub async fn update(&self, key: &K, value: &V, revision: u64) -> Result { + let key_str = key.to_string(); + let json = serde_json::to_vec(value)?; + let size = json.len(); + let new_revision = self + .store + .update(&key_str, json.into(), revision) + .await + .map_err(|e| Error::operation("kv_update", e.to_string()))?; + + tracing::debug!( + target: TRACING_TARGET_KV, + key = %key_str, + old_revision = revision, + new_revision = new_revision, + size_bytes = size, + "Updated value in KV store" + ); + + Ok(KvEntry { + key: key_str, + revision: new_revision, + size: size as u64, + }) + } + + /// Get or compute a value using the cache-aside pattern. + #[tracing::instrument(skip(self, compute_fn), target = TRACING_TARGET_KV)] + pub async fn get_or_compute(&self, key: &K, compute_fn: F) -> Result + where + F: FnOnce() -> Fut + Send, + Fut: std::future::Future> + Send, + V: Clone, + { + if let Some(existing) = self.get_value(key).await? { + return Ok(existing); + } + + let value = compute_fn().await?; + self.put(key, &value).await?; + Ok(value) + } + + /// Get the underlying store reference. 
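+    ///
+    /// This is an escape hatch for operations the typed wrapper does not
+    /// cover, for example reading bucket details directly:
+    ///
+    /// ```ignore
+    /// let bucket = store.inner().name.clone();
+    /// ```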
+ pub fn inner(&self) -> &kv::Store { + &self.store + } +} + +/// KV entry metadata. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KvEntry { + pub key: String, + pub revision: u64, + pub size: u64, +} + +/// KV value with metadata. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct KvValue { + pub key: String, + pub value: V, + pub revision: u64, + pub size: u64, + pub created: std::time::SystemTime, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_kv_entry_creation() { + let entry = KvEntry { + key: "test_key".to_string(), + revision: 1, + size: 100, + }; + + assert_eq!(entry.key, "test_key"); + assert_eq!(entry.revision, 1); + assert_eq!(entry.size, 100); + } +} diff --git a/crates/nvisy-nats/src/kv/mod.rs b/crates/nvisy-nats/src/kv/mod.rs index 2f910fa..d742ca1 100644 --- a/crates/nvisy-nats/src/kv/mod.rs +++ b/crates/nvisy-nats/src/kv/mod.rs @@ -1,22 +1,31 @@ -//! NATS Key-Value store operations for caching, API tokens, chat history, and generic KV storage. +//! NATS Key-Value store operations. //! -//! This module provides type-safe abstractions over NATS KV for different use cases: -//! - `KvStore`: Generic type-safe key-value operations -//! - `CacheStore`: Type-safe caching with cache-aside patterns -//! - `ApiTokenStore`: API authentication token management -//! - `ChatHistoryStore`: Ephemeral chat session storage with TTL +//! This module provides type-safe abstractions over NATS KV: +//! - `KvStore`: Generic type-safe key-value operations +//! - `KvKey`: Trait for key types with prefix support +//! - `KvBucket`: Trait for bucket configuration //! -//! All stores provide compile-time type safety through generic parameters and -//! comprehensive observability through structured logging. +//! # Example +//! +//! ```ignore +//! // Create a session store +//! let store: KvStore = +//! nats_client.kv_store().await?; +//! +//! // Put a session +//! let key = SessionKey::from(Uuid::new_v4()); +//! store.put(&key, &session).await?; +//! +//! // Get the session back +//! let session = store.get_value(&key).await?; +//! ``` mod api_token; -mod api_token_store; -mod cache; -mod chat_history; -mod store; +mod kv_bucket; +mod kv_key; +mod kv_store; pub use api_token::{ApiToken, ApiTokenType}; -pub use api_token_store::ApiTokenStore; -pub use cache::{CacheStats, CacheStore}; -pub use chat_history::{ChatHistoryStore, DEFAULT_SESSION_TTL}; -pub use store::{KvEntry, KvStore, KvValue}; +pub use kv_bucket::{ApiTokensBucket, ChatHistoryBucket, KvBucket}; +pub use kv_key::{KvKey, SessionKey, TokenKey}; +pub use kv_store::{KvEntry, KvStore, KvValue}; diff --git a/crates/nvisy-nats/src/kv/store.rs b/crates/nvisy-nats/src/kv/store.rs deleted file mode 100644 index 0a4f478..0000000 --- a/crates/nvisy-nats/src/kv/store.rs +++ /dev/null @@ -1,433 +0,0 @@ -//! Type-safe NATS KV store wrapper with improved API design. - -use std::collections::HashMap; -use std::marker::PhantomData; -use std::time::Duration; - -use async_nats::jetstream::{self, kv}; -use futures::StreamExt; -use serde::de::DeserializeOwned; -use serde::{Deserialize, Serialize}; - -use crate::{Error, Result, TRACING_TARGET_KV}; - -/// Type-safe NATS KV store wrapper with improved API design -/// -/// This store provides a generic interface over NATS KV for a specific -/// serializable data type T, with consistent error handling and -/// comprehensive operations. The type parameter ensures compile-time -/// type safety for all operations. 
-#[derive(Clone)] -pub struct KvStore { - store: kv::Store, - bucket_name: String, - _marker: PhantomData, -} - -impl KvStore -where - T: Serialize + DeserializeOwned + Send + Sync + 'static, -{ - /// Create or get a KV bucket for the specified type T. - /// - /// # Arguments - /// * `jetstream` - JetStream context for NATS operations - /// * `bucket_name` - Name of the KV bucket to create or access - /// * `description` - Optional description for the bucket - /// * `ttl` - Optional time-to-live for entries in the bucket - #[tracing::instrument(skip(jetstream), target = TRACING_TARGET_KV)] - pub async fn new( - jetstream: &jetstream::Context, - bucket_name: &str, - description: Option<&str>, - ttl: Option, - ) -> Result { - let mut config = kv::Config { - bucket: bucket_name.to_string(), - description: description.unwrap_or("").to_string(), - max_age: ttl.unwrap_or(Duration::from_secs(0)), - ..Default::default() - }; - - if let Some(ttl_duration) = ttl { - config.max_age = ttl_duration; - } - - // Try to get existing bucket first - let store = match jetstream.get_key_value(bucket_name).await { - Ok(store) => { - tracing::debug!( - target: TRACING_TARGET_KV, - bucket = %bucket_name, - "Using existing KV bucket" - ); - store - } - Err(_) => { - // Bucket doesn't exist, create it - tracing::debug!( - target: TRACING_TARGET_KV, - bucket = %bucket_name, - ttl_secs = ttl.map(|d| d.as_secs()), - "Creating new KV bucket" - ); - jetstream - .create_key_value(config) - .await - .map_err(|e| Error::operation("kv_create", e.to_string()))? - } - }; - - Ok(Self { - store, - bucket_name: bucket_name.to_string(), - _marker: PhantomData, - }) - } - - /// Get the bucket name - pub fn bucket_name(&self) -> &str { - &self.bucket_name - } - - /// Put a value into the store (serializes to JSON). - #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] - pub async fn put(&self, key: &str, value: &T) -> Result { - let json = serde_json::to_vec(value)?; - let size = json.len(); - let revision = self - .store - .put(key, json.into()) - .await - .map_err(|e| Error::operation("kv_put", e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - revision = revision, - size_bytes = size, - "Put value to KV store" - ); - - Ok(KvEntry { - key: key.to_string(), - revision, - size: size as u64, - }) - } - - /// Get a value from the store (deserializes from JSON). - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get(&self, key: &str) -> Result>> { - match self.store.entry(key).await { - Ok(Some(entry)) => { - let size = entry.value.len(); - let deserialized = serde_json::from_slice(&entry.value)?; - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - size_bytes = size, - revision = entry.revision, - "Retrieved value from KV store" - ); - Ok(Some(KvValue { - key: key.to_string(), - value: deserialized, - revision: entry.revision, - size: size as u64, - created: entry.created.into(), - })) - } - Ok(None) => { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - "Key not found in KV store" - ); - Ok(None) - } - Err(e) => Err(Error::operation("kv_get", e.to_string())), - } - } - - /// Delete a key from the store. 
- #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn delete(&self, key: &str) -> Result<()> { - self.store - .purge(key) - .await - .map_err(|e| Error::operation("kv_delete", e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - "Deleted key from KV store" - ); - Ok(()) - } - - /// Check if a key exists in the store. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn exists(&self, key: &str) -> Result { - match self.store.get(key).await { - Ok(Some(_)) => { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - exists = true, - "Checked key existence" - ); - Ok(true) - } - Ok(None) => { - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - exists = false, - "Checked key existence" - ); - Ok(false) - } - Err(e) => Err(Error::operation("kv_exists", e.to_string())), - } - } - - /// Touches a key to reset its TTL by re-putting the same value. - /// - /// Returns an error if the key doesn't exist. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn touch(&self, key: &str) -> Result { - let kv_value = self - .get(key) - .await? - .ok_or_else(|| Error::operation("kv_touch", format!("key not found: {key}")))?; - - let entry = self.put(key, &kv_value.value).await?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - revision = entry.revision, - "Touched key (TTL reset)" - ); - - Ok(entry) - } - - /// Get all keys in the bucket. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn keys(&self) -> Result> { - let mut keys = Vec::new(); - let mut key_stream = self - .store - .keys() - .await - .map_err(|e| Error::operation("kv_keys", e.to_string()))?; - - while let Some(key) = key_stream.next().await { - match key { - Ok(k) => keys.push(k), - Err(e) => { - tracing::warn!( - target: TRACING_TARGET_KV, - error = %e, - "Error reading key from bucket" - ); - } - } - } - - tracing::debug!( - target: TRACING_TARGET_KV, - count = keys.len(), - bucket = %self.store.name, - "Retrieved keys from bucket" - ); - Ok(keys) - } - - /// Purge all keys in the bucket. - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn purge_all(&self) -> Result<()> { - let keys = self.keys().await?; - let count = keys.len(); - for key in keys { - self.delete(&key).await?; - } - tracing::debug!( - target: TRACING_TARGET_KV, - count = count, - bucket = %self.store.name, - "Purged all keys from bucket" - ); - Ok(()) - } - - /// Get the underlying store reference - pub fn inner(&self) -> &kv::Store { - &self.store - } - - /// Set/update a value (alias for put for consistency with cache interface). - #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] - pub async fn set(&self, key: &str, value: &T) -> Result { - self.put(key, value).await - } - - /// Get a value and extract just the data (convenience method) - #[tracing::instrument(skip(self), target = TRACING_TARGET_KV)] - pub async fn get_value(&self, key: &str) -> Result> { - Ok(self.get(key).await?.map(|kv_value| kv_value.value)) - } - - /// Put multiple values in a batch operation. 
- #[tracing::instrument(skip(self, items), target = TRACING_TARGET_KV)] - pub async fn put_batch(&self, items: &[(&str, &T)]) -> Result> { - let mut results = Vec::with_capacity(items.len()); - - for (key, value) in items { - let entry = self.put(key, value).await?; - results.push(entry); - } - - tracing::debug!( - target: TRACING_TARGET_KV, - count = items.len(), - "Batch put completed" - ); - - Ok(results) - } - - /// Get multiple values in a batch operation. - #[tracing::instrument(skip(self, keys), target = TRACING_TARGET_KV)] - pub async fn get_batch(&self, keys: &[&str]) -> Result>> { - let mut results = HashMap::with_capacity(keys.len()); - - for key in keys { - if let Some(value) = self.get(key).await? { - results.insert(key.to_string(), value); - } - } - - tracing::debug!( - target: TRACING_TARGET_KV, - requested = keys.len(), - found = results.len(), - "Batch get completed" - ); - - Ok(results) - } - - /// Update a value only if the revision matches (optimistic concurrency). - #[tracing::instrument(skip(self, value), target = TRACING_TARGET_KV)] - pub async fn update(&self, key: &str, value: &T, revision: u64) -> Result { - let json = serde_json::to_vec(value)?; - let size = json.len(); - let new_revision = self - .store - .update(key, json.into(), revision) - .await - .map_err(|e| Error::operation("kv_update", e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - old_revision = revision, - new_revision = new_revision, - size_bytes = size, - "Updated value in KV store" - ); - - Ok(KvEntry { - key: key.to_string(), - revision: new_revision, - size: size as u64, - }) - } - - /// Get or compute a value using the cache-aside pattern. - #[tracing::instrument(skip(self, compute_fn), target = TRACING_TARGET_KV)] - pub async fn get_or_compute(&self, key: &str, compute_fn: F) -> Result - where - F: FnOnce() -> Fut + Send, - Fut: std::future::Future> + Send, - T: Clone, - { - // Try to get from store first - if let Some(existing) = self.get_value(key).await? 
{ - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - "Found existing value in store" - ); - return Ok(existing); - } - - // Value not found, compute it - tracing::debug!( - target: TRACING_TARGET_KV, - key = %key, - "Value not found, computing new value" - ); - let value = compute_fn().await?; - - // Store the computed value - self.put(key, &value).await?; - - Ok(value) - } -} - -/// KV entry metadata -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct KvEntry { - pub key: String, - pub revision: u64, - pub size: u64, -} - -/// KV value with metadata -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct KvValue { - pub key: String, - pub value: T, - pub revision: u64, - pub size: u64, - pub created: std::time::SystemTime, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[derive(Debug, Serialize, Deserialize, PartialEq)] - #[allow(dead_code)] - struct TestData { - id: u64, - name: String, - } - - // Note: These tests would require a running NATS server with JetStream enabled - // They're marked as ignored for now - - #[test] - #[ignore] - fn test_kv_operations() { - // Would test put/get/delete operations - } - - #[test] - fn test_kv_entry_creation() { - let entry = KvEntry { - key: "test_key".to_string(), - revision: 1, - size: 100, - }; - - assert_eq!(entry.key, "test_key"); - assert_eq!(entry.revision, 1); - assert_eq!(entry.size, 100); - } -} diff --git a/crates/nvisy-nats/src/lib.rs b/crates/nvisy-nats/src/lib.rs index 90b15cc..add120c 100644 --- a/crates/nvisy-nats/src/lib.rs +++ b/crates/nvisy-nats/src/lib.rs @@ -35,5 +35,5 @@ pub mod stream; // Re-export async_nats types needed by consumers pub use async_nats::jetstream; -pub use client::{NatsClient, NatsConfig, NatsConnection}; +pub use client::{NatsClient, NatsConfig}; pub use error::{Error, Result}; diff --git a/crates/nvisy-nats/src/object/avatar_bucket.rs b/crates/nvisy-nats/src/object/avatar_bucket.rs deleted file mode 100644 index 02e0df9..0000000 --- a/crates/nvisy-nats/src/object/avatar_bucket.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Avatar bucket constants for NATS object storage. - -use std::time::Duration; - -/// Bucket name for account avatars. -pub const AVATAR_BUCKET: &str = "ACCOUNT_AVATARS"; - -/// Maximum age for avatars (none - retained indefinitely). -pub const AVATAR_MAX_AGE: Option = None; diff --git a/crates/nvisy-nats/src/object/avatar_key.rs b/crates/nvisy-nats/src/object/avatar_key.rs deleted file mode 100644 index 5d08f55..0000000 --- a/crates/nvisy-nats/src/object/avatar_key.rs +++ /dev/null @@ -1,91 +0,0 @@ -//! Avatar key for NATS object storage. - -use std::fmt; -use std::str::FromStr; - -use uuid::Uuid; - -use crate::{Error, Result}; - -/// A validated key for avatar objects in NATS object storage. -/// -/// The key format is simply the account ID as a string, since avatars -/// are uniquely identified by their owning account. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct AvatarKey { - account_id: Uuid, -} - -impl AvatarKey { - /// Creates a new avatar key for an account. - pub fn new(account_id: Uuid) -> Self { - Self { account_id } - } - - /// Returns the account ID. 
- pub fn account_id(&self) -> Uuid { - self.account_id - } -} - -impl fmt::Display for AvatarKey { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.account_id) - } -} - -impl FromStr for AvatarKey { - type Err = Error; - - fn from_str(s: &str) -> Result { - let account_id = Uuid::parse_str(s) - .map_err(|e| Error::operation("parse_key", format!("Invalid account UUID: {}", e)))?; - Ok(Self::new(account_id)) - } -} - -impl From for AvatarKey { - fn from(account_id: Uuid) -> Self { - Self::new(account_id) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_avatar_key_new() { - let account_id = Uuid::new_v4(); - let key = AvatarKey::new(account_id); - assert_eq!(key.account_id(), account_id); - } - - #[test] - fn test_avatar_key_display() { - let account_id = Uuid::new_v4(); - let key = AvatarKey::new(account_id); - assert_eq!(key.to_string(), account_id.to_string()); - } - - #[test] - fn test_avatar_key_roundtrip() { - let account_id = Uuid::new_v4(); - let key = AvatarKey::new(account_id); - let encoded = key.to_string(); - let decoded: AvatarKey = encoded.parse().unwrap(); - assert_eq!(decoded.account_id(), account_id); - } - - #[test] - fn test_avatar_key_from_uuid() { - let account_id = Uuid::new_v4(); - let key: AvatarKey = account_id.into(); - assert_eq!(key.account_id(), account_id); - } - - #[test] - fn test_avatar_key_from_str_invalid() { - assert!(AvatarKey::from_str("not-a-uuid").is_err()); - } -} diff --git a/crates/nvisy-nats/src/object/avatar_store.rs b/crates/nvisy-nats/src/object/avatar_store.rs deleted file mode 100644 index 61b5529..0000000 --- a/crates/nvisy-nats/src/object/avatar_store.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Avatar store for NATS object storage. - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; - -use super::avatar_bucket::{AVATAR_BUCKET, AVATAR_MAX_AGE}; -use super::avatar_key::AvatarKey; -use super::object_data::{GetResult, PutResult}; -use super::object_store::ObjectStore; -use crate::Result; - -/// An avatar store that manages profile images in NATS object storage. -/// -/// Uses [`AvatarKey`] for addressing (account ID based). -#[derive(Clone, Deref, DerefMut)] -pub struct AvatarStore { - #[deref] - #[deref_mut] - inner: ObjectStore, -} - -impl AvatarStore { - /// Creates a new avatar store. - pub async fn new(jetstream: &jetstream::Context) -> Result { - let inner = ObjectStore::new(jetstream, AVATAR_BUCKET, AVATAR_MAX_AGE).await?; - Ok(Self { inner }) - } - - /// Streams avatar data to the store while computing SHA-256 hash on-the-fly. - pub async fn put(&self, key: &AvatarKey, reader: R) -> Result - where - R: tokio::io::AsyncRead + Unpin, - { - self.inner.put(&key.to_string(), reader).await - } - - /// Gets an avatar from the store as a stream. - /// - /// Returns `None` if the avatar doesn't exist. - pub async fn get(&self, key: &AvatarKey) -> Result> { - self.inner.get(&key.to_string()).await - } - - /// Deletes an avatar from the store. - pub async fn delete(&self, key: &AvatarKey) -> Result<()> { - self.inner.delete(&key.to_string()).await - } - - /// Checks if an avatar exists. - pub async fn exists(&self, key: &AvatarKey) -> Result { - self.inner.exists(&key.to_string()).await - } - - /// Returns the bucket name. 
- #[inline] - pub fn bucket(&self) -> &'static str { - AVATAR_BUCKET - } -} diff --git a/crates/nvisy-nats/src/object/document_bucket.rs b/crates/nvisy-nats/src/object/document_bucket.rs deleted file mode 100644 index e459a0e..0000000 --- a/crates/nvisy-nats/src/object/document_bucket.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Document bucket configuration for NATS object storage. - -use std::time::Duration; - -/// Marker trait for document storage buckets. -/// -/// This trait defines the configuration for a NATS object storage bucket, -/// including its name and optional TTL for objects. -pub trait DocumentBucket: Clone + Send + Sync + 'static { - /// Bucket name used in NATS object storage. - const NAME: &'static str; - - /// Maximum age for objects in this bucket. - /// Returns `None` for buckets where objects should not expire. - const MAX_AGE: Option; -} - -/// Primary document storage for uploaded and processed files. -/// -/// No expiration, files are retained indefinitely. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] -pub struct Files; - -impl DocumentBucket for Files { - const MAX_AGE: Option = None; - const NAME: &'static str = "DOCUMENT_FILES"; -} - -/// Temporary storage for intermediate processing artifacts. -/// -/// Files expire after 7 days. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] -pub struct Intermediates; - -impl DocumentBucket for Intermediates { - const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); - const NAME: &'static str = "DOCUMENT_INTERMEDIATES"; // 7 days -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_bucket_names() { - assert_eq!(Files::NAME, "DOCUMENT_FILES"); - assert_eq!(Intermediates::NAME, "DOCUMENT_INTERMEDIATES"); - } - - #[test] - fn test_bucket_max_age() { - assert_eq!(Files::MAX_AGE, None); - assert_eq!( - Intermediates::MAX_AGE, - Some(Duration::from_secs(7 * 24 * 60 * 60)) - ); - } -} diff --git a/crates/nvisy-nats/src/object/document_key.rs b/crates/nvisy-nats/src/object/document_key.rs deleted file mode 100644 index 0861e95..0000000 --- a/crates/nvisy-nats/src/object/document_key.rs +++ /dev/null @@ -1,182 +0,0 @@ -//! Document key for NATS object storage. - -use std::fmt; -use std::str::FromStr; - -use base64::prelude::*; -use uuid::Uuid; - -use crate::{Error, Result}; - -/// A validated key for document objects in NATS object storage. -/// -/// The key is encoded as URL-safe base64 of the concatenated workspace ID and object ID. -/// This produces a compact 43-character key from two UUIDs (32 bytes → base64). -/// -/// The `object_id` is a UUID v7 generated at upload time, providing: -/// - Time-ordered keys for efficient storage and retrieval -/// - Guaranteed uniqueness within the workspace -/// - No collision with database-generated IDs -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DocumentKey { - workspace_id: Uuid, - object_id: Uuid, -} - -impl DocumentKey { - /// Generates a new document key with a fresh UUID v7 object ID. - /// - /// Uses UUID v7 which is time-ordered and contains randomness, - /// making keys both sortable and collision-resistant. - pub fn generate(workspace_id: Uuid) -> Self { - Self { - workspace_id, - object_id: Uuid::now_v7(), - } - } - - /// Creates a document key from existing IDs (for parsing stored keys). - pub fn from_parts(workspace_id: Uuid, object_id: Uuid) -> Self { - Self { - workspace_id, - object_id, - } - } - - /// Returns the workspace ID. 
- pub fn workspace_id(&self) -> Uuid { - self.workspace_id - } - - /// Returns the object ID (the UUID used for NATS storage). - pub fn object_id(&self) -> Uuid { - self.object_id - } - - /// Encodes the key as URL-safe base64. - fn encode(&self) -> String { - let mut bytes = [0u8; 32]; - bytes[..16].copy_from_slice(self.workspace_id.as_bytes()); - bytes[16..].copy_from_slice(self.object_id.as_bytes()); - BASE64_URL_SAFE_NO_PAD.encode(bytes) - } - - /// Decodes a key from URL-safe base64. - fn decode(s: &str) -> Result { - let bytes = BASE64_URL_SAFE_NO_PAD.decode(s).map_err(|e| { - Error::operation("parse_key", format!("Invalid base64 encoding: {}", e)) - })?; - - if bytes.len() != 32 { - return Err(Error::operation( - "parse_key", - format!("Invalid key length: expected 32 bytes, got {}", bytes.len()), - )); - } - - let workspace_id = Uuid::from_slice(&bytes[..16]) - .map_err(|e| Error::operation("parse_key", format!("Invalid workspace UUID: {}", e)))?; - - let object_id = Uuid::from_slice(&bytes[16..]) - .map_err(|e| Error::operation("parse_key", format!("Invalid object UUID: {}", e)))?; - - Ok(Self::from_parts(workspace_id, object_id)) - } -} - -impl fmt::Display for DocumentKey { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.encode()) - } -} - -impl FromStr for DocumentKey { - type Err = Error; - - fn from_str(s: &str) -> Result { - Self::decode(s) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_document_key_generate() { - let workspace_id = Uuid::new_v4(); - - let key = DocumentKey::generate(workspace_id); - - assert_eq!(key.workspace_id(), workspace_id); - // object_id should be a valid UUID v7 (starts with version nibble 7) - assert_eq!(key.object_id().get_version_num(), 7); - } - - #[test] - fn test_document_key_from_parts() { - let workspace_id = Uuid::new_v4(); - let object_id = Uuid::new_v4(); - - let key = DocumentKey::from_parts(workspace_id, object_id); - - assert_eq!(key.workspace_id(), workspace_id); - assert_eq!(key.object_id(), object_id); - } - - #[test] - fn test_document_key_display_is_base64() { - let workspace_id = Uuid::new_v4(); - - let key = DocumentKey::generate(workspace_id); - let encoded = key.to_string(); - - // URL-safe base64 without padding: 32 bytes → 43 chars - assert_eq!(encoded.len(), 43); - // Should only contain URL-safe base64 characters - assert!( - encoded - .chars() - .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') - ); - } - - #[test] - fn test_document_key_roundtrip() { - let workspace_id = Uuid::new_v4(); - let object_id = Uuid::new_v4(); - - let key = DocumentKey::from_parts(workspace_id, object_id); - let encoded = key.to_string(); - let decoded: DocumentKey = encoded.parse().unwrap(); - - assert_eq!(decoded.workspace_id(), workspace_id); - assert_eq!(decoded.object_id(), object_id); - assert_eq!(key, decoded); - } - - #[test] - fn test_document_key_uniqueness() { - let workspace_id = Uuid::new_v4(); - - // Generate multiple keys for the same workspace - let key1 = DocumentKey::generate(workspace_id); - let key2 = DocumentKey::generate(workspace_id); - - // Each should have a unique object_id - assert_ne!(key1.object_id(), key2.object_id()); - assert_ne!(key1.to_string(), key2.to_string()); - } - - #[test] - fn test_document_key_from_str_invalid() { - // Invalid base64 - assert!(DocumentKey::from_str("not-valid-base64!!!").is_err()); - - // Too short - assert!(DocumentKey::from_str("abc").is_err()); - - // Valid base64 but wrong length - 
assert!(DocumentKey::from_str("YWJjZGVm").is_err()); - } -} diff --git a/crates/nvisy-nats/src/object/document_store.rs b/crates/nvisy-nats/src/object/document_store.rs deleted file mode 100644 index a47b54d..0000000 --- a/crates/nvisy-nats/src/object/document_store.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Document file store for NATS object storage. - -use std::marker::PhantomData; - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; - -use super::document_bucket::DocumentBucket; -use super::document_key::DocumentKey; -use super::object_data::{GetResult, PutResult}; -use super::object_store::ObjectStore; -use crate::Result; - -/// A document file store that manages files in NATS object storage. -/// -/// This is a specialized wrapper around [`ObjectStore`] that uses -/// [`DocumentKey`] for addressing and provides document-specific operations. -/// -/// The store is generic over the bucket type, providing compile-time -/// type safety for bucket operations. -#[derive(Clone, Deref, DerefMut)] -pub struct DocumentStore { - #[deref] - #[deref_mut] - inner: ObjectStore, - _marker: PhantomData, -} - -impl DocumentStore { - /// Creates a new document store for the specified bucket type. - pub async fn new(jetstream: &jetstream::Context) -> Result { - let inner = ObjectStore::new(jetstream, B::NAME, B::MAX_AGE).await?; - Ok(Self { - inner, - _marker: PhantomData, - }) - } - - /// Streams data to the store while computing SHA-256 hash on-the-fly. - /// - /// This method does not buffer the entire content in memory, making it - /// suitable for large file uploads. - pub async fn put(&self, key: &DocumentKey, reader: R) -> Result - where - R: tokio::io::AsyncRead + Unpin, - { - self.inner.put(&key.to_string(), reader).await - } - - /// Gets an object from the store as a stream. - /// - /// Returns `None` if the object doesn't exist. - /// The returned reader implements `AsyncRead` for streaming the content. - pub async fn get(&self, key: &DocumentKey) -> Result> { - self.inner.get(&key.to_string()).await - } - - /// Deletes an object from the store using a document key. - pub async fn delete(&self, key: &DocumentKey) -> Result<()> { - self.inner.delete(&key.to_string()).await - } - - /// Checks if an object exists using a document key. - pub async fn exists(&self, key: &DocumentKey) -> Result { - self.inner.exists(&key.to_string()).await - } - - /// Returns the bucket name for this store. - #[inline] - pub fn bucket(&self) -> &'static str { - B::NAME - } -} diff --git a/crates/nvisy-nats/src/object/mod.rs b/crates/nvisy-nats/src/object/mod.rs index 371bd08..d880736 100644 --- a/crates/nvisy-nats/src/object/mod.rs +++ b/crates/nvisy-nats/src/object/mod.rs @@ -6,42 +6,32 @@ //! //! # Architecture //! -//! ## Generic Store -//! - [`ObjectStore`] - Generic object store wrapper with streaming support +//! ## Store +//! - [`ObjectStore`] - Type-safe object store with bucket and key configuration //! -//! ## Document Storage -//! - [`DocumentStore`] - Specialized store for document files -//! - [`DocumentKey`] - Unique key for documents (workspace + object ID) +//! ## Key Types +//! - [`FileKey`] - Unique key for files (workspace + object ID) +//! - [`AccountKey`] - Key for account-scoped objects (account ID) //! -//! ## Avatar Storage -//! - [`AvatarStore`] - Specialized store for account avatars -//! - [`AvatarKey`] - Key for avatars (account ID) -//! -//! ## Thumbnail Storage -//! - [`ThumbnailStore`] - Specialized store for document thumbnails -//! 
- Uses [`DocumentKey`] for addressing +//! ## Bucket Types +//! - [`FilesBucket`] - Primary file storage (no expiration) +//! - [`IntermediatesBucket`] - Temporary processing artifacts (7 day TTL) +//! - [`ThumbnailsBucket`] - Document thumbnails (no expiration) +//! - [`AvatarsBucket`] - Account avatars (no expiration) //! //! ## Common Types //! - [`PutResult`] - Result of upload operations with size and SHA-256 hash //! - [`GetResult`] - Result of download operations with streaming reader -mod avatar_bucket; -mod avatar_key; -mod avatar_store; -mod document_bucket; -mod document_key; -mod document_store; mod hashing_reader; +mod object_bucket; mod object_data; +mod object_key; mod object_store; -mod thumbnail_bucket; -mod thumbnail_store; -pub use avatar_key::AvatarKey; -pub use avatar_store::AvatarStore; -pub use document_bucket::{DocumentBucket, Files, Intermediates}; -pub use document_key::DocumentKey; -pub use document_store::DocumentStore; +pub use object_bucket::{ + AvatarsBucket, FilesBucket, IntermediatesBucket, ObjectBucket, ThumbnailsBucket, +}; pub use object_data::{GetResult, PutResult}; +pub use object_key::{AccountKey, FileKey, ObjectKey}; pub use object_store::ObjectStore; -pub use thumbnail_store::ThumbnailStore; diff --git a/crates/nvisy-nats/src/object/object_bucket.rs b/crates/nvisy-nats/src/object/object_bucket.rs new file mode 100644 index 0000000..c4acc3f --- /dev/null +++ b/crates/nvisy-nats/src/object/object_bucket.rs @@ -0,0 +1,84 @@ +//! Object bucket configuration for NATS object storage. + +use std::time::Duration; + +/// Marker trait for object storage buckets. +/// +/// This trait defines the configuration for a NATS object storage bucket, +/// including its name and optional TTL for objects. +pub trait ObjectBucket: Clone + Send + Sync + 'static { + /// Bucket name used in NATS object storage. + const NAME: &'static str; + + /// Maximum age for objects in this bucket. + /// Returns `None` for buckets where objects should not expire. + const MAX_AGE: Option; +} + +/// Primary file storage for uploaded and processed files. +/// +/// No expiration, files are retained indefinitely. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct FilesBucket; + +impl ObjectBucket for FilesBucket { + const MAX_AGE: Option = None; + const NAME: &'static str = "DOCUMENT_FILES"; +} + +/// Temporary storage for intermediate processing artifacts. +/// +/// Files expire after 7 days. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct IntermediatesBucket; + +impl ObjectBucket for IntermediatesBucket { + const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); + const NAME: &'static str = "DOCUMENT_INTERMEDIATES"; +} + +/// Storage for document thumbnails. +/// +/// No expiration, thumbnails are retained indefinitely. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct ThumbnailsBucket; + +impl ObjectBucket for ThumbnailsBucket { + const MAX_AGE: Option = None; + const NAME: &'static str = "DOCUMENT_THUMBNAILS"; +} + +/// Storage for account avatars. +/// +/// No expiration, avatars are retained indefinitely. 
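Each bucket in `object_bucket.rs` is a zero-sized marker that pins the NATS bucket name and retention at compile time, so adding a storage variant is just another `ObjectBucket` impl. A sketch of a hypothetical extra bucket follows; the `ExportsBucket` name, its TTL, and the `nvisy_nats::object` import path are assumptions for illustration, not part of this patch:

```rust
use std::time::Duration;

use nvisy_nats::object::ObjectBucket;

/// Hypothetical bucket for generated export archives; entries expire after a day.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct ExportsBucket;

impl ObjectBucket for ExportsBucket {
    const MAX_AGE: Option<Duration> = Some(Duration::from_secs(24 * 60 * 60));
    const NAME: &'static str = "DOCUMENT_EXPORTS";
}
```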
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct AvatarsBucket; + +impl ObjectBucket for AvatarsBucket { + const MAX_AGE: Option = None; + const NAME: &'static str = "ACCOUNT_AVATARS"; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bucket_names() { + assert_eq!(FilesBucket::NAME, "DOCUMENT_FILES"); + assert_eq!(IntermediatesBucket::NAME, "DOCUMENT_INTERMEDIATES"); + assert_eq!(ThumbnailsBucket::NAME, "DOCUMENT_THUMBNAILS"); + assert_eq!(AvatarsBucket::NAME, "ACCOUNT_AVATARS"); + } + + #[test] + fn test_bucket_max_age() { + assert_eq!(FilesBucket::MAX_AGE, None); + assert_eq!( + IntermediatesBucket::MAX_AGE, + Some(Duration::from_secs(7 * 24 * 60 * 60)) + ); + assert_eq!(ThumbnailsBucket::MAX_AGE, None); + assert_eq!(AvatarsBucket::MAX_AGE, None); + } +} diff --git a/crates/nvisy-nats/src/object/object_data.rs b/crates/nvisy-nats/src/object/object_data.rs index a176be9..26fef58 100644 --- a/crates/nvisy-nats/src/object/object_data.rs +++ b/crates/nvisy-nats/src/object/object_data.rs @@ -11,21 +11,14 @@ pub struct PutResult { size: u64, /// SHA-256 hash computed during streaming. sha256: Vec, - /// SHA-256 hash as hex string. - sha256_hex: String, /// NATS object unique identifier. nuid: String, } impl PutResult { /// Creates a new put result. - pub(crate) fn new(size: u64, sha256: Vec, sha256_hex: String, nuid: String) -> Self { - Self { - size, - sha256, - sha256_hex, - nuid, - } + pub(crate) fn new(size: u64, sha256: Vec, nuid: String) -> Self { + Self { size, sha256, nuid } } /// Returns the size in bytes. @@ -42,8 +35,8 @@ impl PutResult { /// Returns the SHA-256 hash as a hex string. #[inline] - pub fn sha256_hex(&self) -> &str { - &self.sha256_hex + pub fn sha256_hex(&self) -> String { + hex::encode(&self.sha256) } /// Returns the NATS object unique identifier. @@ -109,11 +102,12 @@ mod tests { #[test] fn test_put_result_getters() { - let result = PutResult::new(1024, vec![0u8; 32], "0".repeat(64), "test-nuid".to_string()); + let result = PutResult::new(1024, vec![0u8; 32], "test-nuid".to_string()); assert_eq!(result.size(), 1024); assert_eq!(result.sha256().len(), 32); assert_eq!(result.sha256_hex().len(), 64); + assert_eq!(result.sha256_hex(), "0".repeat(64)); assert_eq!(result.nuid(), "test-nuid"); } } diff --git a/crates/nvisy-nats/src/object/object_key.rs b/crates/nvisy-nats/src/object/object_key.rs new file mode 100644 index 0000000..16fe2af --- /dev/null +++ b/crates/nvisy-nats/src/object/object_key.rs @@ -0,0 +1,282 @@ +//! Object key types for NATS object storage. + +use std::fmt; +use std::str::FromStr; + +use base64::prelude::*; +use uuid::Uuid; + +use crate::{Error, Result}; + +/// Trait for object storage keys. +/// +/// Keys must be convertible to/from strings for storage addressing. +/// Each key type has a prefix that organizes objects by type in the bucket. +pub trait ObjectKey: fmt::Display + FromStr + Clone + Send + Sync + 'static { + /// The prefix for this key type (e.g., "file_", "account_"). + const PREFIX: &'static str; +} + +/// A validated key for file objects in NATS object storage. +/// +/// The key is encoded as `file_` prefix followed by URL-safe base64 of the +/// concatenated workspace ID and object ID. This produces a key like +/// `file_ABC123...` from two UUIDs (32 bytes → base64). 
+/// +/// The `object_id` is a UUID v7 generated at upload time, providing: +/// - Time-ordered keys for efficient storage and retrieval +/// - Guaranteed uniqueness within the workspace +/// - No collision with database-generated IDs +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FileKey { + pub workspace_id: Uuid, + pub object_id: Uuid, +} + +impl ObjectKey for FileKey { + const PREFIX: &'static str = "file_"; +} + +impl FileKey { + /// Generates a new file key with a fresh UUID v7 object ID. + /// + /// Uses UUID v7 which is time-ordered and contains randomness, + /// making keys both sortable and collision-resistant. + pub fn generate(workspace_id: Uuid) -> Self { + Self { + workspace_id, + object_id: Uuid::now_v7(), + } + } + + /// Creates a file key from existing IDs (for parsing stored keys). + pub fn from_parts(workspace_id: Uuid, object_id: Uuid) -> Self { + Self { + workspace_id, + object_id, + } + } + + /// Regenerates the object ID with a fresh UUID v7. + /// + /// This is useful when creating a new version of a file + /// while keeping the same workspace association. + pub fn regenerate(&mut self) { + self.object_id = Uuid::now_v7(); + } + + /// Encodes the key payload as URL-safe base64. + fn encode_payload(&self) -> String { + let mut bytes = [0u8; 32]; + bytes[..16].copy_from_slice(self.workspace_id.as_bytes()); + bytes[16..].copy_from_slice(self.object_id.as_bytes()); + BASE64_URL_SAFE_NO_PAD.encode(bytes) + } + + /// Decodes a key payload from URL-safe base64. + fn decode_payload(s: &str) -> Result { + let bytes = BASE64_URL_SAFE_NO_PAD.decode(s).map_err(|e| { + Error::operation("parse_key", format!("Invalid base64 encoding: {}", e)) + })?; + + if bytes.len() != 32 { + return Err(Error::operation( + "parse_key", + format!("Invalid key length: expected 32 bytes, got {}", bytes.len()), + )); + } + + let workspace_id = Uuid::from_slice(&bytes[..16]) + .map_err(|e| Error::operation("parse_key", format!("Invalid workspace UUID: {}", e)))?; + + let object_id = Uuid::from_slice(&bytes[16..]) + .map_err(|e| Error::operation("parse_key", format!("Invalid object UUID: {}", e)))?; + + Ok(Self::from_parts(workspace_id, object_id)) + } +} + +impl fmt::Display for FileKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}{}", Self::PREFIX, self.encode_payload()) + } +} + +impl FromStr for FileKey { + type Err = Error; + + fn from_str(s: &str) -> Result { + let payload = s.strip_prefix(Self::PREFIX).ok_or_else(|| { + Error::operation( + "parse_key", + format!("Invalid key prefix: expected '{}'", Self::PREFIX), + ) + })?; + Self::decode_payload(payload) + } +} + +/// A validated key for account-scoped objects in NATS object storage. +/// +/// The key format is `account_` prefix followed by the account ID, +/// since these objects are uniquely identified by their owning account (e.g., avatars). +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct AccountKey { + pub account_id: Uuid, +} + +impl ObjectKey for AccountKey { + const PREFIX: &'static str = "account_"; +} + +impl AccountKey { + /// Creates a new account key. 
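Both key types above round-trip through `Display`/`FromStr`, so the encoded form can be stored as an opaque string (for example in the database) and parsed back when the object is fetched. A small sketch of that flow, assuming the keys are re-exported from `nvisy_nats::object` as in `object/mod.rs`:

```rust
use nvisy_nats::object::FileKey;
use uuid::Uuid;

fn persist_and_recover_key() -> nvisy_nats::Result<()> {
    let workspace_id = Uuid::new_v4();

    // A fresh, time-ordered key for a new upload.
    let key = FileKey::generate(workspace_id);

    // Store the printable form; it looks like "file_<43 url-safe base64 chars>".
    let stored = key.to_string();

    // Later, parse it back and recover both IDs.
    let parsed: FileKey = stored.parse()?;
    assert_eq!(parsed.workspace_id, workspace_id);
    assert_eq!(parsed, key);

    Ok(())
}
```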
+ pub fn new(account_id: Uuid) -> Self { + Self { account_id } + } +} + +impl fmt::Display for AccountKey { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}{}", Self::PREFIX, self.account_id) + } +} + +impl FromStr for AccountKey { + type Err = Error; + + fn from_str(s: &str) -> Result { + let payload = s.strip_prefix(Self::PREFIX).ok_or_else(|| { + Error::operation( + "parse_key", + format!("Invalid key prefix: expected '{}'", Self::PREFIX), + ) + })?; + let account_id = Uuid::parse_str(payload) + .map_err(|e| Error::operation("parse_key", format!("Invalid account UUID: {}", e)))?; + Ok(Self::new(account_id)) + } +} + +impl From for AccountKey { + fn from(account_id: Uuid) -> Self { + Self::new(account_id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + mod file_key { + use super::*; + + #[test] + fn test_prefix() { + assert_eq!(FileKey::PREFIX, "file_"); + } + + #[test] + fn test_generate() { + let workspace_id = Uuid::new_v4(); + let key = FileKey::generate(workspace_id); + + assert_eq!(key.workspace_id, workspace_id); + assert_eq!(key.object_id.get_version_num(), 7); + } + + #[test] + fn test_from_parts() { + let workspace_id = Uuid::new_v4(); + let object_id = Uuid::now_v7(); + let key = FileKey::from_parts(workspace_id, object_id); + + assert_eq!(key.workspace_id, workspace_id); + assert_eq!(key.object_id, object_id); + } + + #[test] + fn test_display_has_prefix() { + let workspace_id = Uuid::new_v4(); + let key = FileKey::generate(workspace_id); + let encoded = key.to_string(); + + assert!(encoded.starts_with("file_")); + // prefix (5) + base64 (43) = 48 + assert_eq!(encoded.len(), 48); + } + + #[test] + fn test_roundtrip() { + let workspace_id = Uuid::new_v4(); + let object_id = Uuid::new_v4(); + + let key = FileKey::from_parts(workspace_id, object_id); + let encoded = key.to_string(); + let decoded: FileKey = encoded.parse().unwrap(); + + assert_eq!(decoded.workspace_id, workspace_id); + assert_eq!(decoded.object_id, object_id); + assert_eq!(key, decoded); + } + + #[test] + fn test_from_str_invalid_prefix() { + assert!(FileKey::from_str("account_abc").is_err()); + assert!(FileKey::from_str("abc").is_err()); + } + } + + mod account_key { + use super::*; + + #[test] + fn test_prefix() { + assert_eq!(AccountKey::PREFIX, "account_"); + } + + #[test] + fn test_new() { + let account_id = Uuid::new_v4(); + let key = AccountKey::new(account_id); + assert_eq!(key.account_id, account_id); + } + + #[test] + fn test_display_has_prefix() { + let account_id = Uuid::new_v4(); + let key = AccountKey::new(account_id); + let encoded = key.to_string(); + + assert!(encoded.starts_with("account_")); + assert_eq!(encoded, format!("account_{}", account_id)); + } + + #[test] + fn test_roundtrip() { + let account_id = Uuid::new_v4(); + let key = AccountKey::new(account_id); + let encoded = key.to_string(); + let decoded: AccountKey = encoded.parse().unwrap(); + assert_eq!(decoded.account_id, account_id); + } + + #[test] + fn test_from_uuid() { + let account_id = Uuid::new_v4(); + let key: AccountKey = account_id.into(); + assert_eq!(key.account_id, account_id); + } + + #[test] + fn test_from_str_invalid_prefix() { + assert!(AccountKey::from_str("file_abc").is_err()); + assert!(AccountKey::from_str("abc").is_err()); + } + + #[test] + fn test_from_str_invalid_uuid() { + assert!(AccountKey::from_str("account_not-a-uuid").is_err()); + } + } +} diff --git a/crates/nvisy-nats/src/object/object_store.rs b/crates/nvisy-nats/src/object/object_store.rs index 5e00784..7b23336 
100644
--- a/crates/nvisy-nats/src/object/object_store.rs
+++ b/crates/nvisy-nats/src/object/object_store.rs
@@ -1,7 +1,7 @@
-//! Generic object store wrapper for NATS JetStream.
+//! Generic object store for NATS JetStream.
 
+use std::marker::PhantomData;
 use std::sync::Arc;
-use std::time::Duration;
 
 use async_nats::jetstream;
 use async_nats::jetstream::context::ObjectStoreErrorKind;
@@ -9,65 +9,71 @@ use async_nats::jetstream::object_store::{self, ObjectInfo};
 use tokio::io::AsyncRead;
 
 use super::hashing_reader::HashingReader;
+use super::object_bucket::ObjectBucket;
 use super::object_data::{GetResult, PutResult};
+use super::object_key::ObjectKey;
 use crate::{Error, Result};
 
 /// Tracing target for object store operations.
 const TRACING_TARGET: &str = "nvisy_nats::object_store";
 
-/// A generic object store that manages files in NATS object storage.
+/// A type-safe object store that manages objects in NATS object storage.
 ///
 /// This store provides streaming upload capabilities with on-the-fly
 /// SHA-256 hash computation.
+///
+/// The store is generic over:
+/// - `B`: The bucket type (determines storage location and TTL)
+/// - `K`: The key type (determines how objects are addressed)
 #[derive(Clone)]
-pub struct ObjectStore {
+pub struct ObjectStore<B, K>
+where
+    B: ObjectBucket,
+    K: ObjectKey,
+{
     inner: Arc<object_store::ObjectStore>,
-    bucket: Arc<String>,
+    _marker: PhantomData<(B, K)>,
 }
 
-impl ObjectStore {
-    /// Creates a new object store for the specified bucket.
-    ///
-    /// If `max_age` is `None`, objects will not expire.
-    pub async fn new(
-        jetstream: &jetstream::Context,
-        bucket: impl Into<String>,
-        max_age: Option<Duration>,
-    ) -> Result<Self> {
-        let bucket = bucket.into();
-
+impl<B, K> ObjectStore<B, K>
+where
+    B: ObjectBucket,
+    K: ObjectKey,
+{
+    /// Creates a new object store for the specified bucket type.
+    pub(crate) async fn new(jetstream: &jetstream::Context) -> Result<Self> {
         tracing::debug!(
             target: TRACING_TARGET,
-            bucket = %bucket,
+            bucket = %B::NAME,
             "Initializing object store"
         );
 
-        let store = match jetstream.get_object_store(&bucket).await {
+        let store = match jetstream.get_object_store(B::NAME).await {
             Ok(store) => {
                 tracing::debug!(
                     target: TRACING_TARGET,
-                    bucket = %bucket,
+                    bucket = %B::NAME,
                     "Retrieved existing object store"
                 );
                 store
             }
             Err(e) if matches!(e.kind(), ObjectStoreErrorKind::GetStore) => {
                 let config = object_store::Config {
-                    bucket: bucket.clone(),
-                    max_age: max_age.unwrap_or_default(),
+                    bucket: B::NAME.to_string(),
+                    max_age: B::MAX_AGE.unwrap_or_default(),
                     ..Default::default()
                 };
 
                 tracing::info!(
                     target: TRACING_TARGET,
-                    bucket = %bucket,
+                    bucket = %B::NAME,
                     "Creating new object store"
                 );
 
                 jetstream.create_object_store(config).await.map_err(|e| {
                     tracing::error!(
                         target: TRACING_TARGET,
-                        bucket = %bucket,
+                        bucket = %B::NAME,
                         error = %e,
                         "Failed to create object store"
                     );
@@ -77,7 +83,7 @@ impl ObjectStore {
             Err(e) => {
                 tracing::error!(
                     target: TRACING_TARGET,
-                    bucket = %bucket,
+                    bucket = %B::NAME,
                     error = %e,
                     "Failed to get object store"
                 );
@@ -87,32 +93,35 @@ impl ObjectStore {
 
         Ok(Self {
             inner: Arc::new(store),
-            bucket: Arc::new(bucket),
+            _marker: PhantomData,
         })
     }
 
     /// Returns the bucket name.
-    pub fn bucket(&self) -> &str {
-        &self.bucket
+    #[inline]
+    pub fn bucket(&self) -> &'static str {
+        B::NAME
     }
 
     /// Streams data to the store while computing SHA-256 hash on-the-fly.
     ///
     /// This method does not buffer the entire content in memory, making it
     /// suitable for large file uploads.
-    pub async fn put<R>(&self, key: &str, reader: R) -> Result<PutResult>
+    pub async fn put<R>(&self, key: &K, reader: R) -> Result<PutResult>
     where
         R: AsyncRead + Unpin,
     {
+        let key_str = key.to_string();
+
         tracing::debug!(
             target: TRACING_TARGET,
-            key = %key,
-            bucket = %self.bucket,
+            key = %key_str,
+            bucket = %B::NAME,
             "Starting streaming upload"
         );
 
         let meta = object_store::ObjectMetadata {
-            name: key.to_string(),
+            name: key_str.clone(),
             ..Default::default()
         };
 
@@ -125,7 +134,7 @@ impl ObjectStore {
             .map_err(|e| {
                 tracing::error!(
                     target: TRACING_TARGET,
-                    key = %key,
+                    key = %key_str,
                     error = %e,
                     "Failed to upload object"
                 );
@@ -133,34 +142,29 @@ impl ObjectStore {
             })?;
 
         let sha256 = hashing_reader.finalize();
-        let sha256_hex = hex::encode(sha256);
 
         tracing::info!(
             target: TRACING_TARGET,
-            key = %key,
+            key = %key_str,
             size = info.size,
-            sha256 = %sha256_hex,
             nuid = %info.nuid,
             "Streaming upload complete"
         );
 
-        Ok(PutResult::new(
-            info.size as u64,
-            sha256.to_vec(),
-            sha256_hex,
-            info.nuid,
-        ))
+        Ok(PutResult::new(info.size as u64, sha256.to_vec(), info.nuid))
     }
 
     /// Gets an object from the store as a stream.
     ///
     /// Returns `None` if the object doesn't exist.
     /// The returned reader implements `AsyncRead` for streaming the content.
-    pub async fn get(&self, key: &str) -> Result<Option<GetResult>> {
+    pub async fn get(&self, key: &K) -> Result<Option<GetResult>> {
+        let key_str = key.to_string();
+
         tracing::debug!(
             target: TRACING_TARGET,
-            key = %key,
-            bucket = %self.bucket,
+            key = %key_str,
+            bucket = %B::NAME,
             "Getting object"
         );
 
@@ -170,11 +174,11 @@ impl ObjectStore {
             None => return Ok(None),
         };
 
-        match self.inner.get(key).await {
+        match self.inner.get(&key_str).await {
             Ok(reader) => {
                 tracing::debug!(
                     target: TRACING_TARGET,
-                    key = %key,
+                    key = %key_str,
                     size = info.size,
                     "Object stream opened"
                 );
@@ -186,14 +190,14 @@ impl ObjectStore {
                 if error_str.contains("not found") || error_str.contains("no message found") {
                     tracing::debug!(
                         target: TRACING_TARGET,
-                        key = %key,
+                        key = %key_str,
                         "Object not found"
                     );
                     Ok(None)
                 } else {
                     tracing::error!(
                         target: TRACING_TARGET,
-                        key = %key,
+                        key = %key_str,
                         error = %e,
                         "Failed to get object"
                     );
@@ -204,8 +208,10 @@ impl ObjectStore {
     }
 
     /// Gets object info without downloading the content.
-    pub async fn info(&self, key: &str) -> Result<Option<ObjectInfo>> {
-        match self.inner.info(key).await {
+    pub async fn info(&self, key: &K) -> Result<Option<ObjectInfo>> {
+        let key_str = key.to_string();
+
+        match self.inner.info(&key_str).await {
             Ok(info) => Ok(Some(info)),
             Err(e) => {
                 let error_str = e.to_string();
@@ -219,18 +225,20 @@ impl ObjectStore {
     }
 
     /// Deletes an object from the store.
-    pub async fn delete(&self, key: &str) -> Result<()> {
+    pub async fn delete(&self, key: &K) -> Result<()> {
+        let key_str = key.to_string();
+
         tracing::debug!(
             target: TRACING_TARGET,
-            key = %key,
-            bucket = %self.bucket,
+            key = %key_str,
+            bucket = %B::NAME,
             "Deleting object"
         );
 
-        self.inner.delete(key).await.map_err(|e| {
+        self.inner.delete(&key_str).await.map_err(|e| {
             tracing::error!(
                 target: TRACING_TARGET,
-                key = %key,
+                key = %key_str,
                 error = %e,
                 "Failed to delete object"
             );
@@ -239,7 +247,7 @@ impl ObjectStore {
 
         tracing::info!(
             target: TRACING_TARGET,
-            key = %key,
+            key = %key_str,
             "Object deleted"
        );
 
@@ -247,7 +255,7 @@ impl ObjectStore {
     }
 
     /// Checks if an object exists.
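With the bucket and key folded into the type, a handle like `ObjectStore<FilesBucket, FileKey>` can only be addressed with `FileKey`s and always targets `DOCUMENT_FILES`. A rough upload sketch against the `put`/`get` methods above; how callers obtain the store is not shown in this patch (its `new` is `pub(crate)`), so the `store` argument and the `tokio::fs` source are assumptions:

```rust
use nvisy_nats::object::{FileKey, FilesBucket, ObjectStore};

async fn upload_local_file(
    store: &ObjectStore<FilesBucket, FileKey>,
    workspace_id: uuid::Uuid,
    path: &std::path::Path,
) -> nvisy_nats::Result<()> {
    // One key per stored object; the object ID is a fresh UUID v7.
    let key = FileKey::generate(workspace_id);

    // Streaming upload; the SHA-256 digest is computed on the fly.
    let file = tokio::fs::File::open(path).await.expect("readable file");
    let put = store.put(&key, file).await?;
    println!("stored {} bytes, sha256 = {}", put.size(), put.sha256_hex());

    // The object is now retrievable (and streamable) under the same key.
    assert!(store.get(&key).await?.is_some());

    Ok(())
}
```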
- pub async fn exists(&self, key: &str) -> Result { + pub async fn exists(&self, key: &K) -> Result { Ok(self.info(key).await?.is_some()) } } diff --git a/crates/nvisy-nats/src/object/thumbnail_bucket.rs b/crates/nvisy-nats/src/object/thumbnail_bucket.rs deleted file mode 100644 index 5f4b9d2..0000000 --- a/crates/nvisy-nats/src/object/thumbnail_bucket.rs +++ /dev/null @@ -1,9 +0,0 @@ -//! Thumbnail bucket constants for NATS object storage. - -use std::time::Duration; - -/// Bucket name for document thumbnails. -pub const THUMBNAIL_BUCKET: &str = "DOCUMENT_THUMBNAILS"; - -/// Maximum age for thumbnails (none - retained indefinitely). -pub const THUMBNAIL_MAX_AGE: Option = None; diff --git a/crates/nvisy-nats/src/object/thumbnail_store.rs b/crates/nvisy-nats/src/object/thumbnail_store.rs deleted file mode 100644 index 464afc1..0000000 --- a/crates/nvisy-nats/src/object/thumbnail_store.rs +++ /dev/null @@ -1,59 +0,0 @@ -//! Thumbnail store for NATS object storage. - -use async_nats::jetstream; -use derive_more::{Deref, DerefMut}; - -use super::document_key::DocumentKey; -use super::object_data::{GetResult, PutResult}; -use super::object_store::ObjectStore; -use super::thumbnail_bucket::{THUMBNAIL_BUCKET, THUMBNAIL_MAX_AGE}; -use crate::Result; - -/// A thumbnail store that manages document thumbnails in NATS object storage. -/// -/// Uses [`DocumentKey`] for addressing (same key format as document files). -#[derive(Clone, Deref, DerefMut)] -pub struct ThumbnailStore { - #[deref] - #[deref_mut] - inner: ObjectStore, -} - -impl ThumbnailStore { - /// Creates a new thumbnail store. - pub async fn new(jetstream: &jetstream::Context) -> Result { - let inner = ObjectStore::new(jetstream, THUMBNAIL_BUCKET, THUMBNAIL_MAX_AGE).await?; - Ok(Self { inner }) - } - - /// Streams thumbnail data to the store while computing SHA-256 hash on-the-fly. - pub async fn put(&self, key: &DocumentKey, reader: R) -> Result - where - R: tokio::io::AsyncRead + Unpin, - { - self.inner.put(&key.to_string(), reader).await - } - - /// Gets a thumbnail from the store as a stream. - /// - /// Returns `None` if the thumbnail doesn't exist. - pub async fn get(&self, key: &DocumentKey) -> Result> { - self.inner.get(&key.to_string()).await - } - - /// Deletes a thumbnail from the store. - pub async fn delete(&self, key: &DocumentKey) -> Result<()> { - self.inner.delete(&key.to_string()).await - } - - /// Checks if a thumbnail exists. - pub async fn exists(&self, key: &DocumentKey) -> Result { - self.inner.exists(&key.to_string()).await - } - - /// Returns the bucket name. - #[inline] - pub fn bucket(&self) -> &'static str { - THUMBNAIL_BUCKET - } -} diff --git a/crates/nvisy-nats/src/stream/document_job.rs b/crates/nvisy-nats/src/stream/document_job.rs deleted file mode 100644 index a351305..0000000 --- a/crates/nvisy-nats/src/stream/document_job.rs +++ /dev/null @@ -1,470 +0,0 @@ -//! Document job types for file processing pipeline. - -use jiff::Timestamp; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::de::DeserializeOwned; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::document_task::PredefinedTask; -use super::event::EventPriority; - -/// Stream name for document jobs. -pub const STREAM_NAME: &str = "DOCUMENT_JOBS"; - -/// Marker trait for document processing stages. -/// -/// Each stage represents a distinct phase in the document processing pipeline, -/// with its own stream subject for NATS routing. 
-pub trait Stage: Serialize + DeserializeOwned + Clone + Send + Sync + 'static { - /// Stage name for logging and debugging. - const NAME: &'static str; - /// NATS stream subject suffix for this stage. - const SUBJECT: &'static str; -} - -/// Preprocessing stage data. -/// -/// Runs when a user uploads a file. Prepares the file for future processing: -/// - Format detection and validation -/// - File integrity checks -/// - Metadata extraction and fixes -/// - Thumbnail generation -/// - OCR for scanned documents -/// - Embedding generation for knowledge base / semantic search -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct PreprocessingData { - /// Whether to validate and fix file metadata. Defaults to true. - #[serde(default = "default_true", skip_serializing_if = "is_true")] - pub validate_metadata: bool, - /// Whether to run OCR on the document. Defaults to true. - #[serde(default = "default_true", skip_serializing_if = "is_true")] - pub run_ocr: bool, - /// Whether to generate embeddings for semantic search. Defaults to true. - #[serde(default = "default_true", skip_serializing_if = "is_true")] - pub generate_embeddings: bool, - /// Whether to generate thumbnails for UI previews. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub generate_thumbnails: Option, -} - -impl Default for PreprocessingData { - fn default() -> Self { - Self { - validate_metadata: true, - run_ocr: true, - generate_embeddings: true, - generate_thumbnails: None, - } - } -} - -impl Stage for PreprocessingData { - const NAME: &'static str = "preprocessing"; - const SUBJECT: &'static str = "preprocessing"; -} - -/// Processing stage data. -/// -/// Runs when a user requests changes to the document. Changes are typically -/// a collection of annotations (notes, highlights, comments) that need to be -/// applied using VLM pipelines. -#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct ProcessingData { - /// The main VLM prompt/instruction for processing. - #[serde(default, skip_serializing_if = "String::is_empty")] - pub prompt: String, - /// Additional context for the VLM. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub context: Option, - /// Annotation IDs to process. None means process all annotations. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub annotation_ids: Option>, - /// Other files to use as context (e.g., "make this look like that"). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub reference_file_ids: Option>, - /// Predefined processing tasks to apply. - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub tasks: Vec, - /// Processing quality level. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub quality: Option, - /// Whether to process in chunks for large files. Defaults to false. - #[serde(default, skip_serializing_if = "is_false")] - pub chunk_processing: bool, - /// Custom processing parameters. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub custom_params: Option, -} - -impl Stage for ProcessingData { - const NAME: &'static str = "processing"; - const SUBJECT: &'static str = "processing"; -} - -/// Postprocessing stage data. -/// -/// Runs when a user downloads the file. 
Prepares the final output: -/// - Format conversion to requested format -/// - Compression settings -/// - Cleanup of temporary artifacts -#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct PostprocessingData { - /// Target format for the output file. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub target_format: Option, - /// Compression level for output file. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub compression_level: Option, - /// Whether to burn annotations into the document vs keeping as metadata. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub flatten_annotations: Option, - /// Cleanup tasks to perform. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub cleanup_tasks: Option>, -} - -impl Stage for PostprocessingData { - const NAME: &'static str = "postprocessing"; - const SUBJECT: &'static str = "postprocessing"; -} - -/// Processing quality level. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub enum ProcessingQuality { - /// Fast processing with lower quality. - Fast, - /// Balanced speed and quality. - Balanced, - /// High quality, slower processing. - High, -} - -/// Compression level for output files. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub enum CompressionLevel { - /// No compression. - None, - /// Medium compression, balanced. - Normal, - /// High compression, slower but smaller files. - High, -} - -/// Document processing job. -/// -/// Represents a unit of work in the document processing pipeline. -/// Each job targets a specific file and is typed by its processing stage. -/// -/// The generic parameter `S` determines the stage (preprocessing, processing, -/// or postprocessing), enabling compile-time type safety and stage-specific -/// stream routing. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(bound = "")] -pub struct DocumentJob { - /// Unique job identifier (UUID v7 for time-ordering). - pub id: Uuid, - /// Database file ID to process. - pub file_id: Uuid, - /// Storage path in NATS object store (DocumentKey encoded). - pub object_key: String, - /// File extension for format detection. - pub file_extension: String, - /// Stage-specific data. - pub data: S, - /// Job priority. - pub priority: EventPriority, - /// When the job was created. - pub created_at: Timestamp, - /// NATS subject to publish result to (for internal job chaining). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub callback_subject: Option, - /// Idempotency key to prevent duplicate job processing. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub idempotency_key: Option, -} - -impl DocumentJob { - /// Creates a new document job with the given stage data. - pub fn new(file_id: Uuid, storage_path: String, file_extension: String, data: S) -> Self { - Self { - id: Uuid::now_v7(), - file_id, - object_key: storage_path, - file_extension, - data, - priority: EventPriority::Normal, - created_at: Timestamp::now(), - callback_subject: None, - idempotency_key: None, - } - } - - /// Sets the job priority. 
- pub fn with_priority(mut self, priority: EventPriority) -> Self { - self.priority = priority; - self - } - - /// Sets a callback subject for job chaining. - pub fn with_callback(mut self, subject: impl Into) -> Self { - self.callback_subject = Some(subject.into()); - self - } - - /// Sets an idempotency key. - pub fn with_idempotency_key(mut self, key: impl Into) -> Self { - self.idempotency_key = Some(key.into()); - self - } - - /// Returns the file ID. - #[inline] - pub fn file_id(&self) -> Uuid { - self.file_id - } - - /// Returns the storage path. - #[inline] - pub fn storage_path(&self) -> &str { - &self.object_key - } - - /// Returns the file extension. - #[inline] - pub fn file_extension(&self) -> &str { - &self.file_extension - } - - /// Returns a reference to the stage data. - #[inline] - pub fn data(&self) -> &S { - &self.data - } - - /// Returns the stage name. - #[inline] - pub fn stage_name(&self) -> &'static str { - S::NAME - } - - /// Returns the stream subject for this job's stage. - #[inline] - pub fn subject(&self) -> &'static str { - S::SUBJECT - } - - /// Returns job age since creation. - pub fn age(&self) -> std::time::Duration { - let now = Timestamp::now(); - let signed_dur = now.duration_since(self.created_at); - std::time::Duration::from_secs(signed_dur.as_secs().max(0) as u64) - } -} - -fn default_true() -> bool { - true -} - -fn is_true(value: &bool) -> bool { - *value -} - -fn is_false(value: &bool) -> bool { - !*value -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_preprocessing_job_new() { - let file_id = Uuid::now_v7(); - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - PreprocessingData::default(), - ); - - assert_eq!(job.file_id(), file_id); - assert_eq!(job.storage_path(), "storage/path"); - assert_eq!(job.file_extension(), "pdf"); - assert_eq!(job.stage_name(), "preprocessing"); - assert_eq!(job.subject(), "preprocessing"); - } - - #[test] - fn test_preprocessing_defaults() { - let data = PreprocessingData::default(); - assert!(data.validate_metadata); - assert!(data.run_ocr); - assert!(data.generate_embeddings); - assert!(data.generate_thumbnails.is_none()); - } - - #[test] - fn test_preprocessing_serialization_skips_defaults() { - let data = PreprocessingData::default(); - let json = serde_json::to_string(&data).unwrap(); - // Should be minimal since defaults are skipped - assert_eq!(json, "{}"); - - // Parsing empty object should give defaults - let parsed: PreprocessingData = serde_json::from_str("{}").unwrap(); - assert!(parsed.validate_metadata); - assert!(parsed.run_ocr); - assert!(parsed.generate_embeddings); - } - - #[test] - fn test_processing_job_with_prompt() { - let file_id = Uuid::now_v7(); - - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - ProcessingData { - prompt: "Apply the highlighted changes".to_string(), - context: Some("This is a legal document".to_string()), - annotation_ids: None, // Process all annotations - tasks: vec![PredefinedTask::Proofread], - ..Default::default() - }, - ); - - assert_eq!(job.stage_name(), "processing"); - assert_eq!(job.data().prompt, "Apply the highlighted changes"); - assert_eq!( - job.data().context, - Some("This is a legal document".to_string()) - ); - assert!(job.data().annotation_ids.is_none()); - assert_eq!(job.data().tasks.len(), 1); - } - - #[test] - fn test_predefined_task_redact() { - let task = PredefinedTask::Redact { - patterns: vec!["email".to_string(), "phone".to_string()], - }; - 
- let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_translate() { - let task = PredefinedTask::Translate { - target_language: "es".to_string(), - }; - - let json = serde_json::to_string(&task).unwrap(); - assert!(json.contains("translate")); - assert!(json.contains("es")); - } - - #[test] - fn test_postprocessing_job() { - let file_id = Uuid::now_v7(); - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - PostprocessingData { - target_format: Some("docx".to_string()), - compression_level: Some(CompressionLevel::Normal), - ..Default::default() - }, - ); - - assert_eq!(job.stage_name(), "postprocessing"); - assert_eq!(job.data().target_format, Some("docx".to_string())); - assert_eq!(job.data().compression_level, Some(CompressionLevel::Normal)); - } - - #[test] - fn test_job_with_callback_and_idempotency() { - let file_id = Uuid::now_v7(); - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - PreprocessingData::default(), - ) - .with_callback("results.preprocessing") - .with_idempotency_key("upload-123"); - - assert_eq!( - job.callback_subject, - Some("results.preprocessing".to_string()) - ); - assert_eq!(job.idempotency_key, Some("upload-123".to_string())); - } - - #[test] - fn test_job_serialization_roundtrip() { - let file_id = Uuid::now_v7(); - let job = DocumentJob::new( - file_id, - "storage/path".to_string(), - "pdf".to_string(), - PreprocessingData { - validate_metadata: true, - run_ocr: true, - generate_embeddings: true, - generate_thumbnails: Some(true), - }, - ); - - let json = serde_json::to_string(&job).unwrap(); - let parsed: DocumentJob = serde_json::from_str(&json).unwrap(); - - assert_eq!(job.file_id, parsed.file_id); - assert_eq!(job.data, parsed.data); - } - - #[test] - fn test_compression_level_serialization() { - let level = CompressionLevel::High; - let json = serde_json::to_string(&level).unwrap(); - assert_eq!(json, "\"high\""); - - let parsed: CompressionLevel = serde_json::from_str(&json).unwrap(); - assert_eq!(parsed, CompressionLevel::High); - } - - #[test] - fn test_processing_quality_serialization() { - let quality = ProcessingQuality::Fast; - let json = serde_json::to_string(&quality).unwrap(); - assert_eq!(json, "\"fast\""); - - let parsed: ProcessingQuality = serde_json::from_str(&json).unwrap(); - assert_eq!(parsed, ProcessingQuality::Fast); - } - - #[test] - fn test_stage_constants() { - assert_eq!(PreprocessingData::NAME, "preprocessing"); - assert_eq!(PreprocessingData::SUBJECT, "preprocessing"); - - assert_eq!(ProcessingData::NAME, "processing"); - assert_eq!(ProcessingData::SUBJECT, "processing"); - - assert_eq!(PostprocessingData::NAME, "postprocessing"); - assert_eq!(PostprocessingData::SUBJECT, "postprocessing"); - } -} diff --git a/crates/nvisy-nats/src/stream/document_job_pub.rs b/crates/nvisy-nats/src/stream/document_job_pub.rs deleted file mode 100644 index 0195427..0000000 --- a/crates/nvisy-nats/src/stream/document_job_pub.rs +++ /dev/null @@ -1,57 +0,0 @@ -//! Document job stream publisher. - -use std::marker::PhantomData; - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; - -use super::document_job::{DocumentJob, STREAM_NAME, Stage}; -use super::publisher::StreamPublisher; -use crate::Result; - -/// Generic document job publisher for a specific processing stage. 
-/// -/// This publisher routes jobs to stage-specific subjects within the -/// `DOCUMENT_JOBS` stream, enabling separate consumers per stage. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct DocumentJobPublisher { - #[deref] - #[deref_mut] - publisher: StreamPublisher>, - _marker: PhantomData, -} - -impl DocumentJobPublisher { - /// Create a new document job publisher for the specified stage. - pub async fn new(jetstream: &Context) -> Result { - let publisher = StreamPublisher::new(jetstream, STREAM_NAME).await?; - Ok(Self { - publisher, - _marker: PhantomData, - }) - } - - /// Publish a job to the stage-specific subject. - /// - /// Jobs are published to `DOCUMENT_JOBS.{stage}.{file_id}`. - pub async fn publish_job(&self, job: &DocumentJob) -> Result<()> { - let subject = format!("{}.{}", S::SUBJECT, job.file_id); - self.publisher.publish(&subject, job).await - } - - /// Publish a job with a custom subject suffix. - /// - /// Jobs are published to `DOCUMENT_JOBS.{stage}.{suffix}`. - pub async fn publish_job_with_subject(&self, job: &DocumentJob, suffix: &str) -> Result<()> { - let subject = format!("{}.{}", S::SUBJECT, suffix); - self.publisher.publish(&subject, job).await - } - - /// Publish multiple jobs in batch. - pub async fn publish_batch(&self, jobs: &[DocumentJob]) -> Result<()> { - // Group by file_id isn't needed since we use the stage subject - self.publisher - .publish_batch_parallel(S::SUBJECT, jobs, 10) - .await - } -} diff --git a/crates/nvisy-nats/src/stream/document_job_sub.rs b/crates/nvisy-nats/src/stream/document_job_sub.rs deleted file mode 100644 index edbbcc5..0000000 --- a/crates/nvisy-nats/src/stream/document_job_sub.rs +++ /dev/null @@ -1,66 +0,0 @@ -//! Document job stream subscriber. - -use std::marker::PhantomData; - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; - -use super::document_job::{DocumentJob, STREAM_NAME, Stage}; -use super::subscriber::StreamSubscriber; -use crate::Result; - -/// Generic document job subscriber for a specific processing stage. -/// -/// This subscriber filters jobs by stage-specific subjects within the -/// `DOCUMENT_JOBS` stream, enabling dedicated consumers per stage. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct DocumentJobSubscriber { - #[deref] - #[deref_mut] - subscriber: StreamSubscriber>, - _marker: PhantomData, -} - -impl DocumentJobSubscriber { - /// Create a new document job subscriber for the specified stage. - /// - /// The subscriber automatically filters to the stage-specific subject pattern. - pub async fn new(jetstream: &Context, consumer_name: &str) -> Result { - let filter_subject = format!("{}.{}.>", STREAM_NAME, S::SUBJECT); - let subscriber = StreamSubscriber::new(jetstream, STREAM_NAME, consumer_name) - .await? - .with_filter_subject(filter_subject); - Ok(Self { - subscriber, - _marker: PhantomData, - }) - } - - /// Create a subscriber without stage filtering (receives all stages). - /// - /// Note: This requires the job type to match at deserialization time, - /// so it's primarily useful for monitoring or debugging. - pub async fn new_unfiltered(jetstream: &Context, consumer_name: &str) -> Result { - let subscriber = StreamSubscriber::new(jetstream, STREAM_NAME, consumer_name).await?; - Ok(Self { - subscriber, - _marker: PhantomData, - }) - } - - /// Create a subscriber filtered to a specific file. 
- pub async fn new_for_file( - jetstream: &Context, - consumer_name: &str, - file_id: uuid::Uuid, - ) -> Result { - let filter_subject = format!("{}.{}.{}", STREAM_NAME, S::SUBJECT, file_id); - let subscriber = StreamSubscriber::new(jetstream, STREAM_NAME, consumer_name) - .await? - .with_filter_subject(filter_subject); - Ok(Self { - subscriber, - _marker: PhantomData, - }) - } -} diff --git a/crates/nvisy-nats/src/stream/document_task.rs b/crates/nvisy-nats/src/stream/document_task.rs deleted file mode 100644 index fc9c5b4..0000000 --- a/crates/nvisy-nats/src/stream/document_task.rs +++ /dev/null @@ -1,261 +0,0 @@ -//! Predefined document processing tasks. - -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Predefined processing tasks that can be applied to documents. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(tag = "task", rename_all = "camelCase")] -pub enum PredefinedTask { - /// Redact sensitive information matching patterns. - Redact { - /// Patterns to redact (emails, phone numbers, SSNs, etc.). - patterns: Vec, - }, - - /// Summarize document content. - Summarize { - /// Maximum length of summary. - #[serde(default, skip_serializing_if = "Option::is_none")] - max_length: Option, - }, - - /// Translate document to target language. - Translate { - /// Target language code (e.g., "es", "fr", "de"). - target_language: String, - }, - - /// Extract key information from document. - ExtractInfo { - /// Fields to extract (e.g., "dates", "names", "amounts"). - #[serde(default, skip_serializing_if = "Vec::is_empty")] - fields: Vec, - }, - - /// Insert information into document at specified locations. - InsertInfo { - /// Key-value pairs to insert. - values: Vec, - }, - - /// Generate information based on document content. - GenerateInfo { - /// Type of information to generate. - info_type: GenerateInfoType, - }, - - /// Reformat document structure. - Reformat { - /// Target format style. - #[serde(default, skip_serializing_if = "Option::is_none")] - style: Option, - }, - - /// Proofread and fix grammar/spelling. - Proofread, - - /// Generate table of contents. - GenerateToc, - - /// Split document into multiple files. - Split { - /// How to split the document. - strategy: SplitStrategy, - }, - - /// Merge multiple files into one document. - Merge { - /// File IDs to merge with this document. - file_ids: Vec, - /// Order of files in the merged document. - #[serde(default, skip_serializing_if = "Option::is_none")] - order: Option, - }, -} - -/// Value to insert into a document. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -pub struct InsertValue { - /// Field or placeholder name. - pub field: String, - /// Value to insert. - pub value: String, - /// Location hint (e.g., "header", "footer", "after:section1"). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub location: Option, -} - -/// Types of information that can be generated. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub enum GenerateInfoType { - /// Generate an executive summary. - ExecutiveSummary, - /// Generate keywords/tags. - Keywords, - /// Generate document metadata. - Metadata, - /// Generate abstract. - Abstract, - /// Generate key takeaways. - KeyTakeaways, - /// Generate action items. 
- ActionItems, - /// Generate FAQ from content. - Faq, -} - -/// Strategy for splitting documents. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(tag = "by", rename_all = "camelCase")] -pub enum SplitStrategy { - /// Split by page count. - Pages { - /// Number of pages per split. - pages_per_file: u32, - }, - /// Split by sections/chapters. - Sections, - /// Split by heading level. - Headings { - /// Heading level to split on (1-6). - level: u8, - }, - /// Split by file size. - Size { - /// Maximum size per file in bytes. - max_bytes: u64, - }, - /// Split at specific page numbers. - AtPages { - /// Page numbers to split at. - page_numbers: Vec, - }, -} - -/// Order for merging documents. -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "camelCase")] -pub enum MergeOrder { - /// Use the order provided in file_ids. - #[default] - AsProvided, - /// Sort by filename alphabetically. - Alphabetical, - /// Sort by creation date. - ByDate, - /// Sort by file size. - BySize, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_predefined_task_redact() { - let task = PredefinedTask::Redact { - patterns: vec!["email".to_string(), "phone".to_string()], - }; - - let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_translate() { - let task = PredefinedTask::Translate { - target_language: "es".to_string(), - }; - - let json = serde_json::to_string(&task).unwrap(); - assert!(json.contains("translate")); - assert!(json.contains("es")); - } - - #[test] - fn test_predefined_task_split() { - let task = PredefinedTask::Split { - strategy: SplitStrategy::Pages { pages_per_file: 10 }, - }; - - let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_merge() { - let task = PredefinedTask::Merge { - file_ids: vec![Uuid::now_v7(), Uuid::now_v7()], - order: Some(MergeOrder::Alphabetical), - }; - - let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_insert_info() { - let task = PredefinedTask::InsertInfo { - values: vec![ - InsertValue { - field: "company_name".to_string(), - value: "Acme Corp".to_string(), - location: Some("header".to_string()), - }, - InsertValue { - field: "date".to_string(), - value: "2024-01-15".to_string(), - location: None, - }, - ], - }; - - let json = serde_json::to_string(&task).unwrap(); - let parsed: PredefinedTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task, parsed); - } - - #[test] - fn test_predefined_task_generate_info() { - let task = PredefinedTask::GenerateInfo { - info_type: GenerateInfoType::ExecutiveSummary, - }; - - let json = serde_json::to_string(&task).unwrap(); - assert!(json.contains("generateInfo")); - assert!(json.contains("executiveSummary")); - } - - #[test] - fn test_split_strategy_serialization() { - let strategies = vec![ - SplitStrategy::Pages { pages_per_file: 5 }, - SplitStrategy::Sections, - SplitStrategy::Headings { level: 2 }, - SplitStrategy::Size { - max_bytes: 1024 * 1024, - }, - SplitStrategy::AtPages { - page_numbers: vec![5, 10, 15], - }, - ]; - - for 
strategy in strategies { - let json = serde_json::to_string(&strategy).unwrap(); - let parsed: SplitStrategy = serde_json::from_str(&json).unwrap(); - assert_eq!(strategy, parsed); - } - } -} diff --git a/crates/nvisy-nats/src/stream/event.rs b/crates/nvisy-nats/src/stream/event.rs index 66af202..164e3d7 100644 --- a/crates/nvisy-nats/src/stream/event.rs +++ b/crates/nvisy-nats/src/stream/event.rs @@ -1,42 +1,79 @@ //! Event types for stream processing. //! -//! This module contains priority levels used across all event streams. +//! This module contains common event types and the file job type +//! used in processing pipelines. +use jiff::Timestamp; #[cfg(feature = "schema")] use schemars::JsonSchema; +use serde::de::DeserializeOwned; use serde::{Deserialize, Serialize}; +use uuid::Uuid; -/// Event execution priority levels. +/// File processing job. /// -/// Priority determines the order in which events are processed when multiple -/// events are queued. Higher priority events are processed before lower priority ones. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] -#[derive(Serialize, Deserialize)] +/// Represents a unit of work in a file processing pipeline. +/// Each job targets a specific file and carries a generic payload +/// that defines the processing parameters. +/// +/// The generic parameter `T` is the job-specific data payload. +/// Callers define their own payload types for different pipeline stages. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(rename_all = "lowercase")] -pub enum EventPriority { - /// Low priority - processed when system resources are available. - Low = 0, +#[serde(bound = "T: Serialize + DeserializeOwned")] +pub struct FileJob { + /// Unique job identifier (UUID v7 for time-ordering). + pub id: Uuid, + /// Database file ID to process. + pub file_id: Uuid, + /// Storage path in NATS object store (DocumentKey encoded). + pub object_key: String, + /// File extension for format detection. + pub file_extension: String, + /// Job-specific data payload. + pub data: T, + /// When the job was created. + pub created_at: Timestamp, + /// NATS subject to publish result to (for internal job chaining). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub callback_subject: Option, + /// Idempotency key to prevent duplicate job processing. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub idempotency_key: Option, +} - /// Normal priority - default for most events. - #[default] - Normal = 1, +impl FileJob { + /// Creates a new file job with the given data payload. + pub fn new(file_id: Uuid, object_key: String, file_extension: String, data: T) -> Self { + Self { + id: Uuid::now_v7(), + file_id, + object_key, + file_extension, + data, + created_at: Timestamp::now(), + callback_subject: None, + idempotency_key: None, + } + } - /// High priority - processed ahead of normal events. - High = 2, -} + /// Sets a callback subject for job chaining. + pub fn with_callback(mut self, subject: impl Into) -> Self { + self.callback_subject = Some(subject.into()); + self + } -impl EventPriority { - /// Returns the numeric value of the priority level. - #[inline] - pub const fn as_u8(self) -> u8 { - self as u8 + /// Sets an idempotency key. + pub fn with_idempotency_key(mut self, key: impl Into) -> Self { + self.idempotency_key = Some(key.into()); + self } - /// Returns true if this is a high priority event. 
- #[inline] - pub const fn is_high(self) -> bool { - matches!(self, Self::High) + /// Returns job age since creation. + pub fn age(&self) -> std::time::Duration { + let now = Timestamp::now(); + let signed_dur = now.duration_since(self.created_at); + std::time::Duration::from_secs(signed_dur.as_secs().max(0) as u64) } } @@ -44,29 +81,38 @@ impl EventPriority { mod tests { use super::*; - #[test] - fn test_priority_ordering() { - assert!(EventPriority::Low < EventPriority::Normal); - assert!(EventPriority::Normal < EventPriority::High); + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] + struct TestPayload { + message: String, } #[test] - fn test_priority_numeric_values() { - assert_eq!(EventPriority::Low.as_u8(), 0); - assert_eq!(EventPriority::Normal.as_u8(), 1); - assert_eq!(EventPriority::High.as_u8(), 2); - } + fn test_serialization_roundtrip() { + let file_id = Uuid::now_v7(); + let job = FileJob::new( + file_id, + "path".to_string(), + "pdf".to_string(), + TestPayload { + message: "hello".to_string(), + }, + ); - #[test] - fn test_priority_default() { - assert_eq!(EventPriority::default(), EventPriority::Normal); + let json = serde_json::to_string(&job).unwrap(); + let parsed: FileJob = serde_json::from_str(&json).unwrap(); + + assert_eq!(job.file_id, parsed.file_id); + assert_eq!(job.data, parsed.data); } #[test] - fn test_priority_serialization() { - let priority = EventPriority::High; - let serialized = serde_json::to_string(&priority).unwrap(); - let deserialized: EventPriority = serde_json::from_str(&serialized).unwrap(); - assert_eq!(priority, deserialized); + fn test_with_unit_payload() { + let file_id = Uuid::now_v7(); + let job: FileJob<()> = FileJob::new(file_id, "path".to_string(), "pdf".to_string(), ()); + + let json = serde_json::to_string(&job).unwrap(); + let parsed: FileJob<()> = serde_json::from_str(&json).unwrap(); + + assert_eq!(job.file_id, parsed.file_id); } } diff --git a/crates/nvisy-nats/src/stream/event_pub.rs b/crates/nvisy-nats/src/stream/event_pub.rs new file mode 100644 index 0000000..ee826b7 --- /dev/null +++ b/crates/nvisy-nats/src/stream/event_pub.rs @@ -0,0 +1,76 @@ +//! Generic event stream publisher. + +use std::marker::PhantomData; + +use async_nats::jetstream::Context; +use derive_more::{Deref, DerefMut}; +use serde::Serialize; + +use super::event_stream::EventStream; +use super::stream_pub::StreamPublisher; +use crate::Result; + +/// Generic event publisher for delivering typed events to workers. +/// +/// This publisher is generic over: +/// - `T`: The event/message type to publish +/// - `S`: The stream configuration (determines stream name, subject, etc.) +#[derive(Debug, Clone, Deref, DerefMut)] +pub struct EventPublisher +where + T: Serialize + Send + Sync + 'static, + S: EventStream, +{ + #[deref] + #[deref_mut] + publisher: StreamPublisher, + _stream: PhantomData, +} + +impl EventPublisher +where + T: Serialize + Send + Sync + 'static, + S: EventStream, +{ + /// Create a new event publisher for the stream type. + pub(crate) async fn new(jetstream: &Context) -> Result { + let publisher = StreamPublisher::new(jetstream, S::NAME).await?; + Ok(Self { + publisher, + _stream: PhantomData, + }) + } + + /// Publish an event to the stream's configured subject. + pub async fn publish(&self, event: &T) -> Result<()> { + self.publisher.publish(S::SUBJECT, event).await + } + + /// Publish an event with a sub-subject appended to the stream subject. + /// + /// Events are published to `{stream_subject}.{sub_subject}`. 
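The new `FileJob` deliberately leaves its payload open: each pipeline stage defines its own data type instead of the removed preprocessing/processing/postprocessing structs. A minimal sketch of a caller-defined payload using the builder API added above; `OcrPayload`, the callback subject, and the idempotency-key format are illustrative only, not part of this patch:

```rust
use nvisy_nats::stream::FileJob;
use serde::{Deserialize, Serialize};
use uuid::Uuid;

/// Hypothetical stage-specific payload defined by the consumer of the crate.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
struct OcrPayload {
    language: String,
    dpi: u32,
}

fn build_ocr_job(file_id: Uuid, object_key: String) -> FileJob<OcrPayload> {
    FileJob::new(
        file_id,
        object_key,
        "pdf".to_string(),
        OcrPayload { language: "en".to_string(), dpi: 300 },
    )
    // Chain a follow-up stage by telling the worker where to publish its result.
    .with_callback("file.jobs.ocr.done")
    // Guard against duplicate processing of retried uploads.
    .with_idempotency_key(format!("ocr-{file_id}"))
}
```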
+ pub async fn publish_to(&self, sub_subject: &str, event: &T) -> Result<()> { + let subject = format!("{}.{}", S::SUBJECT, sub_subject); + self.publisher.publish(&subject, event).await + } + + /// Publish multiple events to the stream's configured subject. + pub async fn publish_batch(&self, events: &[T]) -> Result<()> + where + T: Clone, + { + self.publisher.publish_batch(S::SUBJECT, events).await + } + + /// Returns the stream name. + #[inline] + pub fn stream_name(&self) -> &'static str { + S::NAME + } + + /// Returns the subject. + #[inline] + pub fn subject(&self) -> &'static str { + S::SUBJECT + } +} diff --git a/crates/nvisy-nats/src/stream/event_stream.rs b/crates/nvisy-nats/src/stream/event_stream.rs new file mode 100644 index 0000000..7d95301 --- /dev/null +++ b/crates/nvisy-nats/src/stream/event_stream.rs @@ -0,0 +1,74 @@ +//! Event stream configuration for NATS JetStream. + +use std::time::Duration; + +/// Marker trait for event streams. +/// +/// This trait defines the configuration for a NATS JetStream stream. +pub trait EventStream: Clone + Send + Sync + 'static { + /// Stream name used in NATS JetStream. + const NAME: &'static str; + + /// Subject pattern for publishing/subscribing to this stream. + const SUBJECT: &'static str; + + /// Maximum age for messages in this stream. + /// Returns `None` for streams where messages should not expire. + const MAX_AGE: Option; + + /// Default consumer name for this stream. + const CONSUMER_NAME: &'static str; +} + +/// Stream for file processing jobs. +/// +/// Messages expire after 7 days. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct FileStream; + +impl EventStream for FileStream { + const NAME: &'static str = "FILE_JOBS"; + const SUBJECT: &'static str = "file.jobs"; + const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); + const CONSUMER_NAME: &'static str = "file-worker"; +} + +/// Stream for webhook delivery. +/// +/// Messages expire after 1 day. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub struct WebhookStream; + +impl EventStream for WebhookStream { + const NAME: &'static str = "WEBHOOKS"; + const SUBJECT: &'static str = "webhooks"; + const MAX_AGE: Option = Some(Duration::from_secs(24 * 60 * 60)); + const CONSUMER_NAME: &'static str = "webhook-worker"; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_file_stream() { + assert_eq!(FileStream::NAME, "FILE_JOBS"); + assert_eq!(FileStream::SUBJECT, "file.jobs"); + assert_eq!( + FileStream::MAX_AGE, + Some(Duration::from_secs(7 * 24 * 60 * 60)) + ); + assert_eq!(FileStream::CONSUMER_NAME, "file-worker"); + } + + #[test] + fn test_webhook_stream() { + assert_eq!(WebhookStream::NAME, "WEBHOOKS"); + assert_eq!(WebhookStream::SUBJECT, "webhooks"); + assert_eq!( + WebhookStream::MAX_AGE, + Some(Duration::from_secs(24 * 60 * 60)) + ); + assert_eq!(WebhookStream::CONSUMER_NAME, "webhook-worker"); + } +} diff --git a/crates/nvisy-nats/src/stream/event_sub.rs b/crates/nvisy-nats/src/stream/event_sub.rs new file mode 100644 index 0000000..974fd59 --- /dev/null +++ b/crates/nvisy-nats/src/stream/event_sub.rs @@ -0,0 +1,63 @@ +//! Generic event stream subscriber. + +use std::marker::PhantomData; + +use async_nats::jetstream::Context; +use derive_more::{Deref, DerefMut}; +use serde::de::DeserializeOwned; + +use super::event_stream::EventStream; +use super::stream_sub::StreamSubscriber; +use crate::Result; + +/// Generic event subscriber for consuming typed events. 
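Adding a new stream now only takes an `EventStream` marker type plus a typed publisher; nothing stage-specific remains. A sketch assuming `NatsClient::event_publisher()` is generic over the event/stream pair and infers its generics from the annotation, as the file-upload handler later in this patch suggests; `EmailStream` and `EmailEvent` are made up for illustration:

```rust
use std::time::Duration;

use nvisy_nats::NatsClient;
use nvisy_nats::stream::{EventPublisher, EventStream};
use serde::{Deserialize, Serialize};

/// Hypothetical stream configuration for outbound e-mail notifications.
#[derive(Debug, Clone, Copy, Default)]
struct EmailStream;

impl EventStream for EmailStream {
    const NAME: &'static str = "EMAILS";
    const SUBJECT: &'static str = "emails";
    // Drop undelivered notifications after one hour.
    const MAX_AGE: Option<Duration> = Some(Duration::from_secs(60 * 60));
    const CONSUMER_NAME: &'static str = "email-worker";
}

/// Hypothetical event payload.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct EmailEvent {
    to: String,
    template: String,
}

async fn notify_signup(nats: &NatsClient, event: &EmailEvent) -> nvisy_nats::Result<()> {
    // Generics are inferred from the annotation, as in the file-upload handler.
    let publisher: EventPublisher<EmailEvent, EmailStream> = nats.event_publisher().await?;
    // Lands on `emails.signup`: the stream subject plus the sub-subject.
    publisher.publish_to("signup", event).await
}
```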
+/// +/// This subscriber is generic over: +/// - `T`: The event/message type to consume +/// - `S`: The stream configuration (determines stream name, subject, consumer name) +#[derive(Debug, Deref, DerefMut)] +pub struct EventSubscriber +where + T: DeserializeOwned + Send + Sync + 'static, + S: EventStream, +{ + #[deref] + #[deref_mut] + subscriber: StreamSubscriber, + _stream: PhantomData, +} + +impl EventSubscriber +where + T: DeserializeOwned + Send + Sync + 'static, + S: EventStream, +{ + /// Create a new event subscriber using the stream's default consumer name. + pub(crate) async fn new(jetstream: &Context) -> Result { + let subscriber = StreamSubscriber::new(jetstream, S::NAME, S::CONSUMER_NAME) + .await? + .with_filter_subject(format!("{}.>", S::NAME)); + Ok(Self { + subscriber, + _stream: PhantomData, + }) + } + + /// Returns the stream name. + #[inline] + pub fn stream_name(&self) -> &'static str { + S::NAME + } + + /// Returns the subject. + #[inline] + pub fn subject(&self) -> &'static str { + S::SUBJECT + } + + /// Returns the consumer name. + #[inline] + pub fn consumer_name(&self) -> &'static str { + S::CONSUMER_NAME + } +} diff --git a/crates/nvisy-nats/src/stream/mod.rs b/crates/nvisy-nats/src/stream/mod.rs index 603c2a7..1a939c2 100644 --- a/crates/nvisy-nats/src/stream/mod.rs +++ b/crates/nvisy-nats/src/stream/mod.rs @@ -2,26 +2,19 @@ //! //! This module provides type-safe streaming capabilities for: //! -//! - Document processing jobs +//! - File processing jobs via [`FileJob`], [`EventPublisher`], [`EventSubscriber`] +//! - Generic event publishing and subscribing with stream configuration via [`EventStream`] -// Base types mod event; -mod publisher; -mod subscriber; +mod event_pub; +mod event_stream; +mod event_sub; +mod stream_pub; +mod stream_sub; -// Document job -mod document_job; -mod document_job_pub; -mod document_job_sub; -mod document_task; - -pub use document_job::{ - CompressionLevel, DocumentJob, PostprocessingData, PreprocessingData, ProcessingData, - ProcessingQuality, STREAM_NAME as DOCUMENT_JOB_STREAM, Stage, -}; -pub use document_job_pub::DocumentJobPublisher; -pub use document_job_sub::DocumentJobSubscriber; -pub use document_task::{GenerateInfoType, InsertValue, MergeOrder, PredefinedTask, SplitStrategy}; -pub use event::EventPriority; -pub use publisher::StreamPublisher; -pub use subscriber::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; +pub use event::FileJob; +pub use event_pub::EventPublisher; +pub use event_stream::{EventStream, FileStream, WebhookStream}; +pub use event_sub::EventSubscriber; +pub use stream_pub::StreamPublisher; +pub use stream_sub::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; diff --git a/crates/nvisy-nats/src/stream/publisher.rs b/crates/nvisy-nats/src/stream/stream_pub.rs similarity index 98% rename from crates/nvisy-nats/src/stream/publisher.rs rename to crates/nvisy-nats/src/stream/stream_pub.rs index 4e143cc..4dde9e0 100644 --- a/crates/nvisy-nats/src/stream/publisher.rs +++ b/crates/nvisy-nats/src/stream/stream_pub.rs @@ -34,7 +34,7 @@ where { /// Create a new type-safe stream publisher #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] - pub async fn new(jetstream: &Context, stream_name: &str) -> Result { + pub(crate) async fn new(jetstream: &Context, stream_name: &str) -> Result { let stream_config = stream::Config { name: stream_name.to_string(), description: Some(format!("Type-safe stream: {}", stream_name)), diff --git 
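On the consuming side, `EventSubscriber` pairs the stream's durable consumer name with a typed message stream. A rough consumption sketch using the same `webhook_subscriber`/`subscribe`/`next_with_timeout`/`ack` calls the delivery worker later in this patch relies on; the generic order `EventSubscriber<T, S>` mirrors the publisher and is an assumption here, as is the error type flowing through `?`:

```rust
use std::time::Duration;

use nvisy_nats::NatsClient;
use nvisy_nats::stream::{EventSubscriber, WebhookStream};
use nvisy_webhook::WebhookRequest;

/// Drain pending webhook requests, acking each one after inspection.
async fn drain_webhook_requests(nats: &NatsClient) -> nvisy_nats::Result<()> {
    let subscriber: EventSubscriber<WebhookRequest, WebhookStream> =
        nats.webhook_subscriber().await?;
    let mut stream = subscriber.subscribe().await?;

    // `next_with_timeout` yields Ok(None) when no message arrives in time.
    while let Some(mut message) = stream.next_with_timeout(Duration::from_secs(5)).await? {
        tracing::debug!(event = %message.payload().event, "received webhook request");
        if let Err(err) = message.ack().await {
            tracing::warn!(error = %err, "failed to ack message");
        }
    }
    Ok(())
}
```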
a/crates/nvisy-nats/src/stream/subscriber.rs b/crates/nvisy-nats/src/stream/stream_sub.rs similarity index 93% rename from crates/nvisy-nats/src/stream/subscriber.rs rename to crates/nvisy-nats/src/stream/stream_sub.rs index e3dc33c..458448d 100644 --- a/crates/nvisy-nats/src/stream/subscriber.rs +++ b/crates/nvisy-nats/src/stream/stream_sub.rs @@ -37,7 +37,11 @@ where { /// Create a new type-safe stream subscriber. #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] - pub async fn new(jetstream: &Context, stream_name: &str, consumer_name: &str) -> Result { + pub(crate) async fn new( + jetstream: &Context, + stream_name: &str, + consumer_name: &str, + ) -> Result { // Verify stream exists jetstream .get_stream(stream_name) @@ -255,38 +259,6 @@ where .map_err(|e| Error::operation("consumer_info", e.to_string())) .map(|info| (*info).clone()) } - - /// Create a new subscriber with exponential backoff retry logic. - #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] - pub async fn new_with_retry( - jetstream: &Context, - stream_name: &str, - consumer_name: &str, - max_retries: u32, - ) -> Result { - let mut attempts = 0; - let mut delay = std::time::Duration::from_millis(100); - - loop { - match Self::new(jetstream, stream_name, consumer_name).await { - Ok(subscriber) => return Ok(subscriber), - Err(e) if attempts < max_retries => { - attempts += 1; - debug!( - target: TRACING_TARGET_STREAM, - attempt = attempts, - max_retries = max_retries, - delay_ms = delay.as_millis(), - error = %e, - "Retrying subscriber creation" - ); - tokio::time::sleep(delay).await; - delay = std::cmp::min(delay * 2, std::time::Duration::from_secs(30)); - } - Err(e) => return Err(e), - } - } - } } /// Type-safe message stream wrapper. diff --git a/crates/nvisy-postgres/src/model/workspace_webhook.rs b/crates/nvisy-postgres/src/model/workspace_webhook.rs index 36ea3f1..7fce105 100644 --- a/crates/nvisy-postgres/src/model/workspace_webhook.rs +++ b/crates/nvisy-postgres/src/model/workspace_webhook.rs @@ -42,6 +42,8 @@ pub struct WorkspaceWebhook { pub events: Vec>, /// Custom headers to include in webhook requests. pub headers: serde_json::Value, + /// HMAC-SHA256 signing secret for webhook verification. + pub secret: String, /// Current status of the webhook. pub status: WebhookStatus, /// Timestamp of last webhook trigger. diff --git a/crates/nvisy-postgres/src/query/workspace_webhook.rs b/crates/nvisy-postgres/src/query/workspace_webhook.rs index 988c6a0..bc15ad3 100644 --- a/crates/nvisy-postgres/src/query/workspace_webhook.rs +++ b/crates/nvisy-postgres/src/query/workspace_webhook.rs @@ -7,7 +7,9 @@ use diesel_async::RunQueryDsl; use uuid::Uuid; use crate::model::{NewWorkspaceWebhook, UpdateWorkspaceWebhook, WorkspaceWebhook}; -use crate::types::{Cursor, CursorPage, CursorPagination, OffsetPagination, WebhookStatus}; +use crate::types::{ + Cursor, CursorPage, CursorPagination, OffsetPagination, WebhookEvent, WebhookStatus, +}; use crate::{PgConnection, PgError, PgResult, schema}; /// Repository for workspace webhook database operations. @@ -82,6 +84,19 @@ pub trait WorkspaceWebhookRepository { &mut self, webhook_id: Uuid, ) -> impl Future> + Send; + + /// Finds all active webhooks for a workspace that are subscribed to a specific event. 
+ /// + /// Returns webhooks where: + /// - The webhook belongs to the specified workspace + /// - The webhook status is Active + /// - The webhook's events array contains the specified event + /// - The webhook is not deleted + fn find_webhooks_for_event( + &mut self, + workspace_id: Uuid, + event: WebhookEvent, + ) -> impl Future>> + Send; } impl WorkspaceWebhookRepository for PgConnection { @@ -313,4 +328,34 @@ impl WorkspaceWebhookRepository for PgConnection { Ok(webhook) } + + async fn find_webhooks_for_event( + &mut self, + ws_id: Uuid, + event: WebhookEvent, + ) -> PgResult> { + use diesel::dsl::sql; + use diesel::sql_types::Bool; + use schema::workspace_webhooks::dsl::*; + + // Query webhooks where the events array contains the target event. + // Uses PostgreSQL's `@>` (array contains) operator via raw SQL. + // The events column is Array>, so we check if + // the array contains the event value. + let event_str = format!("'{}'", event.to_string().replace('\'', "''")); + let contains_event = + sql::(&format!("events @> ARRAY[{}]::WEBHOOK_EVENT[]", event_str)); + + let webhooks = workspace_webhooks + .filter(workspace_id.eq(ws_id)) + .filter(status.eq(WebhookStatus::Active)) + .filter(deleted_at.is_null()) + .filter(contains_event) + .select(WorkspaceWebhook::as_select()) + .load(self) + .await + .map_err(PgError::from)?; + + Ok(webhooks) + } } diff --git a/crates/nvisy-postgres/src/schema.rs b/crates/nvisy-postgres/src/schema.rs index 75809e8..830366a 100644 --- a/crates/nvisy-postgres/src/schema.rs +++ b/crates/nvisy-postgres/src/schema.rs @@ -382,6 +382,7 @@ diesel::table! { url -> Text, events -> Array>, headers -> Jsonb, + secret -> Text, status -> WebhookStatus, last_triggered_at -> Nullable, created_by -> Uuid, diff --git a/crates/nvisy-postgres/src/types/enums/webhook_event.rs b/crates/nvisy-postgres/src/types/enums/webhook_event.rs index cb23b9a..ec684a4 100644 --- a/crates/nvisy-postgres/src/types/enums/webhook_event.rs +++ b/crates/nvisy-postgres/src/types/enums/webhook_event.rs @@ -152,4 +152,26 @@ impl WebhookEvent { | WebhookEvent::IntegrationDesynced => "integration", } } + + /// Returns the event as a subject string for NATS routing. 
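For callers outside the emitter, the new repository method behaves like the other `WorkspaceWebhookRepository` queries: the array-containment filter (`events @> ARRAY[...]::WEBHOOK_EVENT[]`) plus the status and soft-delete checks all run in SQL. A small usage sketch, assuming the pooled connection type the handlers use (`PgConn`) exposes the repository trait the same way it does elsewhere in this patch:

```rust
use nvisy_postgres::query::WorkspaceWebhookRepository;
use nvisy_postgres::types::WebhookEvent;
use nvisy_postgres::{PgConn, PgResult};
use uuid::Uuid;

/// Returns how many active webhooks in the workspace would receive
/// a `file:created` event.
async fn subscriber_count(conn: &mut PgConn, workspace_id: Uuid) -> PgResult<usize> {
    let webhooks = conn
        .find_webhooks_for_event(workspace_id, WebhookEvent::FileCreated)
        .await?;
    Ok(webhooks.len())
}
```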
+ /// + /// Format: `{category}.{action}` (e.g., "file.created", "member.deleted") + pub fn as_subject(&self) -> &'static str { + match self { + WebhookEvent::DocumentCreated => "document.created", + WebhookEvent::DocumentUpdated => "document.updated", + WebhookEvent::DocumentDeleted => "document.deleted", + WebhookEvent::FileCreated => "file.created", + WebhookEvent::FileUpdated => "file.updated", + WebhookEvent::FileDeleted => "file.deleted", + WebhookEvent::MemberAdded => "member.added", + WebhookEvent::MemberDeleted => "member.deleted", + WebhookEvent::MemberUpdated => "member.updated", + WebhookEvent::IntegrationCreated => "integration.created", + WebhookEvent::IntegrationUpdated => "integration.updated", + WebhookEvent::IntegrationDeleted => "integration.deleted", + WebhookEvent::IntegrationSynced => "integration.synced", + WebhookEvent::IntegrationDesynced => "integration.desynced", + } + } } diff --git a/crates/nvisy-server/src/error.rs b/crates/nvisy-server/src/error.rs index 7d006de..a323924 100644 --- a/crates/nvisy-server/src/error.rs +++ b/crates/nvisy-server/src/error.rs @@ -174,6 +174,12 @@ impl From for Error { } } +impl From for Error { + fn from(err: nvisy_postgres::PgError) -> Self { + Error::external("postgres", err.to_string()).with_source(err) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/nvisy-server/src/handler/files.rs b/crates/nvisy-server/src/handler/files.rs index 916baf2..7593047 100644 --- a/crates/nvisy-server/src/handler/files.rs +++ b/crates/nvisy-server/src/handler/files.rs @@ -14,8 +14,8 @@ use axum::extract::{DefaultBodyLimit, State}; use axum::http::{HeaderMap, StatusCode}; use futures::StreamExt; use nvisy_nats::NatsClient; -use nvisy_nats::object::{DocumentKey, DocumentStore, Files as FilesBucket}; -use nvisy_nats::stream::{DocumentJobPublisher, PreprocessingData}; +use nvisy_nats::object::{FileKey, FilesBucket, ObjectStore}; +use nvisy_nats::stream::{EventPublisher, FileJob, FileStream}; use nvisy_postgres::PgClient; use nvisy_postgres::model::{File as FileModel, NewFile}; use nvisy_postgres::query::FileRepository; @@ -30,11 +30,14 @@ use crate::handler::request::{ use crate::handler::response::{self, ErrorResponse, File, Files, FilesPage}; use crate::handler::{ErrorKind, Result}; use crate::middleware::DEFAULT_MAX_FILE_BODY_SIZE; -use crate::service::ServiceState; +use crate::service::{ServiceState, WebhookEmitter}; /// Tracing target for workspace file operations. const TRACING_TARGET: &str = "nvisy_server::handler::workspace_files"; +/// Type alias for file job publisher. +type FileJobPublisher = EventPublisher, FileStream>; + /// Finds a file by ID or returns NotFound error. async fn find_file(conn: &mut nvisy_postgres::PgConn, file_id: Uuid) -> Result { conn.find_file_by_id(file_id).await?.ok_or_else(|| { @@ -102,8 +105,8 @@ fn list_files_docs(op: TransformOperation) -> TransformOperation { struct FileUploadContext { workspace_id: Uuid, account_id: Uuid, - document_store: DocumentStore, - publisher: DocumentJobPublisher, + file_store: ObjectStore, + publisher: FileJobPublisher, } /// Processes a single file from a multipart upload using streaming. 
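The subject returned by `as_subject` is what the emitter below combines with the workspace ID, so a delivery ends up on `webhooks.{workspace_id}.{category}.{action}`. A tiny sketch of that composition; the helper and test are illustrative, not part of the patch:

```rust
use nvisy_postgres::types::WebhookEvent;
use uuid::Uuid;

/// Routing suffix used by the emitter: `{workspace_id}.{category}.{action}`,
/// published under the webhook stream's `webhooks` subject.
fn routing_subject(workspace_id: Uuid, event: WebhookEvent) -> String {
    format!("{}.{}", workspace_id, event.as_subject())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn file_created_subject() {
        let ws = Uuid::nil();
        assert_eq!(
            routing_subject(ws, WebhookEvent::FileCreated),
            format!("{}.file.created", ws)
        );
    }
}
```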
@@ -123,12 +126,12 @@ async fn process_single_file( .unwrap_or("bin") .to_lowercase(); - // Generate document key with unique object ID for NATS storage - let document_key = DocumentKey::generate(ctx.workspace_id); + // Generate file key with unique object ID for NATS storage + let file_key = FileKey::generate(ctx.workspace_id); tracing::debug!( target: TRACING_TARGET, - object_id = %document_key.object_id(), + object_id = %file_key.object_id, "Streaming file to storage" ); @@ -137,11 +140,11 @@ async fn process_single_file( field.map(|result| result.map_err(std::io::Error::other)), ); - let put_result = ctx.document_store.put(&document_key, reader).await?; + let put_result = ctx.file_store.put(&file_key, reader).await?; tracing::debug!( target: TRACING_TARGET, - object_id = %document_key.object_id(), + object_id = %file_key.object_id, size = put_result.size(), sha256 = %put_result.sha256_hex(), "File streamed to storage" @@ -156,27 +159,22 @@ async fn process_single_file( file_extension: Some(file_extension.clone()), file_size_bytes: put_result.size() as i64, file_hash_sha256: put_result.sha256().to_vec(), - storage_path: document_key.to_string(), - storage_bucket: ctx.document_store.bucket().to_owned(), + storage_path: file_key.to_string(), + storage_bucket: ctx.file_store.bucket().to_owned(), ..Default::default() }; let created_file = conn.create_file(file_record).await?; // Step 3: Publish job to queue (use Postgres-generated file ID) - let job = nvisy_nats::stream::DocumentJob::new( - created_file.id, - document_key.to_string(), - file_extension, - PreprocessingData::default(), - ); + let job = FileJob::new(created_file.id, file_key.to_string(), file_extension, ()); - ctx.publisher.publish_job(&job).await.map_err(|err| { + ctx.publisher.publish(&job).await.map_err(|err| { tracing::error!( target: TRACING_TARGET, error = %err, file_id = %created_file.id, - "Failed to publish document job" + "Failed to publish file job" ); ErrorKind::InternalServerError.with_message("Failed to queue file for processing") })?; @@ -185,7 +183,7 @@ async fn process_single_file( target: TRACING_TARGET, file_id = %created_file.id, job_id = %job.id, - "Document job published" + "File job published" ); Ok(created_file) @@ -202,6 +200,7 @@ async fn process_single_file( async fn upload_file( State(pg_client): State, State(nats_client): State, + State(webhook_emitter): State, Path(path_params): Path, AuthState(auth_claims): AuthState, Multipart(mut multipart): Multipart, @@ -214,16 +213,14 @@ async fn upload_file( .authorize_workspace(&mut conn, path_params.workspace_id, Permission::UploadFiles) .await?; - let document_store = nats_client.document_store::().await?; + let file_store = nats_client.object_store::().await?; - let publisher = nats_client - .document_job_publisher::() - .await?; + let publisher: FileJobPublisher = nats_client.event_publisher().await?; let ctx = FileUploadContext { workspace_id: path_params.workspace_id, account_id: auth_claims.account_id, - document_store, + file_store, publisher, }; @@ -243,6 +240,30 @@ async fn upload_file( return Err(ErrorKind::BadRequest.with_message("No files provided in multipart request")); } + // Emit webhook events for created files (fire-and-forget) + for file in &uploaded_files { + let data = serde_json::json!({ + "displayName": file.display_name, + "fileSizeBytes": file.file_size, + }); + if let Err(err) = webhook_emitter + .emit_file_created( + path_params.workspace_id, + file.id, + Some(auth_claims.account_id), + Some(data), + ) + .await + { + 
tracing::warn!( + target: TRACING_TARGET, + error = %err, + file_id = %file.id, + "Failed to emit file:created webhook event" + ); + } + } + tracing::info!( target: TRACING_TARGET, file_count = uploaded_files.len(), @@ -308,6 +329,7 @@ fn read_file_docs(op: TransformOperation) -> TransformOperation { )] async fn update_file( State(pg_client): State, + State(webhook_emitter): State, Path(path_params): Path, AuthState(auth_claims): AuthState, ValidateJson(request): ValidateJson, @@ -333,6 +355,27 @@ async fn update_file( ErrorKind::InternalServerError.with_message("Failed to update file") })?; + // Emit webhook event (fire-and-forget) + let data = serde_json::json!({ + "displayName": updated_file.display_name, + }); + if let Err(err) = webhook_emitter + .emit_file_updated( + file.workspace_id, + path_params.file_id, + Some(auth_claims.account_id), + Some(data), + ) + .await + { + tracing::warn!( + target: TRACING_TARGET, + error = %err, + file_id = %path_params.file_id, + "Failed to emit file:updated webhook event" + ); + } + tracing::info!(target: TRACING_TARGET, "File updated"); Ok(( @@ -376,19 +419,19 @@ async fn download_file( .authorize_workspace(&mut conn, file.workspace_id, Permission::DownloadFiles) .await?; - let document_store = nats_client - .document_store::() + let file_store = nats_client + .object_store::() .await .map_err(|err| { tracing::error!( target: TRACING_TARGET, error = %err, - "Failed to create document store" + "Failed to create file store" ); ErrorKind::InternalServerError.with_message("Failed to initialize file storage") })?; - let document_key = DocumentKey::from_str(&file.storage_path).map_err(|err| { + let file_key = FileKey::from_str(&file.storage_path).map_err(|err| { tracing::error!( target: TRACING_TARGET, error = %err, @@ -400,9 +443,9 @@ async fn download_file( .with_context(format!("Parse error: {}", err)) })?; - // Get streaming content from NATS document store - let get_result = document_store - .get(&document_key) + // Get streaming content from NATS file store + let get_result = file_store + .get(&file_key) .await .map_err(|err| { tracing::error!( @@ -472,6 +515,7 @@ fn download_file_docs(op: TransformOperation) -> TransformOperation { )] async fn delete_file( State(pg_client): State, + State(webhook_emitter): State, Path(path_params): Path, AuthState(auth_claims): AuthState, ) -> Result { @@ -493,6 +537,27 @@ async fn delete_file( .with_context(format!("Database error: {}", err)) })?; + // Emit webhook event (fire-and-forget) + let data = serde_json::json!({ + "displayName": file.display_name, + }); + if let Err(err) = webhook_emitter + .emit_file_deleted( + file.workspace_id, + path_params.file_id, + Some(auth_claims.account_id), + Some(data), + ) + .await + { + tracing::warn!( + target: TRACING_TARGET, + error = %err, + file_id = %path_params.file_id, + "Failed to emit file:deleted webhook event" + ); + } + tracing::info!(target: TRACING_TARGET, "File deleted"); Ok(StatusCode::NO_CONTENT) } diff --git a/crates/nvisy-server/src/handler/members.rs b/crates/nvisy-server/src/handler/members.rs index ef3426b..77b117f 100644 --- a/crates/nvisy-server/src/handler/members.rs +++ b/crates/nvisy-server/src/handler/members.rs @@ -19,7 +19,7 @@ use crate::handler::request::{ }; use crate::handler::response::{ErrorResponse, Member, MembersPage, Page}; use crate::handler::{ErrorKind, Result}; -use crate::service::ServiceState; +use crate::service::{ServiceState, WebhookEmitter}; /// Tracing target for workspace member operations. 
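The upload path above streams each multipart field straight into the object store instead of buffering the whole file; the key piece is adapting a fallible `Bytes` stream into `AsyncRead`. A condensed sketch of that adapter, assuming the `tokio_util::io::StreamReader` wrapper that the handler's `field.map(... std::io::Error::other)` line feeds into:

```rust
use bytes::Bytes;
use futures::{Stream, StreamExt};
use tokio::io::AsyncRead;
use tokio_util::io::StreamReader;

/// Adapt a fallible byte stream (such as a multipart field) into `AsyncRead`
/// so it can be handed to the object store without buffering the whole file.
fn into_reader<S, E>(stream: S) -> impl AsyncRead
where
    S: Stream<Item = Result<Bytes, E>>,
    E: Into<Box<dyn std::error::Error + Send + Sync>>,
{
    // StreamReader requires the stream's error type to be `std::io::Error`.
    StreamReader::new(stream.map(|chunk| chunk.map_err(std::io::Error::other)))
}
```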
const TRACING_TARGET: &str = "nvisy_server::handler::members"; @@ -150,6 +150,7 @@ fn get_member_docs(op: TransformOperation) -> TransformOperation { )] async fn delete_member( State(pg_client): State, + State(webhook_emitter): State, AuthState(auth_state): AuthState, Path(path_params): Path, ) -> Result { @@ -188,6 +189,27 @@ async fn delete_member( conn.remove_workspace_member(path_params.workspace_id, path_params.account_id) .await?; + // Emit webhook event (fire-and-forget) + let data = serde_json::json!({ + "removedAccountId": path_params.account_id, + "removedBy": auth_state.account_id, + }); + if let Err(err) = webhook_emitter + .emit_member_deleted( + path_params.workspace_id, + path_params.account_id, // Use account_id as resource_id + Some(auth_state.account_id), + Some(data), + ) + .await + { + tracing::warn!( + target: TRACING_TARGET, + error = %err, + "Failed to emit member:deleted webhook event" + ); + } + tracing::warn!(target: TRACING_TARGET, "Workspace member removed"); Ok(StatusCode::OK) @@ -221,6 +243,7 @@ fn delete_member_docs(op: TransformOperation) -> TransformOperation { )] async fn update_member( State(pg_client): State, + State(webhook_emitter): State, AuthState(auth_state): AuthState, Path(path_params): Path, ValidateJson(request): ValidateJson, @@ -254,6 +277,7 @@ async fn update_member( .with_context("Owners can only leave the workspace themselves")); } + let new_role = request.role; conn.update_workspace_member( path_params.workspace_id, path_params.account_id, @@ -268,6 +292,28 @@ async fn update_member( return Err(ErrorKind::NotFound.with_resource("workspace_member")); }; + // Emit webhook event (fire-and-forget) + let data = serde_json::json!({ + "accountId": path_params.account_id, + "previousRole": current_member.member_role.to_string(), + "newRole": new_role.to_string(), + }); + if let Err(err) = webhook_emitter + .emit_member_updated( + path_params.workspace_id, + path_params.account_id, // Use account_id as resource_id + Some(auth_state.account_id), + Some(data), + ) + .await + { + tracing::warn!( + target: TRACING_TARGET, + error = %err, + "Failed to emit member:updated webhook event" + ); + } + tracing::info!( target: TRACING_TARGET, new_role = ?updated_member.member_role, diff --git a/crates/nvisy-server/src/handler/response/webhooks.rs b/crates/nvisy-server/src/handler/response/webhooks.rs index c27f724..be79265 100644 --- a/crates/nvisy-server/src/handler/response/webhooks.rs +++ b/crates/nvisy-server/src/handler/response/webhooks.rs @@ -71,6 +71,35 @@ impl Webhook { } } +/// Webhook creation response that includes the secret (visible only once). +/// +/// The secret is used for HMAC-SHA256 signature verification of webhook payloads. +/// It is only returned when the webhook is first created and cannot be retrieved +/// again. Store it securely. +#[must_use] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct WebhookCreated { + /// The created webhook details. + #[serde(flatten)] + pub webhook: Webhook, + /// HMAC-SHA256 signing secret for webhook verification. + /// + /// **Important**: This is the only time the secret will be shown. + /// Store it securely as it cannot be retrieved again. + pub secret: String, +} + +impl WebhookCreated { + pub fn from_model(webhook: model::WorkspaceWebhook) -> Self { + let secret = webhook.secret.clone(); + Self { + webhook: Webhook::from_model(webhook), + secret, + } + } +} + /// Paginated response for workspace webhooks. 
pub type WebhooksPage = Page; diff --git a/crates/nvisy-server/src/handler/webhooks.rs b/crates/nvisy-server/src/handler/webhooks.rs index 37da975..14576f8 100644 --- a/crates/nvisy-server/src/handler/webhooks.rs +++ b/crates/nvisy-server/src/handler/webhooks.rs @@ -19,7 +19,9 @@ use crate::handler::request::{ CreateWebhook, CursorPagination, TestWebhook, UpdateWebhook as UpdateWebhookRequest, WebhookPathParams, WorkspacePathParams, }; -use crate::handler::response::{ErrorResponse, Webhook, WebhookResult, WebhooksPage}; +use crate::handler::response::{ + ErrorResponse, Webhook, WebhookCreated, WebhookResult, WebhooksPage, +}; use crate::handler::{ErrorKind, Result}; use crate::service::ServiceState; @@ -41,7 +43,7 @@ async fn create_webhook( AuthState(auth_state): AuthState, Path(path_params): Path, ValidateJson(request): ValidateJson, -) -> Result<(StatusCode, Json)> { +) -> Result<(StatusCode, Json)> { tracing::debug!(target: TRACING_TARGET, "Creating workspace webhook"); let mut conn = pg_client.get_connection().await?; @@ -63,13 +65,21 @@ async fn create_webhook( "Webhook created", ); - Ok((StatusCode::CREATED, Json(Webhook::from_model(webhook)))) + // Return WebhookCreated which includes the secret (visible only once) + Ok(( + StatusCode::CREATED, + Json(WebhookCreated::from_model(webhook)), + )) } fn create_webhook_docs(op: TransformOperation) -> TransformOperation { op.summary("Create webhook") - .description("Creates a new webhook for the workspace.") - .response::<201, Json>() + .description( + "Creates a new webhook for the workspace. The response includes the signing secret \ + which is used for HMAC-SHA256 verification of webhook payloads. **Important**: The \ + secret is only shown once upon creation and cannot be retrieved again.", + ) + .response::<201, Json>() .response::<400, Json>() .response::<401, Json>() .response::<403, Json>() diff --git a/crates/nvisy-server/src/lib.rs b/crates/nvisy-server/src/lib.rs index 9987e42..40b5661 100644 --- a/crates/nvisy-server/src/lib.rs +++ b/crates/nvisy-server/src/lib.rs @@ -8,5 +8,6 @@ pub mod extract; pub mod handler; pub mod middleware; pub mod service; +pub mod worker; pub use crate::error::{BoxedError, Error, ErrorKind, Result}; diff --git a/crates/nvisy-server/src/service/mod.rs b/crates/nvisy-server/src/service/mod.rs index c3323de..a84be8e 100644 --- a/crates/nvisy-server/src/service/mod.rs +++ b/crates/nvisy-server/src/service/mod.rs @@ -4,6 +4,7 @@ mod cache; mod config; mod integration; mod security; +mod webhook; use nvisy_nats::NatsClient; use nvisy_postgres::PgClient; @@ -16,6 +17,7 @@ pub use crate::service::integration::IntegrationProvider; pub use crate::service::security::{ PasswordHasher, PasswordStrength, SessionKeys, SessionKeysConfig, UserAgentParser, }; +pub use crate::service::webhook::WebhookEmitter; /// Application state. 
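Because the signing secret is only returned in the `WebhookCreated` response, receivers are expected to store it and verify each delivery with HMAC-SHA256. A receiver-side sketch using the `hmac`, `sha2`, and `hex` crates; the signature header name and hex encoding are assumptions for illustration, so the exact scheme should be taken from the delivery worker / nvisy-webhook crate:

```rust
use hmac::{Hmac, Mac};
use sha2::Sha256;

type HmacSha256 = Hmac<Sha256>;

/// Receiver-side check of a webhook delivery body against a hex-encoded
/// signature (e.g. from an assumed `X-Webhook-Signature` header).
fn verify_signature(secret: &str, body: &[u8], signature_hex: &str) -> bool {
    let Ok(expected) = hex::decode(signature_hex) else {
        return false;
    };
    let Ok(mut mac) = HmacSha256::new_from_slice(secret.as_bytes()) else {
        return false;
    };
    mac.update(body);
    // Constant-time comparison to avoid leaking the signature through timing.
    mac.verify_slice(&expected).is_ok()
}
```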
/// @@ -37,6 +39,7 @@ pub struct ServiceState { pub password_strength: PasswordStrength, pub session_keys: SessionKeys, pub user_agent_parser: UserAgentParser, + pub webhook_emitter: WebhookEmitter, } impl ServiceState { @@ -50,6 +53,8 @@ impl ServiceState { let postgres = service_config.connect_postgres().await?; let nats = service_config.connect_nats().await?; + let webhook_emitter = WebhookEmitter::new(postgres.clone(), nats.clone()); + let service_state = Self { postgres, nats, @@ -61,6 +66,7 @@ impl ServiceState { password_strength: PasswordStrength::new(), session_keys: service_config.load_session_keys().await?, user_agent_parser: UserAgentParser::new(), + webhook_emitter, }; Ok(service_state) @@ -89,3 +95,4 @@ impl_di!(password_hasher: PasswordHasher); impl_di!(password_strength: PasswordStrength); impl_di!(session_keys: SessionKeys); impl_di!(user_agent_parser: UserAgentParser); +impl_di!(webhook_emitter: WebhookEmitter); diff --git a/crates/nvisy-server/src/service/webhook/emitter.rs b/crates/nvisy-server/src/service/webhook/emitter.rs new file mode 100644 index 0000000..941f8ec --- /dev/null +++ b/crates/nvisy-server/src/service/webhook/emitter.rs @@ -0,0 +1,440 @@ +//! Webhook event emitter for publishing domain events to NATS. + +use std::collections::HashMap; +use std::time::Duration; + +use nvisy_nats::NatsClient; +use nvisy_nats::stream::{EventPublisher, WebhookStream}; +use nvisy_postgres::PgClient; +use nvisy_postgres::query::WorkspaceWebhookRepository; +use nvisy_postgres::types::WebhookEvent; +use nvisy_webhook::{WebhookContext, WebhookRequest}; +use url::Url; +use uuid::Uuid; + +use crate::Result; + +/// Type alias for webhook publisher. +type WebhookPublisher = EventPublisher; + +/// Tracing target for webhook event emission. +const TRACING_TARGET: &str = "nvisy_server::service::webhook"; + +/// Default timeout for webhook delivery. +const DEFAULT_DELIVERY_TIMEOUT: Duration = Duration::from_secs(30); + +/// Webhook event emitter for publishing domain events. +/// +/// This service queries webhooks subscribed to specific events and publishes +/// requests to NATS for asynchronous delivery. +#[derive(Clone)] +pub struct WebhookEmitter { + pg_client: PgClient, + nats_client: NatsClient, +} + +impl WebhookEmitter { + /// Create a new webhook emitter. + pub fn new(pg_client: PgClient, nats_client: NatsClient) -> Self { + Self { + pg_client, + nats_client, + } + } + + /// Emit a webhook event for a workspace. + /// + /// This method: + /// 1. Queries all active webhooks subscribed to the event type + /// 2. Creates a `WebhookRequest` for each webhook + /// 3. 
Publishes the requests to NATS for asynchronous delivery + /// + /// # Arguments + /// + /// * `workspace_id` - The workspace where the event occurred + /// * `event` - The type of event that occurred + /// * `resource_id` - The ID of the affected resource + /// * `triggered_by` - The account ID that triggered the event (if any) + /// * `data` - Additional event-specific data + #[tracing::instrument( + skip(self, data), + fields( + workspace_id = %workspace_id, + event = %event, + resource_id = %resource_id, + ) + )] + pub async fn emit( + &self, + workspace_id: Uuid, + event: WebhookEvent, + resource_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + // Find all active webhooks subscribed to this event + let mut conn = self.pg_client.get_connection().await?; + let webhooks = conn.find_webhooks_for_event(workspace_id, event).await?; + + if webhooks.is_empty() { + tracing::debug!( + target: TRACING_TARGET, + "No webhooks subscribed to event" + ); + return Ok(0); + } + + tracing::debug!( + target: TRACING_TARGET, + webhook_count = webhooks.len(), + "Found webhooks subscribed to event" + ); + + // Create webhook requests + let event_subject = event.as_subject(); + let event_str = event.to_string(); + let resource_type = event.category().to_string(); + + let requests: Vec = webhooks + .into_iter() + .filter_map(|webhook| { + // Parse URL - skip invalid URLs + let url: Url = match webhook.url.parse() { + Ok(u) => u, + Err(err) => { + tracing::warn!( + target: TRACING_TARGET, + webhook_id = %webhook.id, + url = %webhook.url, + error = %err, + "Skipping webhook with invalid URL" + ); + return None; + } + }; + + // Build context + let mut context = WebhookContext::new(webhook.id, workspace_id, resource_id) + .with_resource_type(&resource_type); + + if let Some(account_id) = triggered_by { + context = context.with_account(account_id); + } + + if let Some(ref metadata) = data { + context = context.with_metadata(metadata.clone()); + } + + // Build request + let mut request = + WebhookRequest::new(url, &event_str, format!("Event: {}", event_str), context) + .with_timeout(DEFAULT_DELIVERY_TIMEOUT) + .with_secret(webhook.secret); + + // Add custom headers from webhook config + if !webhook.headers.is_null() + && let Some(obj) = webhook.headers.as_object() + { + let header_map: HashMap = obj + .iter() + .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string()))) + .collect(); + if !header_map.is_empty() { + request = request.with_headers(header_map); + } + } + + Some(request) + }) + .collect(); + + let request_count = requests.len(); + + if request_count == 0 { + return Ok(0); + } + + // Publish requests to NATS + let publisher: WebhookPublisher = self.nats_client.event_publisher().await?; + + for request in &requests { + // Use workspace_id.event_subject as the routing subject + let subject = format!("{}.{}", request.context.workspace_id, event_subject); + publisher.publish_to(&subject, request).await?; + } + + tracing::info!( + target: TRACING_TARGET, + request_count, + "Published webhook requests" + ); + + Ok(request_count) + } + + /// Emit a document created event. + #[inline] + pub async fn emit_document_created( + &self, + workspace_id: Uuid, + document_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::DocumentCreated, + document_id, + triggered_by, + data, + ) + .await + } + + /// Emit a document updated event. 
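Handlers in this patch call the typed `emit_*` helpers and deliberately swallow failures; the underlying `emit` also returns how many webhook requests were queued, which is useful for logging. A small wrapper sketch showing that fire-and-forget pattern (the wrapper itself is illustrative, not part of the patch):

```rust
use nvisy_postgres::types::WebhookEvent;
use nvisy_server::service::WebhookEmitter;
use uuid::Uuid;

/// Fire-and-forget emission as the handlers use it: failures are logged,
/// never surfaced to the API caller.
async fn emit_quietly(
    emitter: &WebhookEmitter,
    workspace_id: Uuid,
    event: WebhookEvent,
    resource_id: Uuid,
    actor: Option<Uuid>,
) {
    match emitter.emit(workspace_id, event, resource_id, actor, None).await {
        Ok(count) => tracing::debug!(count, %event, "queued webhook requests"),
        Err(err) => tracing::warn!(error = %err, %event, "failed to emit webhook event"),
    }
}
```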
+ #[inline] + pub async fn emit_document_updated( + &self, + workspace_id: Uuid, + document_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::DocumentUpdated, + document_id, + triggered_by, + data, + ) + .await + } + + /// Emit a document deleted event. + #[inline] + pub async fn emit_document_deleted( + &self, + workspace_id: Uuid, + document_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::DocumentDeleted, + document_id, + triggered_by, + data, + ) + .await + } + + /// Emit a file created event. + #[inline] + pub async fn emit_file_created( + &self, + workspace_id: Uuid, + file_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::FileCreated, + file_id, + triggered_by, + data, + ) + .await + } + + /// Emit a file updated event. + #[inline] + pub async fn emit_file_updated( + &self, + workspace_id: Uuid, + file_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::FileUpdated, + file_id, + triggered_by, + data, + ) + .await + } + + /// Emit a file deleted event. + #[inline] + pub async fn emit_file_deleted( + &self, + workspace_id: Uuid, + file_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::FileDeleted, + file_id, + triggered_by, + data, + ) + .await + } + + /// Emit a member added event. + #[inline] + pub async fn emit_member_added( + &self, + workspace_id: Uuid, + member_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::MemberAdded, + member_id, + triggered_by, + data, + ) + .await + } + + /// Emit a member updated event. + #[inline] + pub async fn emit_member_updated( + &self, + workspace_id: Uuid, + member_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::MemberUpdated, + member_id, + triggered_by, + data, + ) + .await + } + + /// Emit a member deleted event. + #[inline] + pub async fn emit_member_deleted( + &self, + workspace_id: Uuid, + member_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::MemberDeleted, + member_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration created event. + #[inline] + pub async fn emit_integration_created( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationCreated, + integration_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration updated event. + #[inline] + pub async fn emit_integration_updated( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationUpdated, + integration_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration deleted event. + #[inline] + pub async fn emit_integration_deleted( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationDeleted, + integration_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration synced event. 
+ #[inline] + pub async fn emit_integration_synced( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationSynced, + integration_id, + triggered_by, + data, + ) + .await + } + + /// Emit an integration desynced event. + #[inline] + pub async fn emit_integration_desynced( + &self, + workspace_id: Uuid, + integration_id: Uuid, + triggered_by: Option, + data: Option, + ) -> Result { + self.emit( + workspace_id, + WebhookEvent::IntegrationDesynced, + integration_id, + triggered_by, + data, + ) + .await + } +} diff --git a/crates/nvisy-server/src/service/webhook/mod.rs b/crates/nvisy-server/src/service/webhook/mod.rs new file mode 100644 index 0000000..7c3ddf6 --- /dev/null +++ b/crates/nvisy-server/src/service/webhook/mod.rs @@ -0,0 +1,7 @@ +//! Webhook event emission service. +//! +//! Provides helpers for emitting domain events to webhooks via NATS JetStream. + +mod emitter; + +pub use emitter::WebhookEmitter; diff --git a/crates/nvisy-server/src/worker/mod.rs b/crates/nvisy-server/src/worker/mod.rs new file mode 100644 index 0000000..4e558cf --- /dev/null +++ b/crates/nvisy-server/src/worker/mod.rs @@ -0,0 +1,5 @@ +//! Background workers for async processing. + +mod webhook; + +pub use webhook::WebhookWorker; diff --git a/crates/nvisy-server/src/worker/webhook.rs b/crates/nvisy-server/src/worker/webhook.rs new file mode 100644 index 0000000..d3758a4 --- /dev/null +++ b/crates/nvisy-server/src/worker/webhook.rs @@ -0,0 +1,178 @@ +//! Webhook delivery worker. +//! +//! Consumes webhook requests from NATS and delivers them to external endpoints. + +use std::time::Duration; + +use nvisy_nats::NatsClient; +use nvisy_nats::stream::{EventSubscriber, WebhookStream}; +use nvisy_webhook::{WebhookRequest, WebhookService}; +use tokio_util::sync::CancellationToken; + +use crate::Result; + +/// Type alias for webhook subscriber. +type WebhookSubscriber = EventSubscriber; + +/// Tracing target for webhook worker operations. +const TRACING_TARGET: &str = "nvisy_server::worker::webhook"; + +/// Webhook delivery worker. +/// +/// This worker subscribes to the `WEBHOOKS` NATS stream and delivers +/// webhook payloads to external endpoints with HMAC-SHA256 signatures. +pub struct WebhookWorker { + nats_client: NatsClient, + webhook_service: WebhookService, +} + +impl WebhookWorker { + /// Create a new webhook worker. + pub fn new(nats_client: NatsClient, webhook_service: WebhookService) -> Self { + Self { + nats_client, + webhook_service, + } + } + + /// Run the webhook worker until cancelled. + /// + /// This method will continuously consume webhook requests from NATS and + /// deliver them to the configured endpoints. Logs lifecycle events + /// (start, stop, errors) internally. + pub async fn run(&self, cancel: CancellationToken) -> Result<()> { + tracing::info!( + target: TRACING_TARGET, + "Starting webhook worker" + ); + + let result = self.run_inner(cancel).await; + + match &result { + Ok(()) => { + tracing::info!( + target: TRACING_TARGET, + "Webhook worker stopped" + ); + } + Err(err) => { + tracing::error!( + target: TRACING_TARGET, + error = %err, + "Webhook worker failed" + ); + } + } + + result + } + + /// Internal run loop. + async fn run_inner(&self, cancel: CancellationToken) -> Result<()> { + let subscriber: WebhookSubscriber = self.nats_client.webhook_subscriber().await?; + + let mut stream = subscriber.subscribe().await?; + + loop { + tokio::select! 
{ + _ = cancel.cancelled() => { + tracing::info!( + target: TRACING_TARGET, + "Webhook worker shutdown requested" + ); + break; + } + result = stream.next_with_timeout(Duration::from_secs(5)) => { + match result { + Ok(Some(mut message)) => { + let request = message.payload(); + + if let Err(err) = self.deliver(request).await { + tracing::error!( + target: TRACING_TARGET, + error = %err, + request_id = %request.request_id, + webhook_id = %request.context.webhook_id, + "Failed to deliver webhook" + ); + // Nack the message for redelivery + if let Err(nack_err) = message.nack().await { + tracing::error!( + target: TRACING_TARGET, + error = %nack_err, + "Failed to nack message" + ); + } + } else { + // Ack successful delivery + if let Err(ack_err) = message.ack().await { + tracing::error!( + target: TRACING_TARGET, + error = %ack_err, + "Failed to ack message" + ); + } + } + } + Ok(None) => { + // Timeout, continue loop + } + Err(err) => { + tracing::error!( + target: TRACING_TARGET, + error = %err, + "Error receiving message from stream" + ); + // Brief pause before retrying + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + } + } + } + + Ok(()) + } + + /// Deliver a webhook request. + /// + /// The `WebhookService` handles HMAC-SHA256 signing automatically + /// when `request.secret` is present. + async fn deliver(&self, request: &WebhookRequest) -> Result<()> { + tracing::debug!( + target: TRACING_TARGET, + request_id = %request.request_id, + webhook_id = %request.context.webhook_id, + event = %request.event, + "Delivering webhook" + ); + + let response = self.webhook_service.deliver(request).await.map_err(|err| { + crate::error::Error::external("webhook", format!("Delivery failed: {}", err)) + })?; + + if response.is_success() { + tracing::info!( + target: TRACING_TARGET, + request_id = %request.request_id, + webhook_id = %request.context.webhook_id, + status_code = response.status_code, + "Webhook delivered successfully" + ); + Ok(()) + } else { + tracing::warn!( + target: TRACING_TARGET, + request_id = %request.request_id, + webhook_id = %request.context.webhook_id, + status_code = response.status_code, + "Webhook delivery returned non-success status" + ); + // Return error to trigger nack/retry + Err(crate::error::Error::external( + "webhook", + format!("Delivery returned status {}", response.status_code), + )) + } + } +} diff --git a/crates/nvisy-webhook/src/request.rs b/crates/nvisy-webhook/src/request.rs index cef1185..417aeab 100644 --- a/crates/nvisy-webhook/src/request.rs +++ b/crates/nvisy-webhook/src/request.rs @@ -9,11 +9,13 @@ use url::Url; use uuid::Uuid; /// A webhook delivery request. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] +#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))] pub struct WebhookRequest { /// Unique identifier for this request. pub request_id: Uuid, /// The webhook endpoint URL. + #[cfg_attr(feature = "schema", schemars(with = "String"))] pub url: Url, /// The event type that triggered this webhook delivery. pub event: String, @@ -24,7 +26,12 @@ pub struct WebhookRequest { /// Custom headers to include in the request. pub headers: HashMap, /// Optional request timeout (uses client default if not set). + #[serde(default, skip_serializing_if = "Option::is_none")] + #[cfg_attr(feature = "schema", schemars(with = "Option"))] pub timeout: Option, + /// HMAC-SHA256 signing secret for request authentication. 
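The `secret` field declared just below feeds the HMAC-SHA256 signature (`X-Webhook-Signature: sha256=<hex>`) that the reqwest client hunk further down attaches to each delivery. A receiver could verify it along the lines of the sketch below, using the `hmac`, `sha2`, and `hex` crates. One explicit assumption: the signed input is taken to be the timestamp followed by the raw body, mirroring the `sign_payload(secret, timestamp, &payload_bytes)` call, since the exact canonical string is not reproduced in this excerpt.

```rust
use hmac::{Hmac, Mac};
use sha2::Sha256;

type HmacSha256 = Hmac<Sha256>;

/// Returns true when `header` (e.g. "sha256=ab12...") matches the request body.
fn verify_webhook_signature(secret: &str, timestamp: &str, body: &[u8], header: &str) -> bool {
    let Some(hex_sig) = header.strip_prefix("sha256=") else {
        return false;
    };
    let Ok(expected) = hex::decode(hex_sig) else {
        return false;
    };
    // Assumption: the signed message is "{timestamp}." followed by the body;
    // adjust to match whatever sign_payload actually concatenates.
    let mut mac = HmacSha256::new_from_slice(secret.as_bytes())
        .expect("HMAC-SHA256 accepts keys of any length");
    mac.update(timestamp.as_bytes());
    mac.update(b".");
    mac.update(body);
    mac.verify_slice(&expected).is_ok()
}
```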
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub secret: Option, } impl WebhookRequest { @@ -43,6 +50,7 @@ impl WebhookRequest { context, headers: HashMap::new(), timeout: None, + secret: None, } } @@ -74,6 +82,12 @@ impl WebhookRequest { self } + /// Sets the signing secret for HMAC-SHA256 authentication. + pub fn with_secret(mut self, secret: impl Into) -> Self { + self.secret = Some(secret.into()); + self + } + /// Converts this request into a payload for serialization. pub fn into_payload(self) -> WebhookPayload { WebhookPayload { diff --git a/crates/nvisy-webhook/src/reqwest/client.rs b/crates/nvisy-webhook/src/reqwest/client.rs index a55b47f..139de98 100644 --- a/crates/nvisy-webhook/src/reqwest/client.rs +++ b/crates/nvisy-webhook/src/reqwest/client.rs @@ -148,6 +148,13 @@ impl WebhookProvider for ReqwestClient { .header("X-Webhook-Request-Id", request.request_id.to_string()) .timeout(timeout); + // Add HMAC-SHA256 signature if secret is present + if let Some(ref secret) = request.secret { + let signature = Self::sign_payload(secret, timestamp, &payload_bytes); + http_request = + http_request.header("X-Webhook-Signature", format!("sha256={}", signature)); + } + // Add custom headers for (name, value) in &request.headers { http_request = http_request.header(name, value); diff --git a/docker/README.md b/docker/README.md index 1cadf86..2b23273 100644 --- a/docker/README.md +++ b/docker/README.md @@ -39,58 +39,6 @@ docker compose up -d --build | NATS | 4222, 8222 | Message queue (JetStream) | | Server | 8080 | Nvisy API | -## Optional Integrations (Development) - -The development compose file includes optional services that can be enabled using Docker Compose profiles. These are useful for testing integrations locally. - -### Available Profiles - -| Profile | Services | Description | -| -------------- | -------------- | ------------------------------------ | -| `minio` | MinIO | S3-compatible object storage | -| `n8n` | N8n | Workflow automation platform | -| `integrations` | MinIO + N8n | All optional integration services | - -### Optional Services - -| Service | Port(s) | Console URL | Description | -| ------- | ----------- | ----------------------- | ---------------------------- | -| MinIO | 9000, 9001 | http://localhost:9001 | S3-compatible object storage | -| N8n | 5678 | http://localhost:5678 | Workflow automation | - -### Usage - -Start core services only (PostgreSQL + NATS): - -```bash -docker compose -f docker-compose.dev.yml up -d -``` - -Start with MinIO: - -```bash -docker compose -f docker-compose.dev.yml --profile minio up -d -``` - -Start with N8n: - -```bash -docker compose -f docker-compose.dev.yml --profile n8n up -d -``` - -Start with all integrations: - -```bash -docker compose -f docker-compose.dev.yml --profile integrations up -d -``` - -### Default Credentials - -| Service | Username | Password | Environment Variables | -| ------- | ------------ | ------------ | ------------------------------------------------ | -| MinIO | `minioadmin` | `minioadmin` | `MINIO_ROOT_USER`, `MINIO_ROOT_PASSWORD` | -| N8n | `admin` | `admin` | `N8N_BASIC_AUTH_USER`, `N8N_BASIC_AUTH_PASSWORD` | - ## Commands ```bash diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 6b2f75b..a287d72 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -44,67 +44,9 @@ services: networks: - nvisy-dev - # Optional: MinIO (S3-compatible object storage) - minio: - image: minio/minio:latest - container_name: nvisy-minio-dev - 
profiles: - - minio - - integrations - restart: unless-stopped - ports: - - "${MINIO_API_PORT:-9000}:9000" - - "${MINIO_CONSOLE_PORT:-9001}:9001" - environment: - MINIO_ROOT_USER: ${MINIO_ROOT_USER:-minioadmin} - MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD:-minioadmin} - command: server /data --console-address ":9001" - volumes: - - minio_data:/data - healthcheck: - test: ["CMD", "mc", "ready", "local"] - interval: 5s - timeout: 5s - retries: 5 - start_period: 5s - networks: - - nvisy-dev - - # Optional: N8n (workflow automation) - n8n: - image: n8nio/n8n:latest - container_name: nvisy-n8n-dev - profiles: - - n8n - - integrations - restart: unless-stopped - ports: - - "${N8N_PORT:-5678}:5678" - environment: - N8N_HOST: ${N8N_HOST:-localhost} - N8N_PORT: 5678 - N8N_PROTOCOL: ${N8N_PROTOCOL:-http} - WEBHOOK_URL: ${N8N_WEBHOOK_URL:-http://localhost:5678} - GENERIC_TIMEZONE: ${TZ:-UTC} - N8N_BASIC_AUTH_ACTIVE: ${N8N_BASIC_AUTH_ACTIVE:-true} - N8N_BASIC_AUTH_USER: ${N8N_BASIC_AUTH_USER:-admin} - N8N_BASIC_AUTH_PASSWORD: ${N8N_BASIC_AUTH_PASSWORD:-admin} - volumes: - - n8n_data:/home/node/.n8n - healthcheck: - test: ["CMD-SHELL", "wget -qO- http://localhost:5678/healthz || exit 1"] - interval: 10s - timeout: 5s - retries: 5 - start_period: 30s - networks: - - nvisy-dev - volumes: postgres_data: nats_data: - minio_data: - n8n_data: networks: nvisy-dev: diff --git a/integrations/nvisy-rig/src/rag/mod.rs b/integrations/nvisy-rig/src/rag/mod.rs index 45f4181..2a9ffd8 100644 --- a/integrations/nvisy-rig/src/rag/mod.rs +++ b/integrations/nvisy-rig/src/rag/mod.rs @@ -31,7 +31,7 @@ mod splitter; use std::sync::Arc; use nvisy_nats::NatsClient; -use nvisy_nats::object::{DocumentStore, Files}; +use nvisy_nats::object::{FileKey, FilesBucket, ObjectStore}; use nvisy_postgres::PgClient; use uuid::Uuid; @@ -54,7 +54,7 @@ pub struct RagService { struct RagServiceInner { provider: EmbeddingProvider, db: PgClient, - files: DocumentStore, + files: ObjectStore, config: RagConfig, } @@ -67,9 +67,9 @@ impl RagService { nats: NatsClient, ) -> Result { let files = nats - .document_store::() + .object_store::() .await - .map_err(|e| crate::Error::retrieval(format!("failed to open document store: {e}")))?; + .map_err(|e| crate::Error::retrieval(format!("failed to open file store: {e}")))?; let inner = RagServiceInner { provider, diff --git a/integrations/nvisy-rig/src/rag/searcher/mod.rs b/integrations/nvisy-rig/src/rag/searcher/mod.rs index e4419be..61f24a5 100644 --- a/integrations/nvisy-rig/src/rag/searcher/mod.rs +++ b/integrations/nvisy-rig/src/rag/searcher/mod.rs @@ -7,7 +7,7 @@ mod scope; use std::collections::HashMap; -use nvisy_nats::object::{DocumentKey, DocumentStore, Files}; +use nvisy_nats::object::{FileKey, FilesBucket, ObjectStore}; use nvisy_postgres::model::ScoredFileChunk; use nvisy_postgres::query::FileChunkRepository; use nvisy_postgres::{PgClient, Vector}; @@ -25,7 +25,7 @@ use crate::{Error, Result}; pub struct Searcher { provider: EmbeddingProvider, db: PgClient, - files: DocumentStore, + files: ObjectStore, scope: SearchScope, min_score: Option, } @@ -35,7 +35,7 @@ impl Searcher { pub(crate) fn new( provider: EmbeddingProvider, db: PgClient, - files: DocumentStore, + files: ObjectStore, scope: SearchScope, ) -> Self { Self { @@ -150,7 +150,7 @@ impl Searcher { } async fn fetch_file(&self, file_id: Uuid) -> Result> { - let key = DocumentKey::from_parts(Uuid::nil(), file_id); + let key = FileKey::from_parts(Uuid::nil(), file_id); let mut result = self .files diff --git 
a/integrations/nvisy-rig/src/session/store.rs b/integrations/nvisy-rig/src/session/store.rs index d8700a3..b0011ba 100644 --- a/integrations/nvisy-rig/src/session/store.rs +++ b/integrations/nvisy-rig/src/session/store.rs @@ -7,14 +7,17 @@ use std::time::Duration; use derive_more::{Deref, DerefMut}; use nvisy_nats::NatsClient; -use nvisy_nats::kv::ChatHistoryStore; +use nvisy_nats::kv::{ChatHistoryBucket, KvStore, SessionKey}; use super::Session; use crate::Result; +/// Type alias for session KV store. +type SessionKvStore = KvStore; + /// Session store backed by NATS KV. /// -/// This is a thin wrapper around `nvisy_nats::kv::ChatHistoryStore` +/// This is a thin wrapper around `KvStore` /// that provides session persistence for rig agents. /// /// This type is cheap to clone and can be shared across threads. @@ -22,14 +25,14 @@ use crate::Result; pub struct SessionStore { #[deref] #[deref_mut] - inner: ChatHistoryStore, + inner: SessionKvStore, } impl SessionStore { /// Creates a new session store with default TTL (30 minutes). pub async fn new(nats: NatsClient) -> Result { let inner = nats - .chat_history_store(None) + .chat_history_store() .await .map_err(|e| crate::Error::session(format!("failed to create store: {e}")))?; Ok(Self { inner }) @@ -38,7 +41,7 @@ impl SessionStore { /// Creates a session store with custom TTL. pub async fn with_ttl(nats: NatsClient, ttl: Duration) -> Result { let inner = nats - .chat_history_store(Some(ttl)) + .chat_history_store_with_ttl(ttl) .await .map_err(|e| crate::Error::session(format!("failed to create store: {e}")))?; Ok(Self { inner }) @@ -46,40 +49,48 @@ impl SessionStore { /// Creates a new session. pub async fn create(&self, session: &Session) -> Result<()> { + let key = SessionKey::from(session.id()); self.inner - .create(session.id(), session) + .put(&key, session) .await - .map_err(|e| crate::Error::session(format!("failed to create: {e}"))) + .map_err(|e| crate::Error::session(format!("failed to create: {e}")))?; + Ok(()) } /// Gets a session by ID. pub async fn get(&self, session_id: uuid::Uuid) -> Result> { + let key = SessionKey::from(session_id); self.inner - .get(session_id) + .get_value(&key) .await .map_err(|e| crate::Error::session(format!("failed to get: {e}"))) } /// Updates an existing session (also resets TTL). pub async fn update(&self, session: &Session) -> Result<()> { + let key = SessionKey::from(session.id()); self.inner - .update(session.id(), session) + .put(&key, session) .await - .map_err(|e| crate::Error::session(format!("failed to update: {e}"))) + .map_err(|e| crate::Error::session(format!("failed to update: {e}")))?; + Ok(()) } /// Touches a session to reset its TTL. pub async fn touch(&self, session_id: uuid::Uuid) -> Result<()> { + let key = SessionKey::from(session_id); self.inner - .touch(session_id) + .touch(&key) .await - .map_err(|e| crate::Error::session(format!("failed to touch: {e}"))) + .map_err(|e| crate::Error::session(format!("failed to touch: {e}")))?; + Ok(()) } /// Deletes a session. 
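The `delete` method that follows completes the wrapper. Taken together, the migrated surface reads roughly as in the sketch below; `nats` is an existing `NatsClient`, `session` is a `Session` value, and the `Option<Session>` return of `get` is inferred from the surrounding code rather than quoted from it.

```rust
// Sketch of the SessionStore surface after the KvStore migration; errors bubbled up with `?`.
let store = SessionStore::new(nats).await?;

store.create(&session).await?;                 // persist with the default 30-minute TTL
store.touch(session.id()).await?;              // reset the TTL while the chat stays active
let current = store.get(session.id()).await?;  // Option<Session>
store.update(&session).await?;                 // overwrite the entry and reset its TTL
store.delete(session.id()).await?;             // drop it once the conversation ends
```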
pub async fn delete(&self, session_id: uuid::Uuid) -> Result<()> { + let key = SessionKey::from(session_id); self.inner - .delete(session_id) + .delete(&key) .await .map_err(|e| crate::Error::session(format!("failed to delete: {e}"))) } diff --git a/integrations/nvisy-vector/src/pgvector/backend.rs b/integrations/nvisy-vector/src/pgvector/backend.rs index f04088f..c9c1708 100644 --- a/integrations/nvisy-vector/src/pgvector/backend.rs +++ b/integrations/nvisy-vector/src/pgvector/backend.rs @@ -147,8 +147,7 @@ impl VectorOutput for PgVectorBackend { } // Get dimensions from the first vector - let dimensions = vectors - .get(0) + let dimensions = <[_]>::first(&vectors) .map(|v| v.vector.len()) .ok_or_else(|| DataError::invalid("No vectors provided"))?; From 81f33c3a172a1628547c9b90eb276738ab980ce6 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 20 Jan 2026 07:30:37 +0100 Subject: [PATCH 09/28] chore: sort workspace members and move nvisy-vector deps to workspace --- Cargo.toml | 27 +++++++++++-------- .../nvisy-data/Cargo.toml | 0 .../nvisy-data/src/error.rs | 0 .../nvisy-data/src/input.rs | 0 .../nvisy-data/src/lib.rs | 0 .../nvisy-data/src/output.rs | 0 .../nvisy-data/src/types.rs | 0 .../nvisy-data/src/vector.rs | 0 crates/nvisy-nats/src/kv/kv_bucket.rs | 4 +-- crates/nvisy-nats/src/stream/event_stream.rs | 8 +++--- .../nvisy-opendal/Cargo.toml | 0 .../nvisy-opendal/README.md | 0 .../nvisy-opendal/src/azblob/config.rs | 0 .../nvisy-opendal/src/azblob/mod.rs | 0 .../nvisy-opendal/src/backend.rs | 0 .../nvisy-opendal/src/config.rs | 0 .../nvisy-opendal/src/dropbox/config.rs | 0 .../nvisy-opendal/src/dropbox/mod.rs | 0 .../nvisy-opendal/src/gcs/config.rs | 0 .../nvisy-opendal/src/gcs/mod.rs | 0 .../nvisy-opendal/src/gdrive/config.rs | 0 .../nvisy-opendal/src/gdrive/mod.rs | 0 .../nvisy-opendal/src/lib.rs | 1 - .../nvisy-opendal/src/onedrive/config.rs | 0 .../nvisy-opendal/src/onedrive/mod.rs | 0 .../nvisy-opendal/src/s3/config.rs | 0 .../nvisy-opendal/src/s3/mod.rs | 0 {integrations => crates}/nvisy-rig/Cargo.toml | 0 {integrations => crates}/nvisy-rig/README.md | 0 .../nvisy-rig/src/chat/agent/context.rs | 0 .../nvisy-rig/src/chat/agent/executor.rs | 0 .../nvisy-rig/src/chat/agent/mod.rs | 0 .../nvisy-rig/src/chat/agent/prompt.rs | 0 .../nvisy-rig/src/chat/event.rs | 0 .../nvisy-rig/src/chat/mod.rs | 0 .../nvisy-rig/src/chat/response.rs | 0 .../nvisy-rig/src/chat/service.rs | 0 .../nvisy-rig/src/chat/stream.rs | 0 .../nvisy-rig/src/chat/usage.rs | 0 .../nvisy-rig/src/error.rs | 0 {integrations => crates}/nvisy-rig/src/lib.rs | 0 .../nvisy-rig/src/provider/config.rs | 0 .../nvisy-rig/src/provider/embedding.rs | 0 .../nvisy-rig/src/provider/mod.rs | 0 .../nvisy-rig/src/provider/registry.rs | 0 .../nvisy-rig/src/rag/config.rs | 0 .../nvisy-rig/src/rag/indexer/indexed.rs | 0 .../nvisy-rig/src/rag/indexer/mod.rs | 0 .../nvisy-rig/src/rag/mod.rs | 0 .../nvisy-rig/src/rag/searcher/mod.rs | 0 .../nvisy-rig/src/rag/searcher/retrieved.rs | 0 .../nvisy-rig/src/rag/searcher/scope.rs | 0 .../nvisy-rig/src/rag/splitter/chunk.rs | 0 .../nvisy-rig/src/rag/splitter/metadata.rs | 0 .../nvisy-rig/src/rag/splitter/mod.rs | 0 .../nvisy-rig/src/service/config.rs | 0 .../nvisy-rig/src/service/mod.rs | 0 .../nvisy-rig/src/service/rig.rs | 0 .../nvisy-rig/src/session/message.rs | 0 .../nvisy-rig/src/session/mod.rs | 0 .../nvisy-rig/src/session/policy.rs | 0 .../nvisy-rig/src/session/store.rs | 0 .../nvisy-rig/src/tool/definition.rs | 0 .../nvisy-rig/src/tool/edit/mod.rs | 0 
.../nvisy-rig/src/tool/edit/operation.rs | 0 .../nvisy-rig/src/tool/edit/proposed.rs | 0 .../nvisy-rig/src/tool/mod.rs | 0 .../nvisy-rig/src/tool/registry.rs | 0 .../nvisy-rig/src/tool/types.rs | 0 .../nvisy-runtime/Cargo.toml | 0 .../nvisy-runtime/README.md | 0 .../nvisy-runtime/src/engine/config.rs | 0 .../nvisy-runtime/src/engine/executor.rs | 0 .../nvisy-runtime/src/engine/mod.rs | 0 .../nvisy-runtime/src/error.rs | 0 .../nvisy-runtime/src/graph/edge.rs | 0 .../nvisy-runtime/src/graph/mod.rs | 0 .../nvisy-runtime/src/graph/workflow.rs | 0 .../nvisy-runtime/src/lib.rs | 0 .../nvisy-runtime/src/node/data.rs | 0 .../nvisy-runtime/src/node/id.rs | 0 .../nvisy-runtime/src/node/input/config.rs | 0 .../nvisy-runtime/src/node/input/mod.rs | 0 .../nvisy-runtime/src/node/mod.rs | 0 .../nvisy-runtime/src/node/output/config.rs | 0 .../nvisy-runtime/src/node/output/mod.rs | 0 .../src/node/transformer/chunking.rs | 0 .../src/node/transformer/config.rs | 0 .../src/node/transformer/document.rs | 0 .../src/node/transformer/embedding.rs | 0 .../src/node/transformer/extraction.rs | 0 .../nvisy-runtime/src/node/transformer/mod.rs | 0 .../src/node/transformer/processing.rs | 0 .../src/node/transformer/quality.rs | 0 .../src/node/transformer/routing.rs | 0 .../nvisy-runtime/src/runtime/config.rs | 0 .../nvisy-runtime/src/runtime/mod.rs | 0 .../nvisy-runtime/src/runtime/service.rs | 0 .../nvisy-vector/Cargo.toml | 8 +++--- .../nvisy-vector/README.md | 0 .../nvisy-vector/src/config.rs | 0 .../nvisy-vector/src/lib.rs | 3 +-- .../nvisy-vector/src/milvus/backend.rs | 0 .../nvisy-vector/src/milvus/config.rs | 0 .../nvisy-vector/src/milvus/mod.rs | 0 .../nvisy-vector/src/pgvector/backend.rs | 0 .../nvisy-vector/src/pgvector/config.rs | 0 .../nvisy-vector/src/pgvector/mod.rs | 0 .../nvisy-vector/src/pinecone/backend.rs | 0 .../nvisy-vector/src/pinecone/config.rs | 0 .../nvisy-vector/src/pinecone/mod.rs | 0 .../nvisy-vector/src/qdrant/backend.rs | 0 .../nvisy-vector/src/qdrant/config.rs | 0 .../nvisy-vector/src/qdrant/mod.rs | 0 .../nvisy-vector/src/store.rs | 0 115 files changed, 27 insertions(+), 24 deletions(-) rename {integrations => crates}/nvisy-data/Cargo.toml (100%) rename {integrations => crates}/nvisy-data/src/error.rs (100%) rename {integrations => crates}/nvisy-data/src/input.rs (100%) rename {integrations => crates}/nvisy-data/src/lib.rs (100%) rename {integrations => crates}/nvisy-data/src/output.rs (100%) rename {integrations => crates}/nvisy-data/src/types.rs (100%) rename {integrations => crates}/nvisy-data/src/vector.rs (100%) rename {integrations => crates}/nvisy-opendal/Cargo.toml (100%) rename {integrations => crates}/nvisy-opendal/README.md (100%) rename {integrations => crates}/nvisy-opendal/src/azblob/config.rs (100%) rename {integrations => crates}/nvisy-opendal/src/azblob/mod.rs (100%) rename {integrations => crates}/nvisy-opendal/src/backend.rs (100%) rename {integrations => crates}/nvisy-opendal/src/config.rs (100%) rename {integrations => crates}/nvisy-opendal/src/dropbox/config.rs (100%) rename {integrations => crates}/nvisy-opendal/src/dropbox/mod.rs (100%) rename {integrations => crates}/nvisy-opendal/src/gcs/config.rs (100%) rename {integrations => crates}/nvisy-opendal/src/gcs/mod.rs (100%) rename {integrations => crates}/nvisy-opendal/src/gdrive/config.rs (100%) rename {integrations => crates}/nvisy-opendal/src/gdrive/mod.rs (100%) rename {integrations => crates}/nvisy-opendal/src/lib.rs (99%) rename {integrations => crates}/nvisy-opendal/src/onedrive/config.rs (100%) rename 
{integrations => crates}/nvisy-opendal/src/onedrive/mod.rs (100%) rename {integrations => crates}/nvisy-opendal/src/s3/config.rs (100%) rename {integrations => crates}/nvisy-opendal/src/s3/mod.rs (100%) rename {integrations => crates}/nvisy-rig/Cargo.toml (100%) rename {integrations => crates}/nvisy-rig/README.md (100%) rename {integrations => crates}/nvisy-rig/src/chat/agent/context.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/agent/executor.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/agent/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/agent/prompt.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/event.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/response.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/service.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/stream.rs (100%) rename {integrations => crates}/nvisy-rig/src/chat/usage.rs (100%) rename {integrations => crates}/nvisy-rig/src/error.rs (100%) rename {integrations => crates}/nvisy-rig/src/lib.rs (100%) rename {integrations => crates}/nvisy-rig/src/provider/config.rs (100%) rename {integrations => crates}/nvisy-rig/src/provider/embedding.rs (100%) rename {integrations => crates}/nvisy-rig/src/provider/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/provider/registry.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/config.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/indexer/indexed.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/indexer/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/searcher/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/searcher/retrieved.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/searcher/scope.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/splitter/chunk.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/splitter/metadata.rs (100%) rename {integrations => crates}/nvisy-rig/src/rag/splitter/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/service/config.rs (100%) rename {integrations => crates}/nvisy-rig/src/service/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/service/rig.rs (100%) rename {integrations => crates}/nvisy-rig/src/session/message.rs (100%) rename {integrations => crates}/nvisy-rig/src/session/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/session/policy.rs (100%) rename {integrations => crates}/nvisy-rig/src/session/store.rs (100%) rename {integrations => crates}/nvisy-rig/src/tool/definition.rs (100%) rename {integrations => crates}/nvisy-rig/src/tool/edit/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/tool/edit/operation.rs (100%) rename {integrations => crates}/nvisy-rig/src/tool/edit/proposed.rs (100%) rename {integrations => crates}/nvisy-rig/src/tool/mod.rs (100%) rename {integrations => crates}/nvisy-rig/src/tool/registry.rs (100%) rename {integrations => crates}/nvisy-rig/src/tool/types.rs (100%) rename {integrations => crates}/nvisy-runtime/Cargo.toml (100%) rename {integrations => crates}/nvisy-runtime/README.md (100%) rename {integrations => crates}/nvisy-runtime/src/engine/config.rs (100%) rename {integrations => crates}/nvisy-runtime/src/engine/executor.rs (100%) rename {integrations => crates}/nvisy-runtime/src/engine/mod.rs (100%) rename {integrations => 
crates}/nvisy-runtime/src/error.rs (100%) rename {integrations => crates}/nvisy-runtime/src/graph/edge.rs (100%) rename {integrations => crates}/nvisy-runtime/src/graph/mod.rs (100%) rename {integrations => crates}/nvisy-runtime/src/graph/workflow.rs (100%) rename {integrations => crates}/nvisy-runtime/src/lib.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/data.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/id.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/input/config.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/input/mod.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/mod.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/output/config.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/output/mod.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/chunking.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/config.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/document.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/embedding.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/extraction.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/mod.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/processing.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/quality.rs (100%) rename {integrations => crates}/nvisy-runtime/src/node/transformer/routing.rs (100%) rename {integrations => crates}/nvisy-runtime/src/runtime/config.rs (100%) rename {integrations => crates}/nvisy-runtime/src/runtime/mod.rs (100%) rename {integrations => crates}/nvisy-runtime/src/runtime/service.rs (100%) rename {integrations => crates}/nvisy-vector/Cargo.toml (87%) rename {integrations => crates}/nvisy-vector/README.md (100%) rename {integrations => crates}/nvisy-vector/src/config.rs (100%) rename {integrations => crates}/nvisy-vector/src/lib.rs (99%) rename {integrations => crates}/nvisy-vector/src/milvus/backend.rs (100%) rename {integrations => crates}/nvisy-vector/src/milvus/config.rs (100%) rename {integrations => crates}/nvisy-vector/src/milvus/mod.rs (100%) rename {integrations => crates}/nvisy-vector/src/pgvector/backend.rs (100%) rename {integrations => crates}/nvisy-vector/src/pgvector/config.rs (100%) rename {integrations => crates}/nvisy-vector/src/pgvector/mod.rs (100%) rename {integrations => crates}/nvisy-vector/src/pinecone/backend.rs (100%) rename {integrations => crates}/nvisy-vector/src/pinecone/config.rs (100%) rename {integrations => crates}/nvisy-vector/src/pinecone/mod.rs (100%) rename {integrations => crates}/nvisy-vector/src/qdrant/backend.rs (100%) rename {integrations => crates}/nvisy-vector/src/qdrant/config.rs (100%) rename {integrations => crates}/nvisy-vector/src/qdrant/mod.rs (100%) rename {integrations => crates}/nvisy-vector/src/store.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 03afe35..bbb3e3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,15 +5,15 @@ resolver = "2" members = [ "./crates/nvisy-cli", "./crates/nvisy-core", + "./crates/nvisy-data", "./crates/nvisy-nats", + "./crates/nvisy-opendal", "./crates/nvisy-postgres", + "./crates/nvisy-rig", + "./crates/nvisy-runtime", "./crates/nvisy-server", + "./crates/nvisy-vector", "./crates/nvisy-webhook", - "./integrations/nvisy-data", - "./integrations/nvisy-opendal", - "./integrations/nvisy-rig", - 
"./integrations/nvisy-runtime", - "./integrations/nvisy-vector", ] [workspace.package] @@ -36,14 +36,14 @@ documentation = "https://docs.rs/nvisy-server" # Internal crates nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } -nvisy-data = { path = "./integrations/nvisy-data", version = "0.1.0" } +nvisy-data = { path = "./crates/nvisy-data", version = "0.1.0" } nvisy-nats = { path = "./crates/nvisy-nats", version = "0.1.0" } -nvisy-opendal = { path = "./integrations/nvisy-opendal", version = "0.1.0" } +nvisy-opendal = { path = "./crates/nvisy-opendal", version = "0.1.0" } nvisy-postgres = { path = "./crates/nvisy-postgres", version = "0.1.0" } -nvisy-rig = { path = "./integrations/nvisy-rig", version = "0.1.0" } -nvisy-runtime = { path = "./integrations/nvisy-runtime", version = "0.1.0" } +nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } +nvisy-runtime = { path = "./crates/nvisy-runtime", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } -nvisy-vector = { path = "./integrations/nvisy-vector", version = "0.1.0" } +nvisy-vector = { path = "./crates/nvisy-vector", version = "0.1.0" } nvisy-webhook = { path = "./crates/nvisy-webhook", version = "0.1.0" } # Runtime crates (from github.com/nvisycom/runtime) @@ -140,10 +140,15 @@ woothee = { version = "0.13", features = [] } # AI/ML frameworks rig-core = { version = "0.28", default-features = false, features = ["reqwest-rustls"] } +# Vector store clients +qdrant-client = { version = "1.13", features = [] } +pinecone-sdk = { version = "0.1", features = [] } +milvus-sdk-rust = { version = "0.1", features = [] } +prost-types = { version = "0.12", features = [] } + # Storage abstraction opendal = { version = "0.53", features = [] } - # Graph data structures petgraph = { version = "0.8", features = ["serde-1"] } diff --git a/integrations/nvisy-data/Cargo.toml b/crates/nvisy-data/Cargo.toml similarity index 100% rename from integrations/nvisy-data/Cargo.toml rename to crates/nvisy-data/Cargo.toml diff --git a/integrations/nvisy-data/src/error.rs b/crates/nvisy-data/src/error.rs similarity index 100% rename from integrations/nvisy-data/src/error.rs rename to crates/nvisy-data/src/error.rs diff --git a/integrations/nvisy-data/src/input.rs b/crates/nvisy-data/src/input.rs similarity index 100% rename from integrations/nvisy-data/src/input.rs rename to crates/nvisy-data/src/input.rs diff --git a/integrations/nvisy-data/src/lib.rs b/crates/nvisy-data/src/lib.rs similarity index 100% rename from integrations/nvisy-data/src/lib.rs rename to crates/nvisy-data/src/lib.rs diff --git a/integrations/nvisy-data/src/output.rs b/crates/nvisy-data/src/output.rs similarity index 100% rename from integrations/nvisy-data/src/output.rs rename to crates/nvisy-data/src/output.rs diff --git a/integrations/nvisy-data/src/types.rs b/crates/nvisy-data/src/types.rs similarity index 100% rename from integrations/nvisy-data/src/types.rs rename to crates/nvisy-data/src/types.rs diff --git a/integrations/nvisy-data/src/vector.rs b/crates/nvisy-data/src/vector.rs similarity index 100% rename from integrations/nvisy-data/src/vector.rs rename to crates/nvisy-data/src/vector.rs diff --git a/crates/nvisy-nats/src/kv/kv_bucket.rs b/crates/nvisy-nats/src/kv/kv_bucket.rs index 881d67a..b8b22b1 100644 --- a/crates/nvisy-nats/src/kv/kv_bucket.rs +++ b/crates/nvisy-nats/src/kv/kv_bucket.rs @@ -23,8 +23,8 @@ pub trait KvBucket: Clone + Send + Sync + 'static { pub struct ApiTokensBucket; impl KvBucket for ApiTokensBucket { - const 
NAME: &'static str = "api_tokens"; const DESCRIPTION: &'static str = "API authentication tokens"; + const NAME: &'static str = "api_tokens"; const TTL: Option = Some(Duration::from_secs(24 * 60 * 60)); // 24 hours } @@ -33,8 +33,8 @@ impl KvBucket for ApiTokensBucket { pub struct ChatHistoryBucket; impl KvBucket for ChatHistoryBucket { - const NAME: &'static str = "chat_history"; const DESCRIPTION: &'static str = "Ephemeral chat sessions"; + const NAME: &'static str = "chat_history"; const TTL: Option = Some(Duration::from_secs(30 * 60)); // 30 minutes } diff --git a/crates/nvisy-nats/src/stream/event_stream.rs b/crates/nvisy-nats/src/stream/event_stream.rs index 7d95301..3fb0efb 100644 --- a/crates/nvisy-nats/src/stream/event_stream.rs +++ b/crates/nvisy-nats/src/stream/event_stream.rs @@ -27,10 +27,10 @@ pub trait EventStream: Clone + Send + Sync + 'static { pub struct FileStream; impl EventStream for FileStream { + const CONSUMER_NAME: &'static str = "file-worker"; + const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); const NAME: &'static str = "FILE_JOBS"; const SUBJECT: &'static str = "file.jobs"; - const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); - const CONSUMER_NAME: &'static str = "file-worker"; } /// Stream for webhook delivery. @@ -40,10 +40,10 @@ impl EventStream for FileStream { pub struct WebhookStream; impl EventStream for WebhookStream { + const CONSUMER_NAME: &'static str = "webhook-worker"; + const MAX_AGE: Option = Some(Duration::from_secs(24 * 60 * 60)); const NAME: &'static str = "WEBHOOKS"; const SUBJECT: &'static str = "webhooks"; - const MAX_AGE: Option = Some(Duration::from_secs(24 * 60 * 60)); - const CONSUMER_NAME: &'static str = "webhook-worker"; } #[cfg(test)] diff --git a/integrations/nvisy-opendal/Cargo.toml b/crates/nvisy-opendal/Cargo.toml similarity index 100% rename from integrations/nvisy-opendal/Cargo.toml rename to crates/nvisy-opendal/Cargo.toml diff --git a/integrations/nvisy-opendal/README.md b/crates/nvisy-opendal/README.md similarity index 100% rename from integrations/nvisy-opendal/README.md rename to crates/nvisy-opendal/README.md diff --git a/integrations/nvisy-opendal/src/azblob/config.rs b/crates/nvisy-opendal/src/azblob/config.rs similarity index 100% rename from integrations/nvisy-opendal/src/azblob/config.rs rename to crates/nvisy-opendal/src/azblob/config.rs diff --git a/integrations/nvisy-opendal/src/azblob/mod.rs b/crates/nvisy-opendal/src/azblob/mod.rs similarity index 100% rename from integrations/nvisy-opendal/src/azblob/mod.rs rename to crates/nvisy-opendal/src/azblob/mod.rs diff --git a/integrations/nvisy-opendal/src/backend.rs b/crates/nvisy-opendal/src/backend.rs similarity index 100% rename from integrations/nvisy-opendal/src/backend.rs rename to crates/nvisy-opendal/src/backend.rs diff --git a/integrations/nvisy-opendal/src/config.rs b/crates/nvisy-opendal/src/config.rs similarity index 100% rename from integrations/nvisy-opendal/src/config.rs rename to crates/nvisy-opendal/src/config.rs diff --git a/integrations/nvisy-opendal/src/dropbox/config.rs b/crates/nvisy-opendal/src/dropbox/config.rs similarity index 100% rename from integrations/nvisy-opendal/src/dropbox/config.rs rename to crates/nvisy-opendal/src/dropbox/config.rs diff --git a/integrations/nvisy-opendal/src/dropbox/mod.rs b/crates/nvisy-opendal/src/dropbox/mod.rs similarity index 100% rename from integrations/nvisy-opendal/src/dropbox/mod.rs rename to crates/nvisy-opendal/src/dropbox/mod.rs diff --git 
a/integrations/nvisy-opendal/src/gcs/config.rs b/crates/nvisy-opendal/src/gcs/config.rs similarity index 100% rename from integrations/nvisy-opendal/src/gcs/config.rs rename to crates/nvisy-opendal/src/gcs/config.rs diff --git a/integrations/nvisy-opendal/src/gcs/mod.rs b/crates/nvisy-opendal/src/gcs/mod.rs similarity index 100% rename from integrations/nvisy-opendal/src/gcs/mod.rs rename to crates/nvisy-opendal/src/gcs/mod.rs diff --git a/integrations/nvisy-opendal/src/gdrive/config.rs b/crates/nvisy-opendal/src/gdrive/config.rs similarity index 100% rename from integrations/nvisy-opendal/src/gdrive/config.rs rename to crates/nvisy-opendal/src/gdrive/config.rs diff --git a/integrations/nvisy-opendal/src/gdrive/mod.rs b/crates/nvisy-opendal/src/gdrive/mod.rs similarity index 100% rename from integrations/nvisy-opendal/src/gdrive/mod.rs rename to crates/nvisy-opendal/src/gdrive/mod.rs diff --git a/integrations/nvisy-opendal/src/lib.rs b/crates/nvisy-opendal/src/lib.rs similarity index 99% rename from integrations/nvisy-opendal/src/lib.rs rename to crates/nvisy-opendal/src/lib.rs index 783080d..a6c8d7c 100644 --- a/integrations/nvisy-opendal/src/lib.rs +++ b/crates/nvisy-opendal/src/lib.rs @@ -21,7 +21,6 @@ pub use config::{ AzureBlobConfig, DropboxConfig, GcsConfig, GoogleDriveConfig, OneDriveConfig, S3Config, StorageConfig, }; - // Re-export types from nvisy-data for convenience pub use nvisy_data::{DataError, DataInput, DataOutput, DataResult, InputContext, OutputContext}; diff --git a/integrations/nvisy-opendal/src/onedrive/config.rs b/crates/nvisy-opendal/src/onedrive/config.rs similarity index 100% rename from integrations/nvisy-opendal/src/onedrive/config.rs rename to crates/nvisy-opendal/src/onedrive/config.rs diff --git a/integrations/nvisy-opendal/src/onedrive/mod.rs b/crates/nvisy-opendal/src/onedrive/mod.rs similarity index 100% rename from integrations/nvisy-opendal/src/onedrive/mod.rs rename to crates/nvisy-opendal/src/onedrive/mod.rs diff --git a/integrations/nvisy-opendal/src/s3/config.rs b/crates/nvisy-opendal/src/s3/config.rs similarity index 100% rename from integrations/nvisy-opendal/src/s3/config.rs rename to crates/nvisy-opendal/src/s3/config.rs diff --git a/integrations/nvisy-opendal/src/s3/mod.rs b/crates/nvisy-opendal/src/s3/mod.rs similarity index 100% rename from integrations/nvisy-opendal/src/s3/mod.rs rename to crates/nvisy-opendal/src/s3/mod.rs diff --git a/integrations/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml similarity index 100% rename from integrations/nvisy-rig/Cargo.toml rename to crates/nvisy-rig/Cargo.toml diff --git a/integrations/nvisy-rig/README.md b/crates/nvisy-rig/README.md similarity index 100% rename from integrations/nvisy-rig/README.md rename to crates/nvisy-rig/README.md diff --git a/integrations/nvisy-rig/src/chat/agent/context.rs b/crates/nvisy-rig/src/chat/agent/context.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/agent/context.rs rename to crates/nvisy-rig/src/chat/agent/context.rs diff --git a/integrations/nvisy-rig/src/chat/agent/executor.rs b/crates/nvisy-rig/src/chat/agent/executor.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/agent/executor.rs rename to crates/nvisy-rig/src/chat/agent/executor.rs diff --git a/integrations/nvisy-rig/src/chat/agent/mod.rs b/crates/nvisy-rig/src/chat/agent/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/agent/mod.rs rename to crates/nvisy-rig/src/chat/agent/mod.rs diff --git a/integrations/nvisy-rig/src/chat/agent/prompt.rs 
b/crates/nvisy-rig/src/chat/agent/prompt.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/agent/prompt.rs rename to crates/nvisy-rig/src/chat/agent/prompt.rs diff --git a/integrations/nvisy-rig/src/chat/event.rs b/crates/nvisy-rig/src/chat/event.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/event.rs rename to crates/nvisy-rig/src/chat/event.rs diff --git a/integrations/nvisy-rig/src/chat/mod.rs b/crates/nvisy-rig/src/chat/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/mod.rs rename to crates/nvisy-rig/src/chat/mod.rs diff --git a/integrations/nvisy-rig/src/chat/response.rs b/crates/nvisy-rig/src/chat/response.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/response.rs rename to crates/nvisy-rig/src/chat/response.rs diff --git a/integrations/nvisy-rig/src/chat/service.rs b/crates/nvisy-rig/src/chat/service.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/service.rs rename to crates/nvisy-rig/src/chat/service.rs diff --git a/integrations/nvisy-rig/src/chat/stream.rs b/crates/nvisy-rig/src/chat/stream.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/stream.rs rename to crates/nvisy-rig/src/chat/stream.rs diff --git a/integrations/nvisy-rig/src/chat/usage.rs b/crates/nvisy-rig/src/chat/usage.rs similarity index 100% rename from integrations/nvisy-rig/src/chat/usage.rs rename to crates/nvisy-rig/src/chat/usage.rs diff --git a/integrations/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs similarity index 100% rename from integrations/nvisy-rig/src/error.rs rename to crates/nvisy-rig/src/error.rs diff --git a/integrations/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs similarity index 100% rename from integrations/nvisy-rig/src/lib.rs rename to crates/nvisy-rig/src/lib.rs diff --git a/integrations/nvisy-rig/src/provider/config.rs b/crates/nvisy-rig/src/provider/config.rs similarity index 100% rename from integrations/nvisy-rig/src/provider/config.rs rename to crates/nvisy-rig/src/provider/config.rs diff --git a/integrations/nvisy-rig/src/provider/embedding.rs b/crates/nvisy-rig/src/provider/embedding.rs similarity index 100% rename from integrations/nvisy-rig/src/provider/embedding.rs rename to crates/nvisy-rig/src/provider/embedding.rs diff --git a/integrations/nvisy-rig/src/provider/mod.rs b/crates/nvisy-rig/src/provider/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/provider/mod.rs rename to crates/nvisy-rig/src/provider/mod.rs diff --git a/integrations/nvisy-rig/src/provider/registry.rs b/crates/nvisy-rig/src/provider/registry.rs similarity index 100% rename from integrations/nvisy-rig/src/provider/registry.rs rename to crates/nvisy-rig/src/provider/registry.rs diff --git a/integrations/nvisy-rig/src/rag/config.rs b/crates/nvisy-rig/src/rag/config.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/config.rs rename to crates/nvisy-rig/src/rag/config.rs diff --git a/integrations/nvisy-rig/src/rag/indexer/indexed.rs b/crates/nvisy-rig/src/rag/indexer/indexed.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/indexer/indexed.rs rename to crates/nvisy-rig/src/rag/indexer/indexed.rs diff --git a/integrations/nvisy-rig/src/rag/indexer/mod.rs b/crates/nvisy-rig/src/rag/indexer/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/indexer/mod.rs rename to crates/nvisy-rig/src/rag/indexer/mod.rs diff --git a/integrations/nvisy-rig/src/rag/mod.rs b/crates/nvisy-rig/src/rag/mod.rs similarity 
index 100% rename from integrations/nvisy-rig/src/rag/mod.rs rename to crates/nvisy-rig/src/rag/mod.rs diff --git a/integrations/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/searcher/mod.rs rename to crates/nvisy-rig/src/rag/searcher/mod.rs diff --git a/integrations/nvisy-rig/src/rag/searcher/retrieved.rs b/crates/nvisy-rig/src/rag/searcher/retrieved.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/searcher/retrieved.rs rename to crates/nvisy-rig/src/rag/searcher/retrieved.rs diff --git a/integrations/nvisy-rig/src/rag/searcher/scope.rs b/crates/nvisy-rig/src/rag/searcher/scope.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/searcher/scope.rs rename to crates/nvisy-rig/src/rag/searcher/scope.rs diff --git a/integrations/nvisy-rig/src/rag/splitter/chunk.rs b/crates/nvisy-rig/src/rag/splitter/chunk.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/splitter/chunk.rs rename to crates/nvisy-rig/src/rag/splitter/chunk.rs diff --git a/integrations/nvisy-rig/src/rag/splitter/metadata.rs b/crates/nvisy-rig/src/rag/splitter/metadata.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/splitter/metadata.rs rename to crates/nvisy-rig/src/rag/splitter/metadata.rs diff --git a/integrations/nvisy-rig/src/rag/splitter/mod.rs b/crates/nvisy-rig/src/rag/splitter/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/rag/splitter/mod.rs rename to crates/nvisy-rig/src/rag/splitter/mod.rs diff --git a/integrations/nvisy-rig/src/service/config.rs b/crates/nvisy-rig/src/service/config.rs similarity index 100% rename from integrations/nvisy-rig/src/service/config.rs rename to crates/nvisy-rig/src/service/config.rs diff --git a/integrations/nvisy-rig/src/service/mod.rs b/crates/nvisy-rig/src/service/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/service/mod.rs rename to crates/nvisy-rig/src/service/mod.rs diff --git a/integrations/nvisy-rig/src/service/rig.rs b/crates/nvisy-rig/src/service/rig.rs similarity index 100% rename from integrations/nvisy-rig/src/service/rig.rs rename to crates/nvisy-rig/src/service/rig.rs diff --git a/integrations/nvisy-rig/src/session/message.rs b/crates/nvisy-rig/src/session/message.rs similarity index 100% rename from integrations/nvisy-rig/src/session/message.rs rename to crates/nvisy-rig/src/session/message.rs diff --git a/integrations/nvisy-rig/src/session/mod.rs b/crates/nvisy-rig/src/session/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/session/mod.rs rename to crates/nvisy-rig/src/session/mod.rs diff --git a/integrations/nvisy-rig/src/session/policy.rs b/crates/nvisy-rig/src/session/policy.rs similarity index 100% rename from integrations/nvisy-rig/src/session/policy.rs rename to crates/nvisy-rig/src/session/policy.rs diff --git a/integrations/nvisy-rig/src/session/store.rs b/crates/nvisy-rig/src/session/store.rs similarity index 100% rename from integrations/nvisy-rig/src/session/store.rs rename to crates/nvisy-rig/src/session/store.rs diff --git a/integrations/nvisy-rig/src/tool/definition.rs b/crates/nvisy-rig/src/tool/definition.rs similarity index 100% rename from integrations/nvisy-rig/src/tool/definition.rs rename to crates/nvisy-rig/src/tool/definition.rs diff --git a/integrations/nvisy-rig/src/tool/edit/mod.rs b/crates/nvisy-rig/src/tool/edit/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/tool/edit/mod.rs rename to 
crates/nvisy-rig/src/tool/edit/mod.rs diff --git a/integrations/nvisy-rig/src/tool/edit/operation.rs b/crates/nvisy-rig/src/tool/edit/operation.rs similarity index 100% rename from integrations/nvisy-rig/src/tool/edit/operation.rs rename to crates/nvisy-rig/src/tool/edit/operation.rs diff --git a/integrations/nvisy-rig/src/tool/edit/proposed.rs b/crates/nvisy-rig/src/tool/edit/proposed.rs similarity index 100% rename from integrations/nvisy-rig/src/tool/edit/proposed.rs rename to crates/nvisy-rig/src/tool/edit/proposed.rs diff --git a/integrations/nvisy-rig/src/tool/mod.rs b/crates/nvisy-rig/src/tool/mod.rs similarity index 100% rename from integrations/nvisy-rig/src/tool/mod.rs rename to crates/nvisy-rig/src/tool/mod.rs diff --git a/integrations/nvisy-rig/src/tool/registry.rs b/crates/nvisy-rig/src/tool/registry.rs similarity index 100% rename from integrations/nvisy-rig/src/tool/registry.rs rename to crates/nvisy-rig/src/tool/registry.rs diff --git a/integrations/nvisy-rig/src/tool/types.rs b/crates/nvisy-rig/src/tool/types.rs similarity index 100% rename from integrations/nvisy-rig/src/tool/types.rs rename to crates/nvisy-rig/src/tool/types.rs diff --git a/integrations/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml similarity index 100% rename from integrations/nvisy-runtime/Cargo.toml rename to crates/nvisy-runtime/Cargo.toml diff --git a/integrations/nvisy-runtime/README.md b/crates/nvisy-runtime/README.md similarity index 100% rename from integrations/nvisy-runtime/README.md rename to crates/nvisy-runtime/README.md diff --git a/integrations/nvisy-runtime/src/engine/config.rs b/crates/nvisy-runtime/src/engine/config.rs similarity index 100% rename from integrations/nvisy-runtime/src/engine/config.rs rename to crates/nvisy-runtime/src/engine/config.rs diff --git a/integrations/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs similarity index 100% rename from integrations/nvisy-runtime/src/engine/executor.rs rename to crates/nvisy-runtime/src/engine/executor.rs diff --git a/integrations/nvisy-runtime/src/engine/mod.rs b/crates/nvisy-runtime/src/engine/mod.rs similarity index 100% rename from integrations/nvisy-runtime/src/engine/mod.rs rename to crates/nvisy-runtime/src/engine/mod.rs diff --git a/integrations/nvisy-runtime/src/error.rs b/crates/nvisy-runtime/src/error.rs similarity index 100% rename from integrations/nvisy-runtime/src/error.rs rename to crates/nvisy-runtime/src/error.rs diff --git a/integrations/nvisy-runtime/src/graph/edge.rs b/crates/nvisy-runtime/src/graph/edge.rs similarity index 100% rename from integrations/nvisy-runtime/src/graph/edge.rs rename to crates/nvisy-runtime/src/graph/edge.rs diff --git a/integrations/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs similarity index 100% rename from integrations/nvisy-runtime/src/graph/mod.rs rename to crates/nvisy-runtime/src/graph/mod.rs diff --git a/integrations/nvisy-runtime/src/graph/workflow.rs b/crates/nvisy-runtime/src/graph/workflow.rs similarity index 100% rename from integrations/nvisy-runtime/src/graph/workflow.rs rename to crates/nvisy-runtime/src/graph/workflow.rs diff --git a/integrations/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs similarity index 100% rename from integrations/nvisy-runtime/src/lib.rs rename to crates/nvisy-runtime/src/lib.rs diff --git a/integrations/nvisy-runtime/src/node/data.rs b/crates/nvisy-runtime/src/node/data.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/data.rs rename 
to crates/nvisy-runtime/src/node/data.rs diff --git a/integrations/nvisy-runtime/src/node/id.rs b/crates/nvisy-runtime/src/node/id.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/id.rs rename to crates/nvisy-runtime/src/node/id.rs diff --git a/integrations/nvisy-runtime/src/node/input/config.rs b/crates/nvisy-runtime/src/node/input/config.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/input/config.rs rename to crates/nvisy-runtime/src/node/input/config.rs diff --git a/integrations/nvisy-runtime/src/node/input/mod.rs b/crates/nvisy-runtime/src/node/input/mod.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/input/mod.rs rename to crates/nvisy-runtime/src/node/input/mod.rs diff --git a/integrations/nvisy-runtime/src/node/mod.rs b/crates/nvisy-runtime/src/node/mod.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/mod.rs rename to crates/nvisy-runtime/src/node/mod.rs diff --git a/integrations/nvisy-runtime/src/node/output/config.rs b/crates/nvisy-runtime/src/node/output/config.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/output/config.rs rename to crates/nvisy-runtime/src/node/output/config.rs diff --git a/integrations/nvisy-runtime/src/node/output/mod.rs b/crates/nvisy-runtime/src/node/output/mod.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/output/mod.rs rename to crates/nvisy-runtime/src/node/output/mod.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/chunking.rs b/crates/nvisy-runtime/src/node/transformer/chunking.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/chunking.rs rename to crates/nvisy-runtime/src/node/transformer/chunking.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/config.rs b/crates/nvisy-runtime/src/node/transformer/config.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/config.rs rename to crates/nvisy-runtime/src/node/transformer/config.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/document.rs b/crates/nvisy-runtime/src/node/transformer/document.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/document.rs rename to crates/nvisy-runtime/src/node/transformer/document.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/embedding.rs b/crates/nvisy-runtime/src/node/transformer/embedding.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/embedding.rs rename to crates/nvisy-runtime/src/node/transformer/embedding.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/extraction.rs b/crates/nvisy-runtime/src/node/transformer/extraction.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/extraction.rs rename to crates/nvisy-runtime/src/node/transformer/extraction.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/mod.rs b/crates/nvisy-runtime/src/node/transformer/mod.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/mod.rs rename to crates/nvisy-runtime/src/node/transformer/mod.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/processing.rs b/crates/nvisy-runtime/src/node/transformer/processing.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/processing.rs rename to crates/nvisy-runtime/src/node/transformer/processing.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/quality.rs 
b/crates/nvisy-runtime/src/node/transformer/quality.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/quality.rs rename to crates/nvisy-runtime/src/node/transformer/quality.rs diff --git a/integrations/nvisy-runtime/src/node/transformer/routing.rs b/crates/nvisy-runtime/src/node/transformer/routing.rs similarity index 100% rename from integrations/nvisy-runtime/src/node/transformer/routing.rs rename to crates/nvisy-runtime/src/node/transformer/routing.rs diff --git a/integrations/nvisy-runtime/src/runtime/config.rs b/crates/nvisy-runtime/src/runtime/config.rs similarity index 100% rename from integrations/nvisy-runtime/src/runtime/config.rs rename to crates/nvisy-runtime/src/runtime/config.rs diff --git a/integrations/nvisy-runtime/src/runtime/mod.rs b/crates/nvisy-runtime/src/runtime/mod.rs similarity index 100% rename from integrations/nvisy-runtime/src/runtime/mod.rs rename to crates/nvisy-runtime/src/runtime/mod.rs diff --git a/integrations/nvisy-runtime/src/runtime/service.rs b/crates/nvisy-runtime/src/runtime/service.rs similarity index 100% rename from integrations/nvisy-runtime/src/runtime/service.rs rename to crates/nvisy-runtime/src/runtime/service.rs diff --git a/integrations/nvisy-vector/Cargo.toml b/crates/nvisy-vector/Cargo.toml similarity index 87% rename from integrations/nvisy-vector/Cargo.toml rename to crates/nvisy-vector/Cargo.toml index 7f5971e..25fb2ea 100644 --- a/integrations/nvisy-vector/Cargo.toml +++ b/crates/nvisy-vector/Cargo.toml @@ -40,10 +40,10 @@ serde_json = { workspace = true, features = [] } async-trait = { workspace = true, features = [] } # Vector store clients -qdrant-client = "1.13" -pinecone-sdk = "0.1" -milvus-sdk-rust = "0.1" -prost-types = "0.12" +qdrant-client = { workspace = true, features = [] } +pinecone-sdk = { workspace = true, features = [] } +milvus-sdk-rust = { workspace = true, features = [] } +prost-types = { workspace = true, features = [] } # Database (for pgvector) diesel = { workspace = true, features = ["postgres"] } diff --git a/integrations/nvisy-vector/README.md b/crates/nvisy-vector/README.md similarity index 100% rename from integrations/nvisy-vector/README.md rename to crates/nvisy-vector/README.md diff --git a/integrations/nvisy-vector/src/config.rs b/crates/nvisy-vector/src/config.rs similarity index 100% rename from integrations/nvisy-vector/src/config.rs rename to crates/nvisy-vector/src/config.rs diff --git a/integrations/nvisy-vector/src/lib.rs b/crates/nvisy-vector/src/lib.rs similarity index 99% rename from integrations/nvisy-vector/src/lib.rs rename to crates/nvisy-vector/src/lib.rs index 2d6260e..8733072 100644 --- a/integrations/nvisy-vector/src/lib.rs +++ b/crates/nvisy-vector/src/lib.rs @@ -18,13 +18,12 @@ pub use config::{ MilvusConfig, PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType, PineconeConfig, QdrantConfig, VectorStoreConfig, }; -pub use store::VectorStore; - // Re-export types from nvisy-data for convenience pub use nvisy_data::{ DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, VectorSearchResult, }; +pub use store::VectorStore; /// Tracing target for vector store operations. 
pub const TRACING_TARGET: &str = "nvisy_vector"; diff --git a/integrations/nvisy-vector/src/milvus/backend.rs b/crates/nvisy-vector/src/milvus/backend.rs similarity index 100% rename from integrations/nvisy-vector/src/milvus/backend.rs rename to crates/nvisy-vector/src/milvus/backend.rs diff --git a/integrations/nvisy-vector/src/milvus/config.rs b/crates/nvisy-vector/src/milvus/config.rs similarity index 100% rename from integrations/nvisy-vector/src/milvus/config.rs rename to crates/nvisy-vector/src/milvus/config.rs diff --git a/integrations/nvisy-vector/src/milvus/mod.rs b/crates/nvisy-vector/src/milvus/mod.rs similarity index 100% rename from integrations/nvisy-vector/src/milvus/mod.rs rename to crates/nvisy-vector/src/milvus/mod.rs diff --git a/integrations/nvisy-vector/src/pgvector/backend.rs b/crates/nvisy-vector/src/pgvector/backend.rs similarity index 100% rename from integrations/nvisy-vector/src/pgvector/backend.rs rename to crates/nvisy-vector/src/pgvector/backend.rs diff --git a/integrations/nvisy-vector/src/pgvector/config.rs b/crates/nvisy-vector/src/pgvector/config.rs similarity index 100% rename from integrations/nvisy-vector/src/pgvector/config.rs rename to crates/nvisy-vector/src/pgvector/config.rs diff --git a/integrations/nvisy-vector/src/pgvector/mod.rs b/crates/nvisy-vector/src/pgvector/mod.rs similarity index 100% rename from integrations/nvisy-vector/src/pgvector/mod.rs rename to crates/nvisy-vector/src/pgvector/mod.rs diff --git a/integrations/nvisy-vector/src/pinecone/backend.rs b/crates/nvisy-vector/src/pinecone/backend.rs similarity index 100% rename from integrations/nvisy-vector/src/pinecone/backend.rs rename to crates/nvisy-vector/src/pinecone/backend.rs diff --git a/integrations/nvisy-vector/src/pinecone/config.rs b/crates/nvisy-vector/src/pinecone/config.rs similarity index 100% rename from integrations/nvisy-vector/src/pinecone/config.rs rename to crates/nvisy-vector/src/pinecone/config.rs diff --git a/integrations/nvisy-vector/src/pinecone/mod.rs b/crates/nvisy-vector/src/pinecone/mod.rs similarity index 100% rename from integrations/nvisy-vector/src/pinecone/mod.rs rename to crates/nvisy-vector/src/pinecone/mod.rs diff --git a/integrations/nvisy-vector/src/qdrant/backend.rs b/crates/nvisy-vector/src/qdrant/backend.rs similarity index 100% rename from integrations/nvisy-vector/src/qdrant/backend.rs rename to crates/nvisy-vector/src/qdrant/backend.rs diff --git a/integrations/nvisy-vector/src/qdrant/config.rs b/crates/nvisy-vector/src/qdrant/config.rs similarity index 100% rename from integrations/nvisy-vector/src/qdrant/config.rs rename to crates/nvisy-vector/src/qdrant/config.rs diff --git a/integrations/nvisy-vector/src/qdrant/mod.rs b/crates/nvisy-vector/src/qdrant/mod.rs similarity index 100% rename from integrations/nvisy-vector/src/qdrant/mod.rs rename to crates/nvisy-vector/src/qdrant/mod.rs diff --git a/integrations/nvisy-vector/src/store.rs b/crates/nvisy-vector/src/store.rs similarity index 100% rename from integrations/nvisy-vector/src/store.rs rename to crates/nvisy-vector/src/store.rs From 8ec4935b44d4fa1fbe206e5e85aa57d6c2273196 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 20 Jan 2026 10:32:39 +0100 Subject: [PATCH 10/28] feat(dal, runtime): add nvisy-dal crate, provider params/credentials split - Add nvisy-dal crate with unified data abstraction layer - DataTypeId enum and AnyDataValue for runtime type dispatch - ProviderConfig for S3, GCS, Azure Blob, Postgres, MySQL - Core module with Context, InputStream, OutputStream, 
DataInput/DataOutput traits - Storage and vector providers (Qdrant, Pinecone, Milvus, pgvector) - Add provider module to nvisy-runtime/node - ProviderParams: non-sensitive params with credentials_id reference - ProviderCredentials: sensitive credentials stored per workspace - CredentialsRegistry: in-memory registry with JSON construction - Update InputNode and OutputNode to use ProviderParams - Add CredentialsRegistry and CredentialsNotFound error variants - Remove nvisy-opendal, nvisy-vector, nvisy-data crates (consolidated into nvisy-dal) --- Cargo.lock | 485 ++++++++++++++---- Cargo.toml | 22 +- crates/{nvisy-vector => nvisy-dal}/Cargo.toml | 25 +- crates/nvisy-dal/README.md | 180 +++++++ crates/nvisy-dal/src/core/context.rs | 55 ++ crates/nvisy-dal/src/core/mod.rs | 31 ++ crates/nvisy-dal/src/core/stream.rs | 118 +++++ crates/nvisy-dal/src/datatype/blob.rs | 74 +++ crates/nvisy-dal/src/datatype/document.rs | 42 ++ crates/nvisy-dal/src/datatype/embedding.rs | 47 ++ crates/nvisy-dal/src/datatype/graph.rs | 129 +++++ crates/nvisy-dal/src/datatype/message.rs | 82 +++ crates/nvisy-dal/src/datatype/mod.rs | 137 +++++ crates/nvisy-dal/src/datatype/record.rs | 53 ++ crates/nvisy-dal/src/error.rs | 83 +++ crates/nvisy-dal/src/lib.rs | 24 + .../src/provider}/azblob/config.rs | 24 +- crates/nvisy-dal/src/provider/azblob/mod.rs | 110 ++++ crates/nvisy-dal/src/provider/config.rs | 32 ++ .../src/provider}/gcs/config.rs | 2 +- crates/nvisy-dal/src/provider/gcs/mod.rs | 104 ++++ .../src/provider}/milvus/config.rs | 0 .../src/provider/milvus/mod.rs} | 199 +++---- crates/nvisy-dal/src/provider/mod.rs | 33 ++ crates/nvisy-dal/src/provider/mysql/config.rs | 39 ++ crates/nvisy-dal/src/provider/mysql/mod.rs | 126 +++++ .../src/provider}/pgvector/config.rs | 18 +- .../src/provider/pgvector/mod.rs} | 229 +++++---- .../src/provider}/pinecone/config.rs | 0 .../src/provider/pinecone/mod.rs} | 167 +++--- .../nvisy-dal/src/provider/postgres/config.rs | 39 ++ crates/nvisy-dal/src/provider/postgres/mod.rs | 127 +++++ .../src/provider}/qdrant/config.rs | 10 - .../src/provider/qdrant/mod.rs} | 179 ++++--- .../src/provider}/s3/config.rs | 0 crates/nvisy-dal/src/provider/s3/mod.rs | 114 ++++ crates/nvisy-data/Cargo.toml | 39 -- crates/nvisy-data/src/error.rs | 99 ---- crates/nvisy-data/src/input.rs | 55 -- crates/nvisy-data/src/lib.rs | 20 - crates/nvisy-data/src/output.rs | 61 --- crates/nvisy-data/src/types.rs | 52 -- crates/nvisy-data/src/vector.rs | 86 ---- crates/nvisy-opendal/Cargo.toml | 54 -- crates/nvisy-opendal/README.md | 44 -- crates/nvisy-opendal/src/azblob/mod.rs | 5 - crates/nvisy-opendal/src/backend.rs | 405 --------------- crates/nvisy-opendal/src/config.rs | 44 -- crates/nvisy-opendal/src/dropbox/config.rs | 58 --- crates/nvisy-opendal/src/dropbox/mod.rs | 5 - crates/nvisy-opendal/src/gcs/mod.rs | 5 - crates/nvisy-opendal/src/gdrive/config.rs | 58 --- crates/nvisy-opendal/src/gdrive/mod.rs | 5 - crates/nvisy-opendal/src/lib.rs | 28 - crates/nvisy-opendal/src/onedrive/config.rs | 58 --- crates/nvisy-opendal/src/onedrive/mod.rs | 5 - crates/nvisy-opendal/src/s3/mod.rs | 5 - crates/nvisy-runtime/Cargo.toml | 3 +- crates/nvisy-runtime/src/error.rs | 11 +- crates/nvisy-runtime/src/node/input/config.rs | 3 - crates/nvisy-runtime/src/node/input/mod.rs | 25 +- crates/nvisy-runtime/src/node/mod.rs | 10 +- .../nvisy-runtime/src/node/output/config.rs | 30 -- crates/nvisy-runtime/src/node/output/mod.rs | 19 +- .../src/node/provider/credentials.rs | 68 +++ crates/nvisy-runtime/src/node/provider/mod.rs | 69 +++ 
.../nvisy-runtime/src/node/provider/params.rs | 203 ++++++++ crates/nvisy-vector/README.md | 42 -- crates/nvisy-vector/src/config.rs | 36 -- crates/nvisy-vector/src/lib.rs | 29 -- crates/nvisy-vector/src/milvus/mod.rs | 7 - crates/nvisy-vector/src/pgvector/mod.rs | 7 - crates/nvisy-vector/src/pinecone/mod.rs | 7 - crates/nvisy-vector/src/qdrant/mod.rs | 7 - crates/nvisy-vector/src/store.rs | 95 ---- 75 files changed, 3023 insertions(+), 1978 deletions(-) rename crates/{nvisy-vector => nvisy-dal}/Cargo.toml (73%) create mode 100644 crates/nvisy-dal/README.md create mode 100644 crates/nvisy-dal/src/core/context.rs create mode 100644 crates/nvisy-dal/src/core/mod.rs create mode 100644 crates/nvisy-dal/src/core/stream.rs create mode 100644 crates/nvisy-dal/src/datatype/blob.rs create mode 100644 crates/nvisy-dal/src/datatype/document.rs create mode 100644 crates/nvisy-dal/src/datatype/embedding.rs create mode 100644 crates/nvisy-dal/src/datatype/graph.rs create mode 100644 crates/nvisy-dal/src/datatype/message.rs create mode 100644 crates/nvisy-dal/src/datatype/mod.rs create mode 100644 crates/nvisy-dal/src/datatype/record.rs create mode 100644 crates/nvisy-dal/src/error.rs create mode 100644 crates/nvisy-dal/src/lib.rs rename crates/{nvisy-opendal/src => nvisy-dal/src/provider}/azblob/config.rs (69%) create mode 100644 crates/nvisy-dal/src/provider/azblob/mod.rs create mode 100644 crates/nvisy-dal/src/provider/config.rs rename crates/{nvisy-opendal/src => nvisy-dal/src/provider}/gcs/config.rs (95%) create mode 100644 crates/nvisy-dal/src/provider/gcs/mod.rs rename crates/{nvisy-vector/src => nvisy-dal/src/provider}/milvus/config.rs (100%) rename crates/{nvisy-vector/src/milvus/backend.rs => nvisy-dal/src/provider/milvus/mod.rs} (66%) create mode 100644 crates/nvisy-dal/src/provider/mod.rs create mode 100644 crates/nvisy-dal/src/provider/mysql/config.rs create mode 100644 crates/nvisy-dal/src/provider/mysql/mod.rs rename crates/{nvisy-vector/src => nvisy-dal/src/provider}/pgvector/config.rs (81%) rename crates/{nvisy-vector/src/pgvector/backend.rs => nvisy-dal/src/provider/pgvector/mod.rs} (64%) rename crates/{nvisy-vector/src => nvisy-dal/src/provider}/pinecone/config.rs (100%) rename crates/{nvisy-vector/src/pinecone/backend.rs => nvisy-dal/src/provider/pinecone/mod.rs} (67%) create mode 100644 crates/nvisy-dal/src/provider/postgres/config.rs create mode 100644 crates/nvisy-dal/src/provider/postgres/mod.rs rename crates/{nvisy-vector/src => nvisy-dal/src/provider}/qdrant/config.rs (86%) rename crates/{nvisy-vector/src/qdrant/backend.rs => nvisy-dal/src/provider/qdrant/mod.rs} (72%) rename crates/{nvisy-opendal/src => nvisy-dal/src/provider}/s3/config.rs (100%) create mode 100644 crates/nvisy-dal/src/provider/s3/mod.rs delete mode 100644 crates/nvisy-data/Cargo.toml delete mode 100644 crates/nvisy-data/src/error.rs delete mode 100644 crates/nvisy-data/src/input.rs delete mode 100644 crates/nvisy-data/src/lib.rs delete mode 100644 crates/nvisy-data/src/output.rs delete mode 100644 crates/nvisy-data/src/types.rs delete mode 100644 crates/nvisy-data/src/vector.rs delete mode 100644 crates/nvisy-opendal/Cargo.toml delete mode 100644 crates/nvisy-opendal/README.md delete mode 100644 crates/nvisy-opendal/src/azblob/mod.rs delete mode 100644 crates/nvisy-opendal/src/backend.rs delete mode 100644 crates/nvisy-opendal/src/config.rs delete mode 100644 crates/nvisy-opendal/src/dropbox/config.rs delete mode 100644 crates/nvisy-opendal/src/dropbox/mod.rs delete mode 100644 
crates/nvisy-opendal/src/gcs/mod.rs delete mode 100644 crates/nvisy-opendal/src/gdrive/config.rs delete mode 100644 crates/nvisy-opendal/src/gdrive/mod.rs delete mode 100644 crates/nvisy-opendal/src/lib.rs delete mode 100644 crates/nvisy-opendal/src/onedrive/config.rs delete mode 100644 crates/nvisy-opendal/src/onedrive/mod.rs delete mode 100644 crates/nvisy-opendal/src/s3/mod.rs delete mode 100644 crates/nvisy-runtime/src/node/input/config.rs delete mode 100644 crates/nvisy-runtime/src/node/output/config.rs create mode 100644 crates/nvisy-runtime/src/node/provider/credentials.rs create mode 100644 crates/nvisy-runtime/src/node/provider/mod.rs create mode 100644 crates/nvisy-runtime/src/node/provider/params.rs delete mode 100644 crates/nvisy-vector/README.md delete mode 100644 crates/nvisy-vector/src/config.rs delete mode 100644 crates/nvisy-vector/src/lib.rs delete mode 100644 crates/nvisy-vector/src/milvus/mod.rs delete mode 100644 crates/nvisy-vector/src/pgvector/mod.rs delete mode 100644 crates/nvisy-vector/src/pinecone/mod.rs delete mode 100644 crates/nvisy-vector/src/qdrant/mod.rs delete mode 100644 crates/nvisy-vector/src/store.rs diff --git a/Cargo.lock b/Cargo.lock index 27da2ea..b2540c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,6 +100,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -207,17 +213,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "async-lock" -version = "3.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290f7f2596bd5b78a9fec8088ccd89180d7f9f55b94b0576823bbbdc72ee8311" -dependencies = [ - "event-listener", - "event-listener-strategy", - "pin-project-lite", -] - [[package]] name = "async-nats" version = "0.45.0" @@ -288,6 +283,15 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "atoi" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -692,6 +696,9 @@ name = "bitflags" version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +dependencies = [ + "serde_core", +] [[package]] name = "blake2" @@ -1075,19 +1082,10 @@ dependencies = [ ] [[package]] -name = "crossbeam-channel" -version = "0.5.15" +name = "crossbeam-queue" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" dependencies = [ "crossbeam-utils", ] @@ -1555,6 +1553,9 @@ name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +dependencies = [ + "serde", +] [[package]] name = "email_address" @@ -1602,23 +1603,24 @@ dependencies = [ ] 
[[package]] -name = "event-listener" -version = "5.4.1" +name = "etcetera" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", + "cfg-if", + "home", + "windows-sys 0.48.0", ] [[package]] -name = "event-listener-strategy" -version = "0.5.4" +name = "event-listener" +version = "5.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" +checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" dependencies = [ - "event-listener", + "concurrent-queue", + "parking", "pin-project-lite", ] @@ -1731,6 +1733,17 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin 0.9.8", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1825,6 +1838,17 @@ dependencies = [ "futures-util", ] +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + [[package]] name = "futures-io" version = "0.3.31" @@ -2023,6 +2047,8 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ + "allocator-api2", + "equivalent", "foldhash", ] @@ -2032,6 +2058,15 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "hdrhistogram" version = "7.5.4" @@ -2100,6 +2135,15 @@ dependencies = [ "serde", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac", +] + [[package]] name = "hmac" version = "0.12.1" @@ -2758,6 +2802,16 @@ dependencies = [ "redox_syscall 0.7.0", ] +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "pkg-config", + "vcpkg", +] + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -2960,26 +3014,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "moka" -version = "0.12.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" -dependencies = [ - "async-lock", - "crossbeam-channel", - "crossbeam-epoch", - "crossbeam-utils", - "equivalent", - "event-listener", - "futures-util", - "parking_lot", - "portable-atomic", - "smallvec", - "tagptr", - "uuid", -] - [[package]] name = 
"multer" version = "3.1.0" @@ -3209,16 +3243,28 @@ dependencies = [ ] [[package]] -name = "nvisy-data" +name = "nvisy-dal" version = "0.1.0" dependencies = [ "async-trait", "bytes", + "deadpool", + "derive_more", + "diesel", + "diesel-async", "futures", + "jiff", + "milvus-sdk-rust", + "opendal", + "pgvector", + "pinecone-sdk", + "prost-types 0.12.6", + "qdrant-client", "serde", "serde_json", "thiserror 2.0.18", "tokio", + "uuid", ] [[package]] @@ -3243,22 +3289,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "nvisy-opendal" -version = "0.1.0" -dependencies = [ - "async-trait", - "bytes", - "futures", - "jiff", - "nvisy-data", - "opendal", - "serde", - "serde_json", - "tokio", - "tracing", -] - [[package]] name = "nvisy-postgres" version = "0.1.0" @@ -3437,11 +3467,12 @@ dependencies = [ name = "nvisy-runtime" version = "0.1.0" dependencies = [ + "async-trait", "derive_builder", "derive_more", "futures", "nvisy-core", - "nvisy-opendal", + "nvisy-dal", "nvisy-rt-core", "nvisy-rt-engine", "petgraph 0.8.3", @@ -3501,28 +3532,6 @@ dependencies = [ "zxcvbn", ] -[[package]] -name = "nvisy-vector" -version = "0.1.0" -dependencies = [ - "async-trait", - "deadpool", - "diesel", - "diesel-async", - "futures", - "milvus-sdk-rust", - "nvisy-data", - "pgvector", - "pinecone-sdk", - "prost-types 0.12.6", - "qdrant-client", - "reqwest", - "serde", - "serde_json", - "tokio", - "tracing", -] - [[package]] name = "nvisy-webhook" version = "0.1.0" @@ -3585,7 +3594,6 @@ dependencies = [ "http-body 1.0.1", "log", "md-5", - "moka", "percent-encoding", "quick-xml", "reqsign", @@ -3593,6 +3601,7 @@ dependencies = [ "serde", "serde_json", "sha2", + "sqlx", "tokio", "uuid", ] @@ -5224,6 +5233,9 @@ name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +dependencies = [ + "serde", +] [[package]] name = "snafu" @@ -5277,6 +5289,9 @@ name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] [[package]] name = "spki" @@ -5288,6 +5303,196 @@ dependencies = [ "der", ] +[[package]] +name = "sqlx" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +dependencies = [ + "sqlx-core", + "sqlx-macros", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", +] + +[[package]] +name = "sqlx-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +dependencies = [ + "base64 0.22.1", + "bytes", + "crc", + "crossbeam-queue", + "either", + "event-listener", + "futures-core", + "futures-intrusive", + "futures-io", + "futures-util", + "hashbrown 0.15.5", + "hashlink", + "indexmap 2.13.0", + "log", + "memchr", + "once_cell", + "percent-encoding", + "rustls 0.23.36", + "serde", + "serde_json", + "sha2", + "smallvec", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tracing", + "url", + "webpki-roots 0.26.11", +] + +[[package]] +name = "sqlx-macros" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +dependencies = [ + "proc-macro2", + "quote", + "sqlx-core", + "sqlx-macros-core", + "syn 2.0.114", +] + +[[package]] +name = 
"sqlx-macros-core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +dependencies = [ + "dotenvy", + "either", + "heck 0.5.0", + "hex", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "sha2", + "sqlx-core", + "sqlx-mysql", + "sqlx-postgres", + "sqlx-sqlite", + "syn 2.0.114", + "tokio", + "url", +] + +[[package]] +name = "sqlx-mysql" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +dependencies = [ + "atoi", + "base64 0.22.1", + "bitflags 2.10.0", + "byteorder", + "bytes", + "crc", + "digest 0.10.7", + "dotenvy", + "either", + "futures-channel", + "futures-core", + "futures-io", + "futures-util", + "generic-array", + "hex", + "hkdf", + "hmac", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "percent-encoding", + "rand 0.8.5", + "rsa", + "serde", + "sha1", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "whoami 1.6.1", +] + +[[package]] +name = "sqlx-postgres" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +dependencies = [ + "atoi", + "base64 0.22.1", + "bitflags 2.10.0", + "byteorder", + "crc", + "dotenvy", + "etcetera", + "futures-channel", + "futures-core", + "futures-util", + "hex", + "hkdf", + "hmac", + "home", + "itoa", + "log", + "md-5", + "memchr", + "once_cell", + "rand 0.8.5", + "serde", + "serde_json", + "sha2", + "smallvec", + "sqlx-core", + "stringprep", + "thiserror 2.0.18", + "tracing", + "whoami 1.6.1", +] + +[[package]] +name = "sqlx-sqlite" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" +dependencies = [ + "atoi", + "flume", + "futures-channel", + "futures-core", + "futures-executor", + "futures-intrusive", + "futures-util", + "libsqlite3-sys", + "log", + "percent-encoding", + "serde", + "serde_urlencoded", + "sqlx-core", + "thiserror 2.0.18", + "tracing", + "url", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -5426,12 +5631,6 @@ dependencies = [ "libc", ] -[[package]] -name = "tagptr" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" - [[package]] name = "tar" version = "0.4.44" @@ -5659,7 +5858,7 @@ dependencies = [ "socket2 0.6.1", "tokio", "tokio-util", - "whoami", + "whoami 2.0.2", ] [[package]] @@ -6299,6 +6498,12 @@ dependencies = [ "wit-bindgen", ] +[[package]] +name = "wasite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" + [[package]] name = "wasite" version = "1.0.2" @@ -6440,6 +6645,16 @@ dependencies = [ "rustix 0.38.44", ] +[[package]] +name = "whoami" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" +dependencies = [ + "libredox", + "wasite 0.1.0", +] + [[package]] name = "whoami" version = "2.0.2" @@ -6447,7 +6662,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace4d5c7b5ab3d99629156d4e0997edbe98a4beb6d5ba99e2cae830207a81983" 
dependencies = [ "libredox", - "wasite", + "wasite 1.0.2", "web-sys", ] @@ -6543,6 +6758,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" @@ -6570,6 +6794,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -6603,6 +6842,12 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -6615,6 +6860,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -6627,6 +6878,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -6651,6 +6908,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -6663,6 +6926,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -6675,6 +6944,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -6687,6 +6962,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" diff --git a/Cargo.toml b/Cargo.toml index bbb3e3e..576fc6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,14 +5,12 @@ resolver = "2" members = [ "./crates/nvisy-cli", "./crates/nvisy-core", - "./crates/nvisy-data", + "./crates/nvisy-dal", "./crates/nvisy-nats", - "./crates/nvisy-opendal", "./crates/nvisy-postgres", "./crates/nvisy-rig", "./crates/nvisy-runtime", "./crates/nvisy-server", - "./crates/nvisy-vector", "./crates/nvisy-webhook", ] @@ -36,14 +34,12 @@ documentation = "https://docs.rs/nvisy-server" # Internal crates nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0" } -nvisy-data = { path = "./crates/nvisy-data", version = "0.1.0" } +nvisy-dal = { path = "./crates/nvisy-dal", version = "0.1.0" } nvisy-nats = { path = "./crates/nvisy-nats", version = "0.1.0" } -nvisy-opendal = { path = "./crates/nvisy-opendal", version = "0.1.0" } nvisy-postgres = { path = "./crates/nvisy-postgres", version = "0.1.0" } nvisy-rig = { path = "./crates/nvisy-rig", version = "0.1.0" } nvisy-runtime = { path = "./crates/nvisy-runtime", version = "0.1.0" } nvisy-server = { path = "./crates/nvisy-server", version = "0.1.0" } -nvisy-vector = { path = "./crates/nvisy-vector", version = "0.1.0" } nvisy-webhook = { path = "./crates/nvisy-webhook", version = "0.1.0" } # Runtime crates (from github.com/nvisycom/runtime) @@ -130,6 +126,8 @@ uuid = { version = "1.18", features = ["serde", "v4", "v7"] } ipnet = { version = "2.11", features = [] } bigdecimal = { version = "0.4", features = ["serde"] } bytes = { version = "1.10", features = ["serde"] } +petgraph = { version = "0.8", features = ["serde-1"] } +semver = { version = "1.0", features = ["serde"] } url = { version = "2.5", features = [] } # Text processing @@ -140,17 +138,9 @@ woothee = { version = "0.13", features = [] } # AI/ML frameworks rig-core = { version = "0.28", default-features = false, features = ["reqwest-rustls"] } -# Vector store clients +# Storage abstractions and providers +opendal = { version = "0.53", features = [] } qdrant-client = { version = "1.13", features = [] } pinecone-sdk = { version = "0.1", features = [] } milvus-sdk-rust = { version = "0.1", features = [] } prost-types = { version = "0.12", features = [] } - -# Storage abstraction -opendal = { version = "0.53", features = [] } - -# Graph data structures -petgraph = { version = "0.8", features = ["serde-1"] } - -# Versioning -semver = { version = "1.0", features = ["serde"] } diff --git a/crates/nvisy-vector/Cargo.toml b/crates/nvisy-dal/Cargo.toml similarity index 73% rename from crates/nvisy-vector/Cargo.toml rename to crates/nvisy-dal/Cargo.toml index 25fb2ea..3c5b027 100644 --- a/crates/nvisy-vector/Cargo.toml +++ b/crates/nvisy-dal/Cargo.toml @@ -1,13 +1,15 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-vector" +name = "nvisy-dal" +description = "Data abstraction layer for workflow inputs and outputs" +readme = "./README.md" + version = { 
workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } @@ -19,25 +21,26 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -# Internal crates -nvisy-data = { workspace = true } - # Async runtime tokio = { workspace = true, features = ["rt", "sync"] } futures = { workspace = true, features = [] } -# HTTP client (for REST-based backends) -reqwest = { workspace = true, features = ["json"] } - -# Observability -tracing = { workspace = true, features = [] } - # (De)serialization serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } # Derive macros & utilities async-trait = { workspace = true, features = [] } +derive_more = { workspace = true, features = ["from"] } +thiserror = { workspace = true, features = [] } + +# Data types +bytes = { workspace = true, features = [] } +uuid = { workspace = true, features = ["v4", "v7"] } +jiff = { workspace = true, features = ["serde"] } + +# Storage (OpenDAL) +opendal = { workspace = true, features = ["services-s3", "services-gcs", "services-azblob", "services-postgresql", "services-mysql"] } # Vector store clients qdrant-client = { workspace = true, features = [] } diff --git a/crates/nvisy-dal/README.md b/crates/nvisy-dal/README.md new file mode 100644 index 0000000..314f6e6 --- /dev/null +++ b/crates/nvisy-dal/README.md @@ -0,0 +1,180 @@ +# nvisy-dal + +Data Abstraction Layer for workflow inputs and outputs. + +## Overview + +This crate provides a unified interface for reading and writing data across various storage backends. It supports blob storage (S3, GCS, Azure Blob), relational databases (PostgreSQL, MySQL), and vector databases (Qdrant, Pinecone, Milvus, pgvector). 
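+
+Because every provider implements the same `DataInput` and `DataOutput` traits, a pipeline can be written once and reused across backends. The sketch below is illustrative rather than part of the crate: `copy_blobs` is a hypothetical helper, and the generic bounds are assumed to mirror the per-provider trait implementations shown later in this patch.
+
+```rust
+use futures::StreamExt;
+use nvisy_dal::datatype::Blob;
+use nvisy_dal::{Context, DataInput, DataOutput, Result};
+
+// Drains blobs from any input provider and writes them to any output provider.
+async fn copy_blobs<I, O>(input: &I, output: &O, ctx: &Context) -> Result<()>
+where
+    I: DataInput<Blob>,
+    O: DataOutput<Blob>,
+{
+    let mut stream = input.read(ctx).await?;
+    let mut batch = Vec::new();
+    while let Some(item) = stream.next().await {
+        batch.push(item?);
+    }
+    output.write(ctx, batch).await
+}
+```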
+ +## Modules + +- **`context`** - Context types for data operations (target, cursor, limit) +- **`datatype`** - Data types that flow through the DAL (Blob, Document, Embedding, Record, Graph, Message) +- **`provider`** - Storage and database providers +- **`stream`** - Stream types (`InputStream`, `OutputStream`) wrapping `BoxStream` +- **`traits`** - Core traits (`DataInput`, `DataOutput`) + +## Data Types + +All types implement the `DataType` marker trait: + +- **Blob** - Binary data with path and optional content type +- **Document** - Structured documents with title, content, and metadata +- **Embedding** - Vector embeddings with metadata for similarity search +- **Record** - Tabular data as key-value maps +- **Graph** - Graph structures with nodes and edges +- **Message** - Messages for queue-based systems + +## Providers + +### Storage Providers (OpenDAL-based) + +| Provider | Config | Data Type | +|----------|--------|-----------| +| `S3Provider` | `S3Config` | `Blob` | +| `GcsProvider` | `GcsConfig` | `Blob` | +| `AzblobProvider` | `AzblobConfig` | `Blob` | + +### Database Providers (OpenDAL-based) + +| Provider | Config | Data Type | +|----------|--------|-----------| +| `PostgresProvider` | `PostgresConfig` | `Record` | +| `MysqlProvider` | `MysqlConfig` | `Record` | + +### Vector Providers + +| Provider | Config | Data Type | +|----------|--------|-----------| +| `QdrantProvider` | `QdrantConfig` | `Embedding` | +| `PineconeProvider` | `PineconeConfig` | `Embedding` | +| `MilvusProvider` | `MilvusConfig` | `Embedding` | +| `PgVectorProvider` | `PgVectorConfig` | `Embedding` | + +## Streams + +The DAL uses wrapped stream types for better ergonomics: + +```rust +use nvisy_dal::stream::{InputStream, OutputStream, ItemStream}; + +// InputStream wraps a BoxStream with optional pagination cursor +let input: InputStream = provider.read(&ctx).await?; +let cursor = input.cursor(); // Get pagination cursor + +// OutputStream wraps a Sink for streaming writes +``` + +## Usage + +### Storage Example + +```rust +use nvisy_dal::{Context, DataInput, DataOutput}; +use nvisy_dal::provider::{S3Config, S3Provider}; +use nvisy_dal::datatype::Blob; +use futures::StreamExt; + +// Create provider +let config = S3Config::new("my-bucket", "us-east-1") + .with_credentials("access_key", "secret_key"); +let provider = S3Provider::new(&config)?; + +// Read blobs +let ctx = Context::new().with_target("data/"); +let mut stream = provider.read(&ctx).await?; + +while let Some(result) = stream.next().await { + let blob = result?; + println!("Read: {}", blob.path); +} + +// Write blobs +let blob = Blob::new("output/file.txt", b"Hello, world!".to_vec()); +provider.write(&ctx, vec![blob]).await?; +``` + +### Database Example + +```rust +use nvisy_dal::{Context, DataInput, DataOutput}; +use nvisy_dal::provider::{PostgresConfig, PostgresProvider}; +use nvisy_dal::datatype::Record; + +// Create provider +let config = PostgresConfig::new("postgresql://user:pass@localhost/db") + .with_table("my_table"); +let provider = PostgresProvider::new(&config)?; + +// Read records +let ctx = Context::new(); +let stream = provider.read(&ctx).await?; + +// Write records +let record = Record::new() + .set("name", "Alice") + .set("age", 30); +provider.write(&ctx, vec![record]).await?; +``` + +### Vector Example + +```rust +use nvisy_dal::{Context, DataOutput}; +use nvisy_dal::provider::{QdrantConfig, QdrantProvider}; +use nvisy_dal::datatype::Embedding; + +// Create provider +let config = QdrantConfig::new("http://localhost:6334"); 
+let provider = QdrantProvider::new(&config).await?; + +// Write embeddings +let ctx = Context::new().with_target("my_collection"); +let embedding = Embedding::new("doc1", vec![0.1, 0.2, 0.3]); +provider.write(&ctx, vec![embedding]).await?; + +// Search (provider-specific method) +let results = provider.search( + "my_collection", + vec![0.1, 0.2, 0.3], + 10, + true, // include_vectors + true, // include_metadata + None, // filter +).await?; +``` + +## Traits + +### DataInput + +```rust +#[async_trait] +pub trait DataInput: Send + Sync { + async fn read(&self, ctx: &Context) -> Result>; +} +``` + +### DataOutput + +```rust +#[async_trait] +pub trait DataOutput: Send + Sync { + async fn write(&self, ctx: &Context, items: Vec) -> Result<()>; +} +``` + +## Context + +The `Context` struct provides configuration for read/write operations: + +```rust +let ctx = Context::new() + .with_target("my_collection") // Collection, table, bucket prefix, etc. + .with_cursor("abc123") // Pagination cursor + .with_limit(100); // Maximum items to read +``` + +## License + +MIT diff --git a/crates/nvisy-dal/src/core/context.rs b/crates/nvisy-dal/src/core/context.rs new file mode 100644 index 0000000..dc19ca5 --- /dev/null +++ b/crates/nvisy-dal/src/core/context.rs @@ -0,0 +1,55 @@ +//! Context types for data operations. + +/// Context for data operations. +/// +/// Provides configuration for read/write operations including target, +/// pagination cursor, and limits. +#[derive(Debug, Clone, Default)] +pub struct Context { + /// Target collection, table, bucket, topic, etc. + pub target: Option, + /// Cursor for pagination (provider-specific format). + pub cursor: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +impl Context { + /// Creates a new empty context. + pub fn new() -> Self { + Self::default() + } + + /// Sets the target. + pub fn with_target(mut self, target: impl Into) -> Self { + self.target = Some(target.into()); + self + } + + /// Sets the cursor for pagination. + pub fn with_cursor(mut self, cursor: impl Into) -> Self { + self.cursor = Some(cursor.into()); + self + } + + /// Sets the limit. + pub fn with_limit(mut self, limit: usize) -> Self { + self.limit = Some(limit); + self + } + + /// Returns the target, if set. + pub fn target(&self) -> Option<&str> { + self.target.as_deref() + } + + /// Returns the cursor, if set. + pub fn cursor(&self) -> Option<&str> { + self.cursor.as_deref() + } + + /// Returns the limit, if set. + pub fn limit(&self) -> Option { + self.limit + } +} diff --git a/crates/nvisy-dal/src/core/mod.rs b/crates/nvisy-dal/src/core/mod.rs new file mode 100644 index 0000000..a8fcceb --- /dev/null +++ b/crates/nvisy-dal/src/core/mod.rs @@ -0,0 +1,31 @@ +//! Core types and traits for data operations. + +mod context; +mod stream; + +pub use context::Context; +pub use stream::{InputStream, ItemSink, ItemStream, OutputStream}; + +use crate::Result; +use crate::datatype::DataType; + +/// Trait for reading data from a source. +/// +/// Implementations provide streaming access to data with optional pagination. +#[async_trait::async_trait] +pub trait DataInput: Send + Sync { + /// Reads items from the source. + /// + /// Returns an input stream containing items and an optional cursor + /// for pagination. + async fn read(&self, ctx: &Context) -> Result>; +} + +/// Trait for writing data to a sink. +/// +/// Implementations accept batches of items for writing. 
+#[async_trait::async_trait] +pub trait DataOutput: Send + Sync { + /// Writes a batch of items to the sink. + async fn write(&self, ctx: &Context, items: Vec) -> Result<()>; +} diff --git a/crates/nvisy-dal/src/core/stream.rs b/crates/nvisy-dal/src/core/stream.rs new file mode 100644 index 0000000..60a599c --- /dev/null +++ b/crates/nvisy-dal/src/core/stream.rs @@ -0,0 +1,118 @@ +//! Stream types for data input and output operations. + +use std::pin::Pin; +use std::task::{Context, Poll}; + +use futures::stream::BoxStream; +use futures::{Sink, Stream}; + +use crate::Result; + +/// A boxed stream of items. +pub type ItemStream<'a, T> = BoxStream<'a, Result>; + +/// Input stream wrapper for reading data. +/// +/// Wraps a boxed stream and provides a cursor for pagination. +pub struct InputStream<'a, T> { + stream: ItemStream<'a, T>, + cursor: Option, +} + +impl<'a, T> InputStream<'a, T> { + /// Creates a new input stream. + pub fn new(stream: ItemStream<'a, T>) -> Self { + Self { + stream, + cursor: None, + } + } + + /// Creates a new input stream with a cursor. + pub fn with_cursor(stream: ItemStream<'a, T>, cursor: Option) -> Self { + Self { stream, cursor } + } + + /// Returns the cursor for the next read, if any. + pub fn cursor(&self) -> Option<&str> { + self.cursor.as_deref() + } + + /// Consumes the stream and returns the inner boxed stream. + pub fn into_inner(self) -> ItemStream<'a, T> { + self.stream + } + + /// Consumes the stream and returns both the inner stream and cursor. + pub fn into_parts(self) -> (ItemStream<'a, T>, Option) { + (self.stream, self.cursor) + } +} + +impl Stream for InputStream<'_, T> { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.stream).poll_next(cx) + } + + fn size_hint(&self) -> (usize, Option) { + self.stream.size_hint() + } +} + +/// A boxed sink for items. +pub type ItemSink<'a, T> = Pin + Send + 'a>>; + +/// Output stream wrapper for writing data. +/// +/// Wraps a boxed sink for streaming writes. +pub struct OutputStream<'a, T> { + sink: ItemSink<'a, T>, +} + +impl<'a, T> OutputStream<'a, T> { + /// Creates a new output stream. + pub fn new(sink: ItemSink<'a, T>) -> Self { + Self { sink } + } + + /// Consumes the stream and returns the inner boxed sink. + pub fn into_inner(self) -> ItemSink<'a, T> { + self.sink + } +} + +impl Sink for OutputStream<'_, T> { + type Error = crate::Error; + + fn poll_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_ready(cx) + } + + fn start_send(mut self: Pin<&mut Self>, item: T) -> Result<()> { + self.sink.as_mut().start_send(item) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_flush(cx) + } + + fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_close(cx) + } +} + +impl std::fmt::Debug for OutputStream<'_, T> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OutputStream").finish_non_exhaustive() + } +} + +impl std::fmt::Debug for InputStream<'_, T> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("InputStream") + .field("cursor", &self.cursor) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/datatype/blob.rs b/crates/nvisy-dal/src/datatype/blob.rs new file mode 100644 index 0000000..e6f23a1 --- /dev/null +++ b/crates/nvisy-dal/src/datatype/blob.rs @@ -0,0 +1,74 @@ +//! 
Blob data type for files and objects. + +use bytes::Bytes; +use serde::{Deserialize, Serialize}; + +use super::{DataType, Metadata}; + +/// A blob representing a file or object. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Blob { + /// Path or key identifying this blob. + pub path: String, + /// Raw binary data. + #[serde(with = "serde_bytes")] + pub data: Bytes, + /// Content type (MIME type). + #[serde(skip_serializing_if = "Option::is_none")] + pub content_type: Option, + /// Additional metadata. + #[serde(default)] + pub metadata: Metadata, +} + +impl Blob { + /// Creates a new blob. + pub fn new(path: impl Into, data: impl Into) -> Self { + Self { + path: path.into(), + data: data.into(), + content_type: None, + metadata: Metadata::new(), + } + } + + /// Sets the content type. + pub fn with_content_type(mut self, content_type: impl Into) -> Self { + self.content_type = Some(content_type.into()); + self + } + + /// Sets metadata. + pub fn with_metadata(mut self, metadata: Metadata) -> Self { + self.metadata = metadata; + self + } +} + +impl DataType for Blob { + const TYPE_ID: &'static str = "blob"; + + fn data_type_id() -> super::DataTypeId { + super::DataTypeId::Blob + } +} + +mod serde_bytes { + use bytes::Bytes; + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + + pub fn serialize(bytes: &Bytes, serializer: S) -> Result + where + S: Serializer, + { + bytes.as_ref().serialize(serializer) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let vec = Vec::::deserialize(deserializer)?; + Ok(Bytes::from(vec)) + } +} diff --git a/crates/nvisy-dal/src/datatype/document.rs b/crates/nvisy-dal/src/datatype/document.rs new file mode 100644 index 0000000..7354346 --- /dev/null +++ b/crates/nvisy-dal/src/datatype/document.rs @@ -0,0 +1,42 @@ +//! Document data type for JSON documents. + +use serde::{Deserialize, Serialize}; + +use super::{DataType, Metadata}; + +/// A document with flexible JSON content. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Document { + /// Unique identifier. + pub id: String, + /// Document content as JSON. + pub content: serde_json::Value, + /// Additional metadata. + #[serde(default)] + pub metadata: Metadata, +} + +impl Document { + /// Creates a new document. + pub fn new(id: impl Into, content: serde_json::Value) -> Self { + Self { + id: id.into(), + content, + metadata: Metadata::new(), + } + } + + /// Sets metadata. + pub fn with_metadata(mut self, metadata: Metadata) -> Self { + self.metadata = metadata; + self + } +} + +impl DataType for Document { + const TYPE_ID: &'static str = "document"; + + fn data_type_id() -> super::DataTypeId { + super::DataTypeId::Document + } +} diff --git a/crates/nvisy-dal/src/datatype/embedding.rs b/crates/nvisy-dal/src/datatype/embedding.rs new file mode 100644 index 0000000..0e9b152 --- /dev/null +++ b/crates/nvisy-dal/src/datatype/embedding.rs @@ -0,0 +1,47 @@ +//! Embedding data type for vector data. + +use serde::{Deserialize, Serialize}; + +use super::{DataType, Metadata}; + +/// A vector embedding with metadata. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Embedding { + /// Unique identifier. + pub id: String, + /// The embedding vector. + pub vector: Vec, + /// Additional metadata. + #[serde(default)] + pub metadata: Metadata, +} + +impl Embedding { + /// Creates a new embedding. 
+ pub fn new(id: impl Into, vector: Vec) -> Self { + Self { + id: id.into(), + vector, + metadata: Metadata::new(), + } + } + + /// Sets metadata. + pub fn with_metadata(mut self, metadata: Metadata) -> Self { + self.metadata = metadata; + self + } + + /// Returns the vector dimensions. + pub fn dimensions(&self) -> usize { + self.vector.len() + } +} + +impl DataType for Embedding { + const TYPE_ID: &'static str = "embedding"; + + fn data_type_id() -> super::DataTypeId { + super::DataTypeId::Embedding + } +} diff --git a/crates/nvisy-dal/src/datatype/graph.rs b/crates/nvisy-dal/src/datatype/graph.rs new file mode 100644 index 0000000..beb222e --- /dev/null +++ b/crates/nvisy-dal/src/datatype/graph.rs @@ -0,0 +1,129 @@ +//! Graph data type with nodes and edges. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use super::DataType; + +/// A graph containing nodes and edges. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Graph { + /// Nodes in the graph. + #[serde(default)] + pub nodes: Vec, + /// Edges in the graph. + #[serde(default)] + pub edges: Vec, +} + +impl Graph { + /// Creates a new empty graph. + pub fn new() -> Self { + Self::default() + } + + /// Adds a node. + pub fn with_node(mut self, node: Node) -> Self { + self.nodes.push(node); + self + } + + /// Adds an edge. + pub fn with_edge(mut self, edge: Edge) -> Self { + self.edges.push(edge); + self + } +} + +impl DataType for Graph { + const TYPE_ID: &'static str = "graph"; + + fn data_type_id() -> super::DataTypeId { + super::DataTypeId::Graph + } +} + +/// A node in a graph. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Node { + /// Unique identifier. + pub id: String, + /// Node labels (types). + #[serde(default)] + pub labels: Vec, + /// Node properties. + #[serde(default)] + pub properties: HashMap, +} + +impl Node { + /// Creates a new node. + pub fn new(id: impl Into) -> Self { + Self { + id: id.into(), + labels: Vec::new(), + properties: HashMap::new(), + } + } + + /// Adds a label. + pub fn with_label(mut self, label: impl Into) -> Self { + self.labels.push(label.into()); + self + } + + /// Sets a property. + pub fn with_property( + mut self, + key: impl Into, + value: impl Into, + ) -> Self { + self.properties.insert(key.into(), value.into()); + self + } +} + +/// An edge in a graph. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Edge { + /// Unique identifier. + pub id: String, + /// Source node ID. + pub from: String, + /// Target node ID. + pub to: String, + /// Edge label (relationship type). + pub label: String, + /// Edge properties. + #[serde(default)] + pub properties: HashMap, +} + +impl Edge { + /// Creates a new edge. + pub fn new( + id: impl Into, + from: impl Into, + to: impl Into, + label: impl Into, + ) -> Self { + Self { + id: id.into(), + from: from.into(), + to: to.into(), + label: label.into(), + properties: HashMap::new(), + } + } + + /// Sets a property. + pub fn with_property( + mut self, + key: impl Into, + value: impl Into, + ) -> Self { + self.properties.insert(key.into(), value.into()); + self + } +} diff --git a/crates/nvisy-dal/src/datatype/message.rs b/crates/nvisy-dal/src/datatype/message.rs new file mode 100644 index 0000000..21ca7a0 --- /dev/null +++ b/crates/nvisy-dal/src/datatype/message.rs @@ -0,0 +1,82 @@ +//! Message data type for queue messages. 
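+//!
+//! A minimal construction sketch (illustrative only; the payload here is assumed
+//! to deserialize into a `serde_json::Value` via [`Message::payload_json`]):
+//!
+//! ```
+//! use nvisy_dal::datatype::Message;
+//!
+//! let msg = Message::new("msg-1", br#"{"state":"queued"}"#.to_vec())
+//!     .with_header("source", "orders");
+//!
+//! let body: serde_json::Value = msg.payload_json().unwrap();
+//! assert_eq!(body["state"], "queued");
+//! ```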
+ +use std::collections::HashMap; + +use bytes::Bytes; +use jiff::Timestamp; +use serde::{Deserialize, Serialize}; + +use super::DataType; + +/// A message from a queue or stream. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + /// Unique identifier. + pub id: String, + /// Message payload. + #[serde(with = "serde_bytes")] + pub payload: Bytes, + /// Message headers. + #[serde(default)] + pub headers: HashMap, + /// Timestamp when the message was created. + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option, +} + +impl Message { + /// Creates a new message. + pub fn new(id: impl Into, payload: impl Into) -> Self { + Self { + id: id.into(), + payload: payload.into(), + headers: HashMap::new(), + timestamp: None, + } + } + + /// Sets a header. + pub fn with_header(mut self, key: impl Into, value: impl Into) -> Self { + self.headers.insert(key.into(), value.into()); + self + } + + /// Sets the timestamp. + pub fn with_timestamp(mut self, timestamp: Timestamp) -> Self { + self.timestamp = Some(timestamp); + self + } + + /// Tries to deserialize the payload as JSON. + pub fn payload_json(&self) -> Result { + serde_json::from_slice(&self.payload) + } +} + +impl DataType for Message { + const TYPE_ID: &'static str = "message"; + + fn data_type_id() -> super::DataTypeId { + super::DataTypeId::Message + } +} + +mod serde_bytes { + use bytes::Bytes; + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + + pub fn serialize(bytes: &Bytes, serializer: S) -> Result + where + S: Serializer, + { + bytes.as_ref().serialize(serializer) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let vec = Vec::::deserialize(deserializer)?; + Ok(Bytes::from(vec)) + } +} diff --git a/crates/nvisy-dal/src/datatype/mod.rs b/crates/nvisy-dal/src/datatype/mod.rs new file mode 100644 index 0000000..46591fe --- /dev/null +++ b/crates/nvisy-dal/src/datatype/mod.rs @@ -0,0 +1,137 @@ +//! Data types for the DAL. + +mod blob; +mod document; +mod embedding; +mod graph; +mod message; +mod record; + +use std::collections::HashMap; + +pub use blob::Blob; +use derive_more::From; +pub use document::Document; +pub use embedding::Embedding; +pub use graph::{Edge, Graph, Node}; +pub use message::Message; +pub use record::Record; +use serde::{Deserialize, Serialize}; + +/// Metadata associated with data items. +pub type Metadata = HashMap; + +/// Marker trait for data types that can be read/written through the DAL. +pub trait DataType: Send + Sync + 'static { + /// Unique type identifier. + const TYPE_ID: &'static str; + + /// Returns the corresponding DataTypeId. + fn data_type_id() -> DataTypeId; +} + +/// Data type identifier for runtime type checking and JSON schema. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DataTypeId { + Blob, + Document, + Embedding, + Graph, + Record, + Message, +} + +impl DataTypeId { + /// Returns the string identifier for this type. + pub const fn as_str(&self) -> &'static str { + match self { + Self::Blob => "blob", + Self::Document => "document", + Self::Embedding => "embedding", + Self::Graph => "graph", + Self::Record => "record", + Self::Message => "message", + } + } +} + +impl std::fmt::Display for DataTypeId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.as_str()) + } +} + +/// Type-erased data value for runtime dispatch. 
+#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyDataValue { + Blob(Blob), + Document(Document), + Embedding(Embedding), + Graph(Graph), + Record(Record), + Message(Message), +} + +impl AnyDataValue { + /// Returns the type identifier for this value. + pub const fn type_id(&self) -> DataTypeId { + match self { + Self::Blob(_) => DataTypeId::Blob, + Self::Document(_) => DataTypeId::Document, + Self::Embedding(_) => DataTypeId::Embedding, + Self::Graph(_) => DataTypeId::Graph, + Self::Record(_) => DataTypeId::Record, + Self::Message(_) => DataTypeId::Message, + } + } + + /// Attempts to extract a Blob value. + pub fn into_blob(self) -> Option { + match self { + Self::Blob(v) => Some(v), + _ => None, + } + } + + /// Attempts to extract a Document value. + pub fn into_document(self) -> Option { + match self { + Self::Document(v) => Some(v), + _ => None, + } + } + + /// Attempts to extract an Embedding value. + pub fn into_embedding(self) -> Option { + match self { + Self::Embedding(v) => Some(v), + _ => None, + } + } + + /// Attempts to extract a Graph value. + pub fn into_graph(self) -> Option { + match self { + Self::Graph(v) => Some(v), + _ => None, + } + } + + /// Attempts to extract a Record value. + pub fn into_record(self) -> Option { + match self { + Self::Record(v) => Some(v), + _ => None, + } + } + + /// Attempts to extract a Message value. + pub fn into_message(self) -> Option { + match self { + Self::Message(v) => Some(v), + _ => None, + } + } +} diff --git a/crates/nvisy-dal/src/datatype/record.rs b/crates/nvisy-dal/src/datatype/record.rs new file mode 100644 index 0000000..246fbd1 --- /dev/null +++ b/crates/nvisy-dal/src/datatype/record.rs @@ -0,0 +1,53 @@ +//! Record data type for relational data. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use super::DataType; + +/// A record representing a row in a relational table. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Record { + /// Column values keyed by column name. + pub columns: HashMap, +} + +impl Record { + /// Creates a new empty record. + pub fn new() -> Self { + Self { + columns: HashMap::new(), + } + } + + /// Creates a record from columns. + pub fn from_columns(columns: HashMap) -> Self { + Self { columns } + } + + /// Sets a column value. + pub fn set(mut self, column: impl Into, value: impl Into) -> Self { + self.columns.insert(column.into(), value.into()); + self + } + + /// Gets a column value. + pub fn get(&self, column: &str) -> Option<&serde_json::Value> { + self.columns.get(column) + } +} + +impl Default for Record { + fn default() -> Self { + Self::new() + } +} + +impl DataType for Record { + const TYPE_ID: &'static str = "record"; + + fn data_type_id() -> super::DataTypeId { + super::DataTypeId::Record + } +} diff --git a/crates/nvisy-dal/src/error.rs b/crates/nvisy-dal/src/error.rs new file mode 100644 index 0000000..211df57 --- /dev/null +++ b/crates/nvisy-dal/src/error.rs @@ -0,0 +1,83 @@ +//! Error types for data operations. + +use std::fmt; + +/// Result type for data operations. +pub type Result = std::result::Result; + +/// Error type for data operations. +#[derive(Debug)] +pub struct Error { + kind: ErrorKind, + message: String, + source: Option>, +} + +/// The kind of data error. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ErrorKind { + /// Connection error. + Connection, + /// Resource not found. + NotFound, + /// Invalid input. 
+ InvalidInput, + /// Provider error. + Provider, +} + +impl Error { + /// Creates a new error. + pub fn new(kind: ErrorKind, message: impl Into) -> Self { + Self { + kind, + message: message.into(), + source: None, + } + } + + /// Adds a source error. + pub fn with_source(mut self, source: impl std::error::Error + Send + Sync + 'static) -> Self { + self.source = Some(Box::new(source)); + self + } + + /// Returns the error kind. + pub fn kind(&self) -> ErrorKind { + self.kind + } + + /// Creates a connection error. + pub fn connection(message: impl Into) -> Self { + Self::new(ErrorKind::Connection, message) + } + + /// Creates a not found error. + pub fn not_found(message: impl Into) -> Self { + Self::new(ErrorKind::NotFound, message) + } + + /// Creates an invalid input error. + pub fn invalid_input(message: impl Into) -> Self { + Self::new(ErrorKind::InvalidInput, message) + } + + /// Creates a provider error. + pub fn provider(message: impl Into) -> Self { + Self::new(ErrorKind::Provider, message) + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}: {}", self.kind, self.message) + } +} + +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.source + .as_ref() + .map(|e| e.as_ref() as &(dyn std::error::Error + 'static)) + } +} diff --git a/crates/nvisy-dal/src/lib.rs b/crates/nvisy-dal/src/lib.rs new file mode 100644 index 0000000..4ef1900 --- /dev/null +++ b/crates/nvisy-dal/src/lib.rs @@ -0,0 +1,24 @@ +//! Data Abstraction Layer for workflow inputs and outputs. +//! +//! This crate provides a unified interface for reading and writing data +//! across various storage backends. + +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] + +pub mod core; +pub mod datatype; +pub mod provider; + +mod error; + +pub use core::{Context, DataInput, DataOutput, InputStream, ItemSink, ItemStream, OutputStream}; + +pub use datatype::{AnyDataValue, DataTypeId}; +pub use error::{Error, ErrorKind, Result}; +pub use provider::ProviderConfig; + +/// Alias for backwards compatibility with nvisy-opendal. +pub type StorageError = Error; +/// Alias for backwards compatibility. +pub type StorageConfig = ProviderConfig; diff --git a/crates/nvisy-opendal/src/azblob/config.rs b/crates/nvisy-dal/src/provider/azblob/config.rs similarity index 69% rename from crates/nvisy-opendal/src/azblob/config.rs rename to crates/nvisy-dal/src/provider/azblob/config.rs index f2ef5d4..ab4f1e9 100644 --- a/crates/nvisy-opendal/src/azblob/config.rs +++ b/crates/nvisy-dal/src/provider/azblob/config.rs @@ -4,26 +4,30 @@ use serde::{Deserialize, Serialize}; /// Azure Blob Storage configuration. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct AzureBlobConfig { - /// Container name. - pub container: String, +pub struct AzblobConfig { /// Storage account name. pub account_name: String, - /// Storage account key. + /// Container name. + pub container: String, + /// Account key for authentication. #[serde(skip_serializing_if = "Option::is_none")] pub account_key: Option, + /// SAS token for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub sas_token: Option, /// Path prefix within the container. #[serde(skip_serializing_if = "Option::is_none")] pub prefix: Option, } -impl AzureBlobConfig { +impl AzblobConfig { /// Creates a new Azure Blob configuration. 
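The error constructors map one-to-one onto `ErrorKind`, so call sites can branch on `kind()` instead of matching message strings. For example:

```rust
use nvisy_dal::{Error, ErrorKind};

fn retry_hint(err: &Error) -> &'static str {
    match err.kind() {
        ErrorKind::Connection => "back off and reconnect",
        ErrorKind::NotFound => "nothing to retry",
        ErrorKind::InvalidInput => "fix the request first",
        ErrorKind::Provider => "inspect the backend error",
    }
}

fn demo() {
    let io = std::io::Error::new(std::io::ErrorKind::ConnectionReset, "socket closed");
    let err = Error::connection("lost connection to the backend").with_source(io);

    assert_eq!(err.kind(), ErrorKind::Connection);
    assert_eq!(retry_hint(&err), "back off and reconnect");
}
```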
- pub fn new(container: impl Into, account_name: impl Into) -> Self { + pub fn new(account_name: impl Into, container: impl Into) -> Self { Self { - container: container.into(), account_name: account_name.into(), + container: container.into(), account_key: None, + sas_token: None, prefix: None, } } @@ -34,6 +38,12 @@ impl AzureBlobConfig { self } + /// Sets the SAS token. + pub fn with_sas_token(mut self, sas_token: impl Into) -> Self { + self.sas_token = Some(sas_token.into()); + self + } + /// Sets the path prefix. pub fn with_prefix(mut self, prefix: impl Into) -> Self { self.prefix = Some(prefix.into()); diff --git a/crates/nvisy-dal/src/provider/azblob/mod.rs b/crates/nvisy-dal/src/provider/azblob/mod.rs new file mode 100644 index 0000000..78d0c33 --- /dev/null +++ b/crates/nvisy-dal/src/provider/azblob/mod.rs @@ -0,0 +1,110 @@ +//! Azure Blob Storage provider. + +mod config; + +use async_trait::async_trait; +pub use config::AzblobConfig; +use futures::StreamExt; +use opendal::{Operator, services}; + +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +/// Azure Blob Storage provider for blob storage. +#[derive(Clone)] +pub struct AzblobProvider { + operator: Operator, +} + +impl AzblobProvider { + /// Creates a new Azure Blob provider. + pub fn new(config: &AzblobConfig) -> Result { + let mut builder = services::Azblob::default() + .account_name(&config.account_name) + .container(&config.container); + + if let Some(ref account_key) = config.account_key { + builder = builder.account_key(account_key); + } + + if let Some(ref sas_token) = config.sas_token { + builder = builder.sas_token(sas_token); + } + + if let Some(ref prefix) = config.prefix { + builder = builder.root(prefix); + } + + let operator = Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| Error::connection(e.to_string()))?; + + Ok(Self { operator }) + } +} + +#[async_trait] +impl DataInput for AzblobProvider { + async fn read(&self, ctx: &Context) -> Result> { + let prefix = ctx.target.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let path = entry.path().to_string(); + if path.ends_with('/') { + return None; + } + + match op.read(&path).await { + Ok(data) => { + let mut blob = Blob::new(path.clone(), data.to_bytes()); + if let Ok(meta) = op.stat(&path).await { + if let Some(ct) = meta.content_type() { + blob = blob.with_content_type(ct); + } + } + Some(Ok(blob)) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} + +#[async_trait] +impl DataOutput for AzblobProvider { + async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { + for blob in items { + self.operator + .write(&blob.path, blob.data) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} + +impl std::fmt::Debug for AzblobProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("AzblobProvider").finish() + } +} diff --git a/crates/nvisy-dal/src/provider/config.rs b/crates/nvisy-dal/src/provider/config.rs new file mode 100644 index 
0000000..e74769f --- /dev/null +++ b/crates/nvisy-dal/src/provider/config.rs @@ -0,0 +1,32 @@ +//! Provider configuration types. + +use serde::{Deserialize, Serialize}; + +use super::{AzblobConfig, GcsConfig, MysqlConfig, PostgresConfig, S3Config}; +use crate::datatype::DataTypeId; + +/// Unified provider configuration for different backends. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum ProviderConfig { + /// Amazon S3 storage. + S3(S3Config), + /// Google Cloud Storage. + Gcs(GcsConfig), + /// Azure Blob Storage. + Azblob(AzblobConfig), + /// PostgreSQL database. + Postgres(PostgresConfig), + /// MySQL database. + Mysql(MysqlConfig), +} + +impl ProviderConfig { + /// Returns the output data type for this provider. + pub const fn output_type(&self) -> DataTypeId { + match self { + Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, + Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, + } + } +} diff --git a/crates/nvisy-opendal/src/gcs/config.rs b/crates/nvisy-dal/src/provider/gcs/config.rs similarity index 95% rename from crates/nvisy-opendal/src/gcs/config.rs rename to crates/nvisy-dal/src/provider/gcs/config.rs index 6eacef3..1dc2a00 100644 --- a/crates/nvisy-opendal/src/gcs/config.rs +++ b/crates/nvisy-dal/src/provider/gcs/config.rs @@ -25,7 +25,7 @@ impl GcsConfig { } } - /// Sets the service account credentials. + /// Sets the credentials JSON. pub fn with_credentials(mut self, credentials: impl Into) -> Self { self.credentials = Some(credentials.into()); self diff --git a/crates/nvisy-dal/src/provider/gcs/mod.rs b/crates/nvisy-dal/src/provider/gcs/mod.rs new file mode 100644 index 0000000..a765aad --- /dev/null +++ b/crates/nvisy-dal/src/provider/gcs/mod.rs @@ -0,0 +1,104 @@ +//! Google Cloud Storage provider. + +mod config; + +use async_trait::async_trait; +pub use config::GcsConfig; +use futures::StreamExt; +use opendal::{Operator, services}; + +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +/// Google Cloud Storage provider for blob storage. +#[derive(Clone)] +pub struct GcsProvider { + operator: Operator, +} + +impl GcsProvider { + /// Creates a new GCS provider. 
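`ProviderConfig` is internally tagged by `kind`, so backend selection can live in plain JSON or YAML configuration, and `output_type` tells the caller whether to expect `Blob`s or `Record`s. A sketch using the Azure variant defined earlier:

```rust
use nvisy_dal::provider::{AzblobConfig, ProviderConfig};
use nvisy_dal::DataTypeId;

fn demo() -> serde_json::Result<()> {
    // `kind` selects the variant; the remaining keys are that variant's fields.
    let raw = r#"{
        "kind": "azblob",
        "account_name": "acct",
        "container": "raw-data",
        "sas_token": "sv=2026-01-01&sig=abc"
    }"#;
    let config: ProviderConfig = serde_json::from_str(raw)?;

    // Blob stores yield Blob items; Postgres/MySQL yield Records.
    assert_eq!(config.output_type(), DataTypeId::Blob);

    // The equivalent config built programmatically.
    let built = ProviderConfig::Azblob(
        AzblobConfig::new("acct", "raw-data").with_sas_token("sv=2026-01-01&sig=abc"),
    );
    assert_eq!(config, built);
    Ok(())
}
```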
+ pub fn new(config: &GcsConfig) -> Result { + let mut builder = services::Gcs::default().bucket(&config.bucket); + + if let Some(ref credentials) = config.credentials { + builder = builder.credential(credentials); + } + + if let Some(ref prefix) = config.prefix { + builder = builder.root(prefix); + } + + let operator = Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| Error::connection(e.to_string()))?; + + Ok(Self { operator }) + } +} + +#[async_trait] +impl DataInput for GcsProvider { + async fn read(&self, ctx: &Context) -> Result> { + let prefix = ctx.target.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let path = entry.path().to_string(); + if path.ends_with('/') { + return None; + } + + match op.read(&path).await { + Ok(data) => { + let mut blob = Blob::new(path.clone(), data.to_bytes()); + if let Ok(meta) = op.stat(&path).await { + if let Some(ct) = meta.content_type() { + blob = blob.with_content_type(ct); + } + } + Some(Ok(blob)) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} + +#[async_trait] +impl DataOutput for GcsProvider { + async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { + for blob in items { + self.operator + .write(&blob.path, blob.data) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} + +impl std::fmt::Debug for GcsProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("GcsProvider").finish() + } +} diff --git a/crates/nvisy-vector/src/milvus/config.rs b/crates/nvisy-dal/src/provider/milvus/config.rs similarity index 100% rename from crates/nvisy-vector/src/milvus/config.rs rename to crates/nvisy-dal/src/provider/milvus/config.rs diff --git a/crates/nvisy-vector/src/milvus/backend.rs b/crates/nvisy-dal/src/provider/milvus/mod.rs similarity index 66% rename from crates/nvisy-vector/src/milvus/backend.rs rename to crates/nvisy-dal/src/provider/milvus/mod.rs index 8222732..b623259 100644 --- a/crates/nvisy-vector/src/milvus/backend.rs +++ b/crates/nvisy-dal/src/provider/milvus/mod.rs @@ -1,45 +1,38 @@ -//! Milvus backend implementation. +//! Milvus vector store provider. + +mod config; use std::borrow::Cow; use std::collections::HashMap; use async_trait::async_trait; +pub use config::MilvusConfig; use milvus::client::Client; use milvus::collection::SearchOption; use milvus::data::FieldColumn; use milvus::index::{IndexParams, IndexType, MetricType}; use milvus::schema::{CollectionSchemaBuilder, FieldSchema}; use milvus::value::{Value, ValueVec}; -use nvisy_data::{ - DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, - VectorSearchResult, -}; -use super::MilvusConfig; -use crate::TRACING_TARGET; +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Embedding; +use crate::error::{Error, Result}; -/// Milvus backend implementation. -pub struct MilvusBackend { +/// Milvus provider for vector storage. +pub struct MilvusProvider { client: Client, #[allow(dead_code)] config: MilvusConfig, } -impl MilvusBackend { - /// Creates a new Milvus backend. 
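A read-side sketch for the blob providers. Assumptions worth flagging: `GcsConfig::new` is taken to accept the bucket name and to expose `with_prefix` (only `with_credentials` is visible in this hunk), `Context` is assumed to be a plain struct with a `Default` impl (only its `target` and `limit` fields are used by these providers), and `InputStream` is assumed to implement `futures::Stream` yielding `Result<Blob>`:

```rust
use futures::StreamExt;
use nvisy_dal::provider::{GcsConfig, GcsProvider};
use nvisy_dal::{Context, DataInput, Result};

async fn list_recent_exports() -> Result<()> {
    let config = GcsConfig::new("my-bucket").with_prefix("exports/");
    let provider = GcsProvider::new(&config)?;

    // `target` narrows the listing, `limit` caps how many entries are read.
    let ctx = Context {
        target: Some("2026-01/".to_string()),
        limit: Some(100),
        ..Default::default()
    };

    let mut blobs = provider.read(&ctx).await?;
    while let Some(blob) = blobs.next().await {
        let blob = blob?;
        println!("{} ({} bytes)", blob.path, blob.data.len());
    }
    Ok(())
}
```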
- pub async fn new(config: &MilvusConfig) -> DataResult { +impl MilvusProvider { + /// Creates a new Milvus provider. + pub async fn new(config: &MilvusConfig) -> Result { let url = format!("http://{}:{}", config.host, config.port); let client = Client::new(url) .await - .map_err(|e| DataError::connection(e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET, - host = %config.host, - port = %config.port, - "Connected to Milvus" - ); + .map_err(|e| Error::connection(e.to_string()))?; Ok(Self { client, @@ -48,12 +41,12 @@ impl MilvusBackend { } /// Ensures a collection exists, creating it if necessary. - async fn ensure_collection(&self, name: &str, dimensions: usize) -> DataResult<()> { + async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { let exists = self .client .has_collection(name) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; if exists { return Ok(()); @@ -72,13 +65,13 @@ impl MilvusBackend { let schema = builder .build() - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; // Create the collection self.client .create_collection(schema, None) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; // Create index on vector field let index_params = IndexParams::new( @@ -92,89 +85,34 @@ impl MilvusBackend { .client .get_collection(name) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; collection .create_index("vector", index_params) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; // Load collection into memory collection .load(1) .await - .map_err(|e| DataError::backend(e.to_string()))?; - - tracing::info!( - target: TRACING_TARGET, - collection = %name, - dimensions = %dimensions, - "Created Milvus collection" - ); + .map_err(|e| Error::provider(e.to_string()))?; Ok(()) } -} - -#[async_trait] -impl VectorOutput for MilvusBackend { - async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()> { - if vectors.is_empty() { - return Ok(()); - } - - // Get the dimension from the first vector - let dim = vectors.first().map(|v| v.vector.len()).unwrap_or(0); - // Ensure collection exists - self.ensure_collection(&ctx.collection, dim).await?; - - let coll = self - .client - .get_collection(&ctx.collection) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - let ids: Vec = vectors.iter().map(|v| v.id.clone()).collect(); - let embeddings: Vec = vectors - .iter() - .flat_map(|v| v.vector.iter().copied()) - .collect(); - let metadata: Vec = vectors - .iter() - .map(|v| serde_json::to_string(&v.metadata).unwrap_or_default()) - .collect(); - - // Create field schemas for columns - let id_schema = FieldSchema::new_varchar("id", "string id", 256); - let vector_schema = FieldSchema::new_float_vector("vector", "embedding vector", dim as i64); - let metadata_schema = FieldSchema::new_varchar("metadata", "json metadata", 65535); - - let columns = vec![ - FieldColumn::new(&id_schema, ValueVec::String(ids)), - FieldColumn::new(&vector_schema, ValueVec::Float(embeddings)), - FieldColumn::new(&metadata_schema, ValueVec::String(metadata)), - ]; - - coll.insert(columns, None) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - Ok(()) - } - - async fn search( + /// Searches for similar vectors. 
+ pub async fn search( &self, - ctx: &VectorContext, + collection: &str, query: Vec, limit: usize, - _options: VectorSearchOptions, - ) -> DataResult> { + ) -> Result> { let coll = self .client - .get_collection(&ctx.collection) + .get_collection(collection) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; let mut search_option = SearchOption::new(); search_option.add_param("nprobe", serde_json::json!(16)); @@ -191,7 +129,7 @@ impl VectorOutput for MilvusBackend { &search_option, ) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; let mut search_results = Vec::new(); @@ -232,7 +170,7 @@ impl VectorOutput for MilvusBackend { }) .unwrap_or(id); - search_results.push(VectorSearchResult { + search_results.push(SearchResult { id: string_id, score, vector: None, @@ -244,3 +182,84 @@ impl VectorOutput for MilvusBackend { Ok(search_results) } } + +/// Result from a vector similarity search. +#[derive(Debug, Clone)] +pub struct SearchResult { + /// The ID of the matched vector. + pub id: String, + /// Similarity score. + pub score: f32, + /// The vector data, if requested. + pub vector: Option>, + /// Metadata associated with this vector. + pub metadata: HashMap, +} + +#[async_trait] +impl DataOutput for MilvusProvider { + async fn write(&self, ctx: &Context, items: Vec) -> Result<()> { + if items.is_empty() { + return Ok(()); + } + + let collection = ctx + .target + .as_deref() + .ok_or_else(|| Error::invalid_input("Collection name required in context.target"))?; + + // Get the dimension from the first vector + let dim = items.first().map(|v| v.vector.len()).unwrap_or(0); + + // Ensure collection exists + self.ensure_collection(collection, dim).await?; + + let coll = self + .client + .get_collection(collection) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let ids: Vec = items.iter().map(|v| v.id.clone()).collect(); + let embeddings: Vec = items + .iter() + .flat_map(|v| v.vector.iter().copied()) + .collect(); + let metadata: Vec = items + .iter() + .map(|v| serde_json::to_string(&v.metadata).unwrap_or_default()) + .collect(); + + // Create field schemas for columns + let id_schema = FieldSchema::new_varchar("id", "string id", 256); + let vector_schema = FieldSchema::new_float_vector("vector", "embedding vector", dim as i64); + let metadata_schema = FieldSchema::new_varchar("metadata", "json metadata", 65535); + + let columns = vec![ + FieldColumn::new(&id_schema, ValueVec::String(ids)), + FieldColumn::new(&vector_schema, ValueVec::Float(embeddings)), + FieldColumn::new(&metadata_schema, ValueVec::String(metadata)), + ]; + + coll.insert(columns, None) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + Ok(()) + } +} + +#[async_trait] +impl DataInput for MilvusProvider { + async fn read(&self, _ctx: &Context) -> Result> { + // Vector stores are primarily write/search, not sequential read + let stream = futures::stream::empty(); + Ok(InputStream::new(Box::pin(stream))) + } +} + +impl std::fmt::Debug for MilvusProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MilvusProvider").finish() + } +} diff --git a/crates/nvisy-dal/src/provider/mod.rs b/crates/nvisy-dal/src/provider/mod.rs new file mode 100644 index 0000000..578d165 --- /dev/null +++ b/crates/nvisy-dal/src/provider/mod.rs @@ -0,0 +1,33 @@ +//! Data providers for various storage backends. 
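Vector providers take the collection name through `Context::target` for writes and expose an inherent `search`. The `Embedding` literal below is hypothetical (only its `id`, `vector`, and `metadata` fields are exercised by the providers), and `Context` is again assumed to have a `Default` impl:

```rust
use std::collections::HashMap;

use nvisy_dal::datatype::Embedding;
use nvisy_dal::provider::{MilvusConfig, MilvusProvider};
use nvisy_dal::{Context, DataOutput, Result};

async fn index_and_query(config: &MilvusConfig) -> Result<()> {
    let provider = MilvusProvider::new(config).await?;

    let item = Embedding {
        id: "doc-1".to_string(),
        vector: vec![0.12, -0.03, 0.88],
        metadata: HashMap::from([("source".to_string(), serde_json::json!("crawler"))]),
    };

    // Upsert into the `documents` collection (created on first write).
    let ctx = Context {
        target: Some("documents".to_string()),
        ..Default::default()
    };
    provider.write(&ctx, vec![item]).await?;

    // Inherent similarity search: (collection, query vector, limit).
    for hit in provider.search("documents", vec![0.1, 0.0, 0.9], 5).await? {
        println!("{} -> {:.3}", hit.id, hit.score);
    }
    Ok(())
}
```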
+ +// Storage providers (OpenDAL-based) +mod azblob; +mod gcs; +mod s3; + +// Database providers (OpenDAL-based) +mod mysql; +mod postgres; + +// Vector providers +mod milvus; +mod pgvector; +mod pinecone; +mod qdrant; + +mod config; + +// Re-export storage providers +pub use azblob::{AzblobConfig, AzblobProvider}; +// Re-export unified config +pub use config::ProviderConfig; +pub use gcs::{GcsConfig, GcsProvider}; +// Re-export vector providers +pub use milvus::{MilvusConfig, MilvusProvider}; +// Re-export database providers +pub use mysql::{MysqlConfig, MysqlProvider}; +pub use pgvector::{DistanceMetric, IndexType, PgVectorConfig, PgVectorProvider}; +pub use pinecone::{PineconeConfig, PineconeProvider}; +pub use postgres::{PostgresConfig, PostgresProvider}; +pub use qdrant::{QdrantConfig, QdrantProvider}; +pub use s3::{S3Config, S3Provider}; diff --git a/crates/nvisy-dal/src/provider/mysql/config.rs b/crates/nvisy-dal/src/provider/mysql/config.rs new file mode 100644 index 0000000..ecba6bc --- /dev/null +++ b/crates/nvisy-dal/src/provider/mysql/config.rs @@ -0,0 +1,39 @@ +//! MySQL configuration. + +use serde::{Deserialize, Serialize}; + +/// MySQL configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MysqlConfig { + /// Connection string (e.g., "mysql://user:pass@host:3306/db"). + pub connection_string: String, + /// Default table name. + #[serde(skip_serializing_if = "Option::is_none")] + pub table: Option, + /// Default database. + #[serde(skip_serializing_if = "Option::is_none")] + pub database: Option, +} + +impl MysqlConfig { + /// Creates a new MySQL configuration. + pub fn new(connection_string: impl Into) -> Self { + Self { + connection_string: connection_string.into(), + table: None, + database: None, + } + } + + /// Sets the default table. + pub fn with_table(mut self, table: impl Into) -> Self { + self.table = Some(table.into()); + self + } + + /// Sets the default database. + pub fn with_database(mut self, database: impl Into) -> Self { + self.database = Some(database.into()); + self + } +} diff --git a/crates/nvisy-dal/src/provider/mysql/mod.rs b/crates/nvisy-dal/src/provider/mysql/mod.rs new file mode 100644 index 0000000..9ecd4c8 --- /dev/null +++ b/crates/nvisy-dal/src/provider/mysql/mod.rs @@ -0,0 +1,126 @@ +//! MySQL provider via OpenDAL. + +mod config; + +use std::collections::HashMap; + +use async_trait::async_trait; +pub use config::MysqlConfig; +use futures::StreamExt; +use opendal::{Operator, services}; + +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Record; +use crate::error::{Error, Result}; + +/// MySQL provider for relational data. +#[derive(Clone)] +pub struct MysqlProvider { + operator: Operator, + #[allow(dead_code)] + config: MysqlConfig, +} + +impl MysqlProvider { + /// Creates a new MySQL provider. 
+ pub fn new(config: &MysqlConfig) -> Result { + let mut builder = services::Mysql::default().connection_string(&config.connection_string); + + if let Some(ref table) = config.table { + builder = builder.table(table); + } + + if let Some(ref root) = config.database { + builder = builder.root(root); + } + + let operator = Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| Error::connection(e.to_string()))?; + + Ok(Self { + operator, + config: config.clone(), + }) + } +} + +#[async_trait] +impl DataInput for MysqlProvider { + async fn read(&self, ctx: &Context) -> Result> { + let prefix = ctx.target.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let key = entry.path().to_string(); + match op.read(&key).await { + Ok(data) => { + // Parse the value as JSON to get columns + let value: serde_json::Value = + serde_json::from_slice(&data.to_bytes()) + .unwrap_or(serde_json::json!({})); + + let columns: HashMap = + if let serde_json::Value::Object(map) = value { + map.into_iter().collect() + } else { + let mut cols = HashMap::new(); + cols.insert("_key".to_string(), serde_json::json!(key)); + cols.insert("_value".to_string(), value); + cols + }; + + Some(Ok(Record::from_columns(columns))) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} + +#[async_trait] +impl DataOutput for MysqlProvider { + async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { + for record in items { + // Use _key column as the key, or generate one + let key = record + .get("_key") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); + + let value = + serde_json::to_vec(&record.columns).map_err(|e| Error::provider(e.to_string()))?; + + self.operator + .write(&key, value) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} + +impl std::fmt::Debug for MysqlProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MysqlProvider").finish() + } +} diff --git a/crates/nvisy-vector/src/pgvector/config.rs b/crates/nvisy-dal/src/provider/pgvector/config.rs similarity index 81% rename from crates/nvisy-vector/src/pgvector/config.rs rename to crates/nvisy-dal/src/provider/pgvector/config.rs index 05ecb09..48edfb6 100644 --- a/crates/nvisy-vector/src/pgvector/config.rs +++ b/crates/nvisy-dal/src/provider/pgvector/config.rs @@ -14,10 +14,10 @@ pub struct PgVectorConfig { pub dimensions: usize, /// Distance metric. #[serde(default)] - pub distance_metric: PgVectorDistanceMetric, + pub distance_metric: DistanceMetric, /// Index type for similarity search. #[serde(default)] - pub index_type: PgVectorIndexType, + pub index_type: IndexType, } impl PgVectorConfig { @@ -27,8 +27,8 @@ impl PgVectorConfig { connection_url: connection_url.into(), table: default_pgvector_table(), dimensions, - distance_metric: PgVectorDistanceMetric::default(), - index_type: PgVectorIndexType::default(), + distance_metric: DistanceMetric::default(), + index_type: IndexType::default(), } } @@ -39,13 +39,13 @@ impl PgVectorConfig { } /// Sets the distance metric. 
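The OpenDAL-backed SQL providers store each row as a JSON object keyed by `_key` (a UUID is generated when that column is absent). A round-trip sketch, again assuming a `Default` impl on `Context` and a `Stream` impl on `InputStream`:

```rust
use futures::StreamExt;
use nvisy_dal::datatype::Record;
use nvisy_dal::provider::{MysqlConfig, MysqlProvider};
use nvisy_dal::{Context, DataInput, DataOutput, Result};

async fn round_trip() -> Result<()> {
    let config =
        MysqlConfig::new("mysql://app:secret@localhost:3306/app").with_table("kv_records");
    let provider = MysqlProvider::new(&config)?;

    // `_key` becomes the row key; the remaining columns are the stored value.
    let record = Record::new()
        .set("_key", serde_json::json!("user:42"))
        .set("email", serde_json::json!("user@example.com"));

    let ctx = Context::default();
    provider.write(&ctx, vec![record]).await?;

    let mut rows = provider.read(&ctx).await?;
    while let Some(row) = rows.next().await {
        println!("{:?}", row?.get("email"));
    }
    Ok(())
}
```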
- pub fn with_distance_metric(mut self, metric: PgVectorDistanceMetric) -> Self { + pub fn with_distance_metric(mut self, metric: DistanceMetric) -> Self { self.distance_metric = metric; self } /// Sets the index type. - pub fn with_index_type(mut self, index_type: PgVectorIndexType) -> Self { + pub fn with_index_type(mut self, index_type: IndexType) -> Self { self.index_type = index_type; self } @@ -58,7 +58,7 @@ fn default_pgvector_table() -> String { /// Distance metric for pgvector. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] -pub enum PgVectorDistanceMetric { +pub enum DistanceMetric { /// L2 (Euclidean) distance. #[default] L2, @@ -68,7 +68,7 @@ pub enum PgVectorDistanceMetric { Cosine, } -impl PgVectorDistanceMetric { +impl DistanceMetric { /// Returns the pgvector operator for this metric. pub fn operator(&self) -> &'static str { match self { @@ -82,7 +82,7 @@ impl PgVectorDistanceMetric { /// Index type for pgvector. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] -pub enum PgVectorIndexType { +pub enum IndexType { /// IVFFlat index (faster build, good recall). #[default] IvfFlat, diff --git a/crates/nvisy-vector/src/pgvector/backend.rs b/crates/nvisy-dal/src/provider/pgvector/mod.rs similarity index 64% rename from crates/nvisy-vector/src/pgvector/backend.rs rename to crates/nvisy-dal/src/provider/pgvector/mod.rs index c9c1708..56438d5 100644 --- a/crates/nvisy-vector/src/pgvector/backend.rs +++ b/crates/nvisy-dal/src/provider/pgvector/mod.rs @@ -1,59 +1,52 @@ -//! PostgreSQL pgvector backend implementation. +//! PostgreSQL pgvector provider. + +mod config; use std::collections::HashMap; use async_trait::async_trait; +pub use config::{DistanceMetric, IndexType, PgVectorConfig}; use diesel::prelude::*; use diesel::sql_types::{Float, Integer, Text}; use diesel_async::pooled_connection::AsyncDieselConnectionManager; use diesel_async::pooled_connection::deadpool::Pool; use diesel_async::{AsyncPgConnection, RunQueryDsl}; -use nvisy_data::{ - DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, - VectorSearchResult, -}; -use super::{PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType}; -use crate::TRACING_TARGET; +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Embedding; +use crate::error::{Error, Result}; -/// pgvector backend implementation using Diesel. -pub struct PgVectorBackend { +/// pgvector provider for vector storage using PostgreSQL. +pub struct PgVectorProvider { pool: Pool, config: PgVectorConfig, } -impl PgVectorBackend { - /// Creates a new pgvector backend. - pub async fn new(config: &PgVectorConfig) -> DataResult { +impl PgVectorProvider { + /// Creates a new pgvector provider. 
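Configuration for the pgvector provider picks the distance metric and index type up front, since both feed into the generated DDL and search SQL. The defaults are L2 distance with an IVFFlat index; the builder overrides both here:

```rust
use nvisy_dal::provider::{DistanceMetric, IndexType, PgVectorConfig};

fn build_config() -> PgVectorConfig {
    let config = PgVectorConfig::new("postgresql://app:secret@localhost:5432/app", 768)
        .with_distance_metric(DistanceMetric::Cosine)
        .with_index_type(IndexType::Hnsw);

    assert_eq!(config.dimensions, 768);
    assert_eq!(config.distance_metric, DistanceMetric::Cosine);
    assert_eq!(config.index_type, IndexType::Hnsw);
    config
}
```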
+ pub async fn new(config: &PgVectorConfig) -> Result { let manager = AsyncDieselConnectionManager::::new(&config.connection_url); let pool = Pool::builder(manager) .build() - .map_err(|e| DataError::connection(e.to_string()))?; + .map_err(|e| Error::connection(e.to_string()))?; // Test connection and ensure pgvector extension exists { let mut conn = pool .get() .await - .map_err(|e| DataError::connection(e.to_string()))?; + .map_err(|e| Error::connection(e.to_string()))?; diesel::sql_query("CREATE EXTENSION IF NOT EXISTS vector") .execute(&mut conn) .await .map_err(|e| { - DataError::backend(format!("Failed to create vector extension: {}", e)) + Error::provider(format!("Failed to create vector extension: {}", e)) })?; } - tracing::debug!( - target: TRACING_TARGET, - table = %config.table, - dimensions = %config.dimensions, - "Initialized pgvector backend" - ); - Ok(Self { pool, config: config.clone(), @@ -62,12 +55,11 @@ impl PgVectorBackend { async fn get_conn( &self, - ) -> DataResult>> - { + ) -> Result>> { self.pool .get() .await - .map_err(|e| DataError::connection(e.to_string())) + .map_err(|e| Error::connection(e.to_string())) } fn distance_operator(&self) -> &'static str { @@ -75,7 +67,7 @@ impl PgVectorBackend { } /// Ensures a collection (table) exists, creating it if necessary. - async fn ensure_collection(&self, name: &str, dimensions: usize) -> DataResult<()> { + async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { let mut conn = self.get_conn().await?; // Create the table @@ -94,14 +86,14 @@ impl PgVectorBackend { diesel::sql_query(&create_table) .execute(&mut conn) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; // Create the index let index_name = format!("{}_vector_idx", name); let operator = self.distance_operator(); let create_index = match self.config.index_type { - PgVectorIndexType::IvfFlat => { + IndexType::IvfFlat => { format!( r#" CREATE INDEX IF NOT EXISTS {} ON {} @@ -111,7 +103,7 @@ impl PgVectorBackend { index_name, name, operator ) } - PgVectorIndexType::Hnsw => { + IndexType::Hnsw => { format!( r#" CREATE INDEX IF NOT EXISTS {} ON {} @@ -126,78 +118,19 @@ impl PgVectorBackend { diesel::sql_query(&create_index) .execute(&mut conn) .await - .map_err(|e| DataError::backend(e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET, - collection = %name, - dimensions = %dimensions, - "Ensured pgvector table exists" - ); - - Ok(()) - } -} - -#[async_trait] -impl VectorOutput for PgVectorBackend { - async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()> { - if vectors.is_empty() { - return Ok(()); - } - - // Get dimensions from the first vector - let dimensions = <[_]>::first(&vectors) - .map(|v| v.vector.len()) - .ok_or_else(|| DataError::invalid("No vectors provided"))?; - - // Ensure collection exists - self.ensure_collection(&ctx.collection, dimensions).await?; - - let mut conn = self.get_conn().await?; - - for v in vectors { - let vector_str = format!( - "[{}]", - v.vector - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ); - let metadata_json = - serde_json::to_string(&v.metadata).unwrap_or_else(|_| "{}".to_string()); - - let upsert_query = format!( - r#" - INSERT INTO {} (id, vector, metadata) - VALUES ($1, $2::vector, $3::jsonb) - ON CONFLICT (id) DO UPDATE SET - vector = EXCLUDED.vector, - metadata = EXCLUDED.metadata - "#, - ctx.collection - ); - - diesel::sql_query(&upsert_query) - .bind::(&v.id) - .bind::(&vector_str) - 
.bind::(&metadata_json) - .execute(&mut conn) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - } + .map_err(|e| Error::provider(e.to_string()))?; Ok(()) } - async fn search( + /// Searches for similar vectors. + pub async fn search( &self, - ctx: &VectorContext, + collection: &str, query: Vec, limit: usize, - options: VectorSearchOptions, - ) -> DataResult> { + include_vectors: bool, + ) -> Result> { let mut conn = self.get_conn().await?; let operator = self.distance_operator(); @@ -210,17 +143,17 @@ impl VectorOutput for PgVectorBackend { .join(",") ); - let vector_column = if options.include_vectors { + let vector_column = if include_vectors { ", vector::text as vector_data" } else { "" }; - // For cosine and inner product, we need to convert distance to similarity + // For cosine and inner product, convert distance to similarity let score_expr = match self.config.distance_metric { - PgVectorDistanceMetric::L2 => format!("vector {} $1::vector", operator), - PgVectorDistanceMetric::InnerProduct => format!("-(vector {} $1::vector)", operator), - PgVectorDistanceMetric::Cosine => format!("1 - (vector {} $1::vector)", operator), + DistanceMetric::L2 => format!("vector {} $1::vector", operator), + DistanceMetric::InnerProduct => format!("-(vector {} $1::vector)", operator), + DistanceMetric::Cosine => format!("1 - (vector {} $1::vector)", operator), }; let search_query = format!( @@ -230,7 +163,7 @@ impl VectorOutput for PgVectorBackend { ORDER BY vector {} $1::vector LIMIT $2 "#, - score_expr, vector_column, ctx.collection, operator + score_expr, vector_column, collection, operator ); let results: Vec = diesel::sql_query(&search_query) @@ -238,7 +171,7 @@ impl VectorOutput for PgVectorBackend { .bind::(limit as i32) .load(&mut conn) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; let search_results = results .into_iter() @@ -246,13 +179,13 @@ impl VectorOutput for PgVectorBackend { let metadata: HashMap = serde_json::from_str(&row.metadata_json).unwrap_or_default(); - let vector = if options.include_vectors { + let vector = if include_vectors { row.vector_data.and_then(|v| parse_vector(&v).ok()) } else { None }; - VectorSearchResult { + SearchResult { id: row.id, score: row.score, vector, @@ -265,15 +198,101 @@ impl VectorOutput for PgVectorBackend { } } +/// Result from a vector similarity search. +#[derive(Debug, Clone)] +pub struct SearchResult { + /// The ID of the matched vector. + pub id: String, + /// Similarity score. + pub score: f32, + /// The vector data, if requested. + pub vector: Option>, + /// Metadata associated with this vector. 
+ pub metadata: HashMap, +} + +#[async_trait] +impl DataOutput for PgVectorProvider { + async fn write(&self, ctx: &Context, items: Vec) -> Result<()> { + if items.is_empty() { + return Ok(()); + } + + let collection = ctx + .target + .as_deref() + .ok_or_else(|| Error::invalid_input("Collection name required in context.target"))?; + + // Get dimensions from the first vector + let dimensions = <[_]>::first(&items) + .map(|v| v.vector.len()) + .ok_or_else(|| Error::invalid_input("No embeddings provided"))?; + + // Ensure collection exists + self.ensure_collection(collection, dimensions).await?; + + let mut conn = self.get_conn().await?; + + for v in items { + let vector_str = format!( + "[{}]", + v.vector + .iter() + .map(|f| f.to_string()) + .collect::>() + .join(",") + ); + let metadata_json = + serde_json::to_string(&v.metadata).unwrap_or_else(|_| "{}".to_string()); + + let upsert_query = format!( + r#" + INSERT INTO {} (id, vector, metadata) + VALUES ($1, $2::vector, $3::jsonb) + ON CONFLICT (id) DO UPDATE SET + vector = EXCLUDED.vector, + metadata = EXCLUDED.metadata + "#, + collection + ); + + diesel::sql_query(&upsert_query) + .bind::(&v.id) + .bind::(&vector_str) + .bind::(&metadata_json) + .execute(&mut conn) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + + Ok(()) + } +} + +#[async_trait] +impl DataInput for PgVectorProvider { + async fn read(&self, _ctx: &Context) -> Result> { + // Vector stores are primarily write/search, not sequential read + let stream = futures::stream::empty(); + Ok(InputStream::new(Box::pin(stream))) + } +} + +impl std::fmt::Debug for PgVectorProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PgVectorProvider").finish() + } +} + /// Parse a vector string from PostgreSQL format. -fn parse_vector(s: &str) -> DataResult> { +fn parse_vector(s: &str) -> Result> { let trimmed = s.trim_start_matches('[').trim_end_matches(']'); trimmed .split(',') .map(|s| { s.trim() .parse::() - .map_err(|e| DataError::serialization(e.to_string())) + .map_err(|e| Error::provider(e.to_string())) }) .collect() } diff --git a/crates/nvisy-vector/src/pinecone/config.rs b/crates/nvisy-dal/src/provider/pinecone/config.rs similarity index 100% rename from crates/nvisy-vector/src/pinecone/config.rs rename to crates/nvisy-dal/src/provider/pinecone/config.rs diff --git a/crates/nvisy-vector/src/pinecone/backend.rs b/crates/nvisy-dal/src/provider/pinecone/mod.rs similarity index 67% rename from crates/nvisy-vector/src/pinecone/backend.rs rename to crates/nvisy-dal/src/provider/pinecone/mod.rs index c764a4d..b28831e 100644 --- a/crates/nvisy-vector/src/pinecone/backend.rs +++ b/crates/nvisy-dal/src/provider/pinecone/mod.rs @@ -1,29 +1,29 @@ -//! Pinecone backend implementation. +//! Pinecone vector store provider. + +mod config; use std::collections::{BTreeMap, HashMap}; use async_trait::async_trait; -use nvisy_data::{ - DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, - VectorSearchResult, -}; +pub use config::PineconeConfig; use pinecone_sdk::models::{Kind, Metadata, Namespace, Value as PineconeValue, Vector}; use pinecone_sdk::pinecone::PineconeClientConfig; use pinecone_sdk::pinecone::data::Index; use tokio::sync::Mutex; -use super::PineconeConfig; -use crate::TRACING_TARGET; +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Embedding; +use crate::error::{Error, Result}; -/// Pinecone backend implementation. 
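Because the cosine and inner-product scores are already converted to "larger is more similar" in the query above, callers can rank hits directly. A small helper over the inherent `search` (collection, query, limit, include_vectors):

```rust
use nvisy_dal::provider::PgVectorProvider;
use nvisy_dal::Result;

/// Returns the id and score of the best match, skipping raw vectors.
async fn top_match(
    provider: &PgVectorProvider,
    query: Vec<f32>,
) -> Result<Option<(String, f32)>> {
    let hits = provider.search("doc_embeddings", query, 5, false).await?;
    Ok(hits.into_iter().next().map(|hit| (hit.id, hit.score)))
}
```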
-pub struct PineconeBackend { +/// Pinecone provider for vector storage. +pub struct PineconeProvider { index: Mutex, config: PineconeConfig, } -impl PineconeBackend { - /// Creates a new Pinecone backend. - pub async fn new(config: &PineconeConfig) -> DataResult { +impl PineconeProvider { + /// Creates a new Pinecone provider. + pub async fn new(config: &PineconeConfig) -> Result { let client_config = PineconeClientConfig { api_key: Some(config.api_key.clone()), ..Default::default() @@ -31,28 +31,21 @@ impl PineconeBackend { let client = client_config .client() - .map_err(|e| DataError::connection(e.to_string()))?; + .map_err(|e| Error::connection(e.to_string()))?; // Describe the index to get its host let index_description = client .describe_index(&config.index) .await - .map_err(|e| DataError::connection(format!("Failed to describe index: {}", e)))?; + .map_err(|e| Error::connection(format!("Failed to describe index: {}", e)))?; - // host is a String, not Option let host = &index_description.host; // Connect to the index let index = client .index(host) .await - .map_err(|e| DataError::connection(format!("Failed to connect to index: {}", e)))?; - - tracing::debug!( - target: TRACING_TARGET, - index = %config.index, - "Connected to Pinecone" - ); + .map_err(|e| Error::connection(format!("Failed to connect to index: {}", e)))?; Ok(Self { index: Mutex::new(index), @@ -72,7 +65,7 @@ impl PineconeBackend { } } - /// Convert Pinecone Metadata (prost_types::Struct) to HashMap + /// Convert Pinecone Metadata to HashMap fn metadata_to_hashmap(metadata: Metadata) -> HashMap { metadata .fields @@ -81,7 +74,7 @@ impl PineconeBackend { .collect() } - /// Convert HashMap to Pinecone Metadata (prost_types::Struct) + /// Convert HashMap to Pinecone Metadata fn hashmap_to_metadata(map: HashMap) -> Metadata { let fields: BTreeMap = map .into_iter() @@ -90,52 +83,22 @@ impl PineconeBackend { Metadata { fields } } -} - -#[async_trait] -impl VectorOutput for PineconeBackend { - async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()> { - let namespace = self.get_namespace(&ctx.collection); - - let pinecone_vectors: Vec = vectors - .into_iter() - .map(|v| { - let metadata = if v.metadata.is_empty() { - None - } else { - Some(Self::hashmap_to_metadata(v.metadata)) - }; - - Vector { - id: v.id, - values: v.vector, - sparse_values: None, - metadata, - } - }) - .collect(); - - let mut index = self.index.lock().await; - index - .upsert(&pinecone_vectors, &namespace) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - Ok(()) - } - async fn search( + /// Searches for similar vectors. 
+ pub async fn search( &self, - ctx: &VectorContext, + collection: &str, query: Vec, limit: usize, - options: VectorSearchOptions, - ) -> DataResult> { - let namespace = self.get_namespace(&ctx.collection); + include_vectors: bool, + include_metadata: bool, + filter: Option<&serde_json::Value>, + ) -> Result> { + let namespace = self.get_namespace(collection); - let filter: Option = options.filter.and_then(|f| { + let filter_metadata: Option = filter.and_then(|f| { if let serde_json::Value::Object(obj) = f { - let map: HashMap = obj.into_iter().collect(); + let map: HashMap = obj.clone().into_iter().collect(); Some(Self::hashmap_to_metadata(map)) } else { None @@ -149,12 +112,12 @@ impl VectorOutput for PineconeBackend { None, // sparse values limit as u32, &namespace, - filter, - Some(options.include_vectors), - Some(options.include_metadata), + filter_metadata, + Some(include_vectors), + Some(include_metadata), ) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; let results = response .matches @@ -165,7 +128,7 @@ impl VectorOutput for PineconeBackend { .map(Self::metadata_to_hashmap) .unwrap_or_default(); - VectorSearchResult { + SearchResult { id: m.id, score: m.score, vector: Some(m.values), @@ -178,7 +141,69 @@ impl VectorOutput for PineconeBackend { } } -/// Convert Pinecone Value (prost_types::Value) to serde_json::Value +/// Result from a vector similarity search. +#[derive(Debug, Clone)] +pub struct SearchResult { + /// The ID of the matched vector. + pub id: String, + /// Similarity score. + pub score: f32, + /// The vector data, if requested. + pub vector: Option>, + /// Metadata associated with this vector. + pub metadata: HashMap, +} + +#[async_trait] +impl DataOutput for PineconeProvider { + async fn write(&self, ctx: &Context, items: Vec) -> Result<()> { + let collection = ctx.target.as_deref().unwrap_or(""); + let namespace = self.get_namespace(collection); + + let pinecone_vectors: Vec = items + .into_iter() + .map(|v| { + let metadata = if v.metadata.is_empty() { + None + } else { + Some(Self::hashmap_to_metadata(v.metadata)) + }; + + Vector { + id: v.id, + values: v.vector, + sparse_values: None, + metadata, + } + }) + .collect(); + + let mut index = self.index.lock().await; + index + .upsert(&pinecone_vectors, &namespace) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + Ok(()) + } +} + +#[async_trait] +impl DataInput for PineconeProvider { + async fn read(&self, _ctx: &Context) -> Result> { + // Vector stores are primarily write/search, not sequential read + let stream = futures::stream::empty(); + Ok(InputStream::new(Box::pin(stream))) + } +} + +impl std::fmt::Debug for PineconeProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PineconeProvider").finish() + } +} + +/// Convert Pinecone Value to serde_json::Value fn pinecone_value_to_json(value: PineconeValue) -> serde_json::Value { match value.kind { Some(Kind::NullValue(_)) => serde_json::Value::Null, @@ -207,7 +232,7 @@ fn pinecone_value_to_json(value: PineconeValue) -> serde_json::Value { } } -/// Convert serde_json::Value to Pinecone Value (prost_types::Value) +/// Convert serde_json::Value to Pinecone Value fn json_to_pinecone_value(value: serde_json::Value) -> PineconeValue { let kind = match value { serde_json::Value::Null => Some(Kind::NullValue(0)), diff --git a/crates/nvisy-dal/src/provider/postgres/config.rs b/crates/nvisy-dal/src/provider/postgres/config.rs new file mode 100644 index 
0000000..2541c76 --- /dev/null +++ b/crates/nvisy-dal/src/provider/postgres/config.rs @@ -0,0 +1,39 @@ +//! PostgreSQL configuration. + +use serde::{Deserialize, Serialize}; + +/// PostgreSQL configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PostgresConfig { + /// Connection string (e.g., "postgresql://user:pass@host:5432/db"). + pub connection_string: String, + /// Default table name. + #[serde(skip_serializing_if = "Option::is_none")] + pub table: Option, + /// Default schema. + #[serde(skip_serializing_if = "Option::is_none")] + pub schema: Option, +} + +impl PostgresConfig { + /// Creates a new PostgreSQL configuration. + pub fn new(connection_string: impl Into) -> Self { + Self { + connection_string: connection_string.into(), + table: None, + schema: None, + } + } + + /// Sets the default table. + pub fn with_table(mut self, table: impl Into) -> Self { + self.table = Some(table.into()); + self + } + + /// Sets the default schema. + pub fn with_schema(mut self, schema: impl Into) -> Self { + self.schema = Some(schema.into()); + self + } +} diff --git a/crates/nvisy-dal/src/provider/postgres/mod.rs b/crates/nvisy-dal/src/provider/postgres/mod.rs new file mode 100644 index 0000000..bb28fb5 --- /dev/null +++ b/crates/nvisy-dal/src/provider/postgres/mod.rs @@ -0,0 +1,127 @@ +//! PostgreSQL provider via OpenDAL. + +mod config; + +use std::collections::HashMap; + +use async_trait::async_trait; +pub use config::PostgresConfig; +use futures::StreamExt; +use opendal::{Operator, services}; + +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Record; +use crate::error::{Error, Result}; + +/// PostgreSQL provider for relational data. +#[derive(Clone)] +pub struct PostgresProvider { + operator: Operator, + #[allow(dead_code)] + config: PostgresConfig, +} + +impl PostgresProvider { + /// Creates a new PostgreSQL provider. 
+ pub fn new(config: &PostgresConfig) -> Result { + let mut builder = + services::Postgresql::default().connection_string(&config.connection_string); + + if let Some(ref table) = config.table { + builder = builder.table(table); + } + + if let Some(ref root) = config.schema { + builder = builder.root(root); + } + + let operator = Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| Error::connection(e.to_string()))?; + + Ok(Self { + operator, + config: config.clone(), + }) + } +} + +#[async_trait] +impl DataInput for PostgresProvider { + async fn read(&self, ctx: &Context) -> Result> { + let prefix = ctx.target.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let key = entry.path().to_string(); + match op.read(&key).await { + Ok(data) => { + // Parse the value as JSON to get columns + let value: serde_json::Value = + serde_json::from_slice(&data.to_bytes()) + .unwrap_or(serde_json::json!({})); + + let columns: HashMap = + if let serde_json::Value::Object(map) = value { + map.into_iter().collect() + } else { + let mut cols = HashMap::new(); + cols.insert("_key".to_string(), serde_json::json!(key)); + cols.insert("_value".to_string(), value); + cols + }; + + Some(Ok(Record::from_columns(columns))) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} + +#[async_trait] +impl DataOutput for PostgresProvider { + async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { + for record in items { + // Use _key column as the key, or generate one + let key = record + .get("_key") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); + + let value = + serde_json::to_vec(&record.columns).map_err(|e| Error::provider(e.to_string()))?; + + self.operator + .write(&key, value) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} + +impl std::fmt::Debug for PostgresProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PostgresProvider").finish() + } +} diff --git a/crates/nvisy-vector/src/qdrant/config.rs b/crates/nvisy-dal/src/provider/qdrant/config.rs similarity index 86% rename from crates/nvisy-vector/src/qdrant/config.rs rename to crates/nvisy-dal/src/provider/qdrant/config.rs index 05143ec..c0e33f2 100644 --- a/crates/nvisy-vector/src/qdrant/config.rs +++ b/crates/nvisy-dal/src/provider/qdrant/config.rs @@ -16,9 +16,6 @@ pub struct QdrantConfig { /// Vector dimensions. #[serde(skip_serializing_if = "Option::is_none")] pub dimensions: Option, - /// Use gRPC instead of REST. - #[serde(default)] - pub use_grpc: bool, } impl QdrantConfig { @@ -29,7 +26,6 @@ impl QdrantConfig { api_key: None, collection: None, dimensions: None, - use_grpc: false, } } @@ -50,10 +46,4 @@ impl QdrantConfig { self.dimensions = Some(dimensions); self } - - /// Enables gRPC mode. 
- pub fn with_grpc(mut self) -> Self { - self.use_grpc = true; - self - } } diff --git a/crates/nvisy-vector/src/qdrant/backend.rs b/crates/nvisy-dal/src/provider/qdrant/mod.rs similarity index 72% rename from crates/nvisy-vector/src/qdrant/backend.rs rename to crates/nvisy-dal/src/provider/qdrant/mod.rs index 2dcd8c3..d14e03e 100644 --- a/crates/nvisy-vector/src/qdrant/backend.rs +++ b/crates/nvisy-dal/src/provider/qdrant/mod.rs @@ -1,12 +1,11 @@ -//! Qdrant backend implementation. +//! Qdrant vector store provider. + +mod config; use std::collections::HashMap; use async_trait::async_trait; -use nvisy_data::{ - DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, - VectorSearchResult, -}; +pub use config::QdrantConfig; use qdrant_client::Qdrant; use qdrant_client::qdrant::vectors_config::Config as VectorsConfig; use qdrant_client::qdrant::with_payload_selector::SelectorOptions; @@ -16,29 +15,24 @@ use qdrant_client::qdrant::{ SearchPointsBuilder, UpsertPointsBuilder, VectorParamsBuilder, }; -use super::QdrantConfig; -use crate::TRACING_TARGET; +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Embedding; +use crate::error::{Error, Result}; -/// Qdrant backend implementation. -pub struct QdrantBackend { +/// Qdrant provider for vector storage. +pub struct QdrantProvider { client: Qdrant, #[allow(dead_code)] config: QdrantConfig, } -impl QdrantBackend { - /// Creates a new Qdrant backend. - pub async fn new(config: &QdrantConfig) -> DataResult { +impl QdrantProvider { + /// Creates a new Qdrant provider. + pub async fn new(config: &QdrantConfig) -> Result { let client = Qdrant::from_url(&config.url) .api_key(config.api_key.clone()) .build() - .map_err(|e| DataError::connection(e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET, - url = %config.url, - "Connected to Qdrant" - ); + .map_err(|e| Error::connection(e.to_string()))?; Ok(Self { client, @@ -47,12 +41,12 @@ impl QdrantBackend { } /// Ensures a collection exists, creating it if necessary. 
- async fn ensure_collection(&self, name: &str, dimensions: usize) -> DataResult<()> { + async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { let exists = self .client .collection_exists(name) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; if !exists { let vectors_config = VectorsConfig::Params( @@ -64,14 +58,7 @@ impl QdrantBackend { CreateCollectionBuilder::new(name).vectors_config(vectors_config), ) .await - .map_err(|e| DataError::backend(e.to_string()))?; - - tracing::info!( - target: TRACING_TARGET, - collection = %name, - dimensions = %dimensions, - "Created Qdrant collection" - ); + .map_err(|e| Error::provider(e.to_string()))?; } Ok(()) @@ -102,64 +89,29 @@ impl QdrantBackend { _ => None, } } -} - -#[async_trait] -impl VectorOutput for QdrantBackend { - async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()> { - if vectors.is_empty() { - return Ok(()); - } - // Get dimensions from the first vector - let dimensions = vectors - .first() - .map(|v| v.vector.len()) - .ok_or_else(|| DataError::invalid("No vectors provided"))?; - - // Ensure collection exists - self.ensure_collection(&ctx.collection, dimensions).await?; - - let points: Vec = vectors - .into_iter() - .map(|v| { - let payload: HashMap = v - .metadata - .into_iter() - .map(|(k, v)| (k, json_to_qdrant_value(v))) - .collect(); - - PointStruct::new(v.id, v.vector, payload) - }) - .collect(); - - self.client - .upsert_points(UpsertPointsBuilder::new(&ctx.collection, points)) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - Ok(()) - } - - async fn search( + /// Searches for similar vectors. + pub async fn search( &self, - ctx: &VectorContext, + collection: &str, query: Vec, limit: usize, - options: VectorSearchOptions, - ) -> DataResult> { - let mut search = SearchPointsBuilder::new(&ctx.collection, query, limit as u64); + include_vectors: bool, + include_metadata: bool, + filter: Option<&serde_json::Value>, + ) -> Result> { + let mut search = SearchPointsBuilder::new(collection, query, limit as u64); - if options.include_vectors { + if include_vectors { search = search.with_vectors(VectorsSelectorOptions::Enable(true)); } - if options.include_metadata { + if include_metadata { search = search.with_payload(SelectorOptions::Enable(true)); } - if let Some(filter_json) = options.filter - && let Some(conditions) = parse_filter(&filter_json) + if let Some(filter_json) = filter + && let Some(conditions) = parse_filter(filter_json) { search = search.filter(Filter::must(conditions)); } @@ -168,7 +120,7 @@ impl VectorOutput for QdrantBackend { .client .search_points(search) .await - .map_err(|e| DataError::backend(e.to_string()))?; + .map_err(|e| Error::provider(e.to_string()))?; let results = response .result @@ -183,7 +135,7 @@ impl VectorOutput for QdrantBackend { .map(|(k, v)| (k, qdrant_value_to_json(v))) .collect(); - VectorSearchResult { + SearchResult { id, score: point.score, vector, @@ -196,6 +148,77 @@ impl VectorOutput for QdrantBackend { } } +/// Result from a vector similarity search. +#[derive(Debug, Clone)] +pub struct SearchResult { + /// The ID of the matched vector. + pub id: String, + /// Similarity score. + pub score: f32, + /// The vector data, if requested. + pub vector: Option>, + /// Metadata associated with this vector. 
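The Qdrant `search` mirrors the pgvector one but adds `include_metadata` and an optional JSON filter (interpreted by `parse_filter`, which is not shown in this hunk and is assumed here to express exact-match conditions on payload fields):

```rust
use nvisy_dal::provider::QdrantProvider;
use nvisy_dal::Result;

async fn filtered_search(provider: &QdrantProvider) -> Result<()> {
    // Assumed filter shape: payload field name -> required value.
    let filter = serde_json::json!({ "source": "crawler" });

    let hits = provider
        .search("documents", vec![0.1, 0.0, 0.9], 10, false, true, Some(&filter))
        .await?;
    for hit in hits {
        println!("{} {:.3} {:?}", hit.id, hit.score, hit.metadata);
    }
    Ok(())
}
```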
+ pub metadata: HashMap, +} + +#[async_trait] +impl DataOutput for QdrantProvider { + async fn write(&self, ctx: &Context, items: Vec) -> Result<()> { + if items.is_empty() { + return Ok(()); + } + + let collection = ctx + .target + .as_deref() + .ok_or_else(|| Error::invalid_input("Collection name required in context.target"))?; + + // Get dimensions from the first vector + let dimensions = items + .first() + .map(|v| v.vector.len()) + .ok_or_else(|| Error::invalid_input("No embeddings provided"))?; + + // Ensure collection exists + self.ensure_collection(collection, dimensions).await?; + + let points: Vec = items + .into_iter() + .map(|v| { + let payload: HashMap = v + .metadata + .into_iter() + .map(|(k, v)| (k, json_to_qdrant_value(v))) + .collect(); + + PointStruct::new(v.id, v.vector, payload) + }) + .collect(); + + self.client + .upsert_points(UpsertPointsBuilder::new(collection, points)) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + Ok(()) + } +} + +#[async_trait] +impl DataInput for QdrantProvider { + async fn read(&self, _ctx: &Context) -> Result> { + // Vector stores are primarily write/search, not sequential read + let stream = futures::stream::empty(); + Ok(InputStream::new(Box::pin(stream))) + } +} + +impl std::fmt::Debug for QdrantProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("QdrantProvider").finish() + } +} + /// Converts JSON value to Qdrant value. fn json_to_qdrant_value(value: serde_json::Value) -> qdrant_client::qdrant::Value { use qdrant_client::qdrant::value::Kind; diff --git a/crates/nvisy-opendal/src/s3/config.rs b/crates/nvisy-dal/src/provider/s3/config.rs similarity index 100% rename from crates/nvisy-opendal/src/s3/config.rs rename to crates/nvisy-dal/src/provider/s3/config.rs diff --git a/crates/nvisy-dal/src/provider/s3/mod.rs b/crates/nvisy-dal/src/provider/s3/mod.rs new file mode 100644 index 0000000..411814f --- /dev/null +++ b/crates/nvisy-dal/src/provider/s3/mod.rs @@ -0,0 +1,114 @@ +//! Amazon S3 provider. + +mod config; + +use async_trait::async_trait; +pub use config::S3Config; +use futures::StreamExt; +use opendal::{Operator, services}; + +use crate::core::{Context, DataInput, DataOutput, InputStream}; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +/// Amazon S3 provider for blob storage. +#[derive(Clone)] +pub struct S3Provider { + operator: Operator, +} + +impl S3Provider { + /// Creates a new S3 provider. 
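Since every storage provider implements both `DataInput` and `DataOutput` over `Blob`, copying between backends is just plumbing one provider's stream into the other's write call. A sketch moving objects from GCS to Azure Blob Storage, under the same `Stream`-on-`InputStream` assumption as above:

```rust
use futures::StreamExt;
use nvisy_dal::provider::{AzblobProvider, GcsProvider};
use nvisy_dal::{Context, DataInput, DataOutput, Result};

/// Copies every blob the source yields into the destination, returning the count.
async fn copy_blobs(src: &GcsProvider, dst: &AzblobProvider, ctx: &Context) -> Result<usize> {
    let mut stream = src.read(ctx).await?;
    let mut blobs = Vec::new();
    while let Some(item) = stream.next().await {
        blobs.push(item?);
    }
    let count = blobs.len();
    dst.write(ctx, blobs).await?;
    Ok(count)
}
```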
+ pub fn new(config: &S3Config) -> Result { + let mut builder = services::S3::default() + .bucket(&config.bucket) + .region(&config.region); + + if let Some(ref endpoint) = config.endpoint { + builder = builder.endpoint(endpoint); + } + + if let Some(ref access_key_id) = config.access_key_id { + builder = builder.access_key_id(access_key_id); + } + + if let Some(ref secret_access_key) = config.secret_access_key { + builder = builder.secret_access_key(secret_access_key); + } + + if let Some(ref prefix) = config.prefix { + builder = builder.root(prefix); + } + + let operator = Operator::new(builder) + .map(|op| op.finish()) + .map_err(|e| Error::connection(e.to_string()))?; + + Ok(Self { operator }) + } +} + +#[async_trait] +impl DataInput for S3Provider { + async fn read(&self, ctx: &Context) -> Result> { + let prefix = ctx.target.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let path = entry.path().to_string(); + if path.ends_with('/') { + return None; + } + + match op.read(&path).await { + Ok(data) => { + let mut blob = Blob::new(path.clone(), data.to_bytes()); + if let Ok(meta) = op.stat(&path).await { + if let Some(ct) = meta.content_type() { + blob = blob.with_content_type(ct); + } + } + Some(Ok(blob)) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} + +#[async_trait] +impl DataOutput for S3Provider { + async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { + for blob in items { + self.operator + .write(&blob.path, blob.data) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} + +impl std::fmt::Debug for S3Provider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("S3Provider").finish() + } +} diff --git a/crates/nvisy-data/Cargo.toml b/crates/nvisy-data/Cargo.toml deleted file mode 100644 index 249baaf..0000000 --- a/crates/nvisy-data/Cargo.toml +++ /dev/null @@ -1,39 +0,0 @@ -# https://doc.rust-lang.org/cargo/reference/manifest.html - -[package] -name = "nvisy-data" -version = { workspace = true } -rust-version = { workspace = true } -edition = { workspace = true } -license = { workspace = true } -publish = { workspace = true } -readme = "./README.md" -description = "Foundational traits for data I/O and vector operations" - -authors = { workspace = true } -repository = { workspace = true } -homepage = { workspace = true } -documentation = { workspace = true } - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[dependencies] -# Async runtime -tokio = { workspace = true, features = ["rt", "sync"] } -futures = { workspace = true, features = [] } - -# Bytes -bytes = { workspace = true, features = [] } - -# (De)serialization -serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true, features = [] } - -# Derive macros & utilities -thiserror = { workspace = true, features = [] } -async-trait = { workspace = true, features = [] } - -[dev-dependencies] -tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/nvisy-data/src/error.rs 
b/crates/nvisy-data/src/error.rs deleted file mode 100644 index 235b5c5..0000000 --- a/crates/nvisy-data/src/error.rs +++ /dev/null @@ -1,99 +0,0 @@ -//! Error types for data operations. - -use std::fmt; - -/// Result type for data operations. -pub type DataResult = Result; - -/// Error type for data operations. -#[derive(Debug)] -pub struct DataError { - kind: DataErrorKind, - message: String, - source: Option>, -} - -/// The kind of data error. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum DataErrorKind { - /// Connection error (e.g., network failure). - Connection, - /// Not found error (e.g., file or collection doesn't exist). - NotFound, - /// Permission denied. - Permission, - /// Invalid input or configuration. - Invalid, - /// Serialization/deserialization error. - Serialization, - /// Backend-specific error. - Backend, - /// Unknown or unclassified error. - Unknown, -} - -impl DataError { - /// Creates a new error with the given kind and message. - pub fn new(kind: DataErrorKind, message: impl Into) -> Self { - Self { - kind, - message: message.into(), - source: None, - } - } - - /// Adds a source error. - pub fn with_source(mut self, source: impl std::error::Error + Send + Sync + 'static) -> Self { - self.source = Some(Box::new(source)); - self - } - - /// Returns the error kind. - pub fn kind(&self) -> DataErrorKind { - self.kind - } - - /// Creates a connection error. - pub fn connection(message: impl Into) -> Self { - Self::new(DataErrorKind::Connection, message) - } - - /// Creates a not found error. - pub fn not_found(message: impl Into) -> Self { - Self::new(DataErrorKind::NotFound, message) - } - - /// Creates a permission error. - pub fn permission(message: impl Into) -> Self { - Self::new(DataErrorKind::Permission, message) - } - - /// Creates an invalid input error. - pub fn invalid(message: impl Into) -> Self { - Self::new(DataErrorKind::Invalid, message) - } - - /// Creates a serialization error. - pub fn serialization(message: impl Into) -> Self { - Self::new(DataErrorKind::Serialization, message) - } - - /// Creates a backend error. - pub fn backend(message: impl Into) -> Self { - Self::new(DataErrorKind::Backend, message) - } -} - -impl fmt::Display for DataError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.message) - } -} - -impl std::error::Error for DataError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - self.source - .as_ref() - .map(|e| e.as_ref() as &(dyn std::error::Error + 'static)) - } -} diff --git a/crates/nvisy-data/src/input.rs b/crates/nvisy-data/src/input.rs deleted file mode 100644 index 15444d3..0000000 --- a/crates/nvisy-data/src/input.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! Data input trait for reading from storage backends. - -use async_trait::async_trait; -use bytes::Bytes; -use futures::Stream; - -use crate::error::DataResult; - -/// Context for data input operations. -#[derive(Debug, Clone, Default)] -pub struct InputContext { - /// The bucket or container name (for object storage). - pub bucket: Option, - /// Additional options as key-value pairs. - pub options: std::collections::HashMap, -} - -impl InputContext { - /// Creates a new empty context. - pub fn new() -> Self { - Self::default() - } - - /// Sets the bucket/container. - pub fn with_bucket(mut self, bucket: impl Into) -> Self { - self.bucket = Some(bucket.into()); - self - } - - /// Adds an option. 
- pub fn with_option(mut self, key: impl Into, value: impl Into) -> Self { - self.options.insert(key.into(), value.into()); - self - } -} - -/// Trait for reading data from storage backends. -#[async_trait] -pub trait DataInput: Send + Sync { - /// Reads the entire contents at the given path. - async fn read(&self, ctx: &InputContext, path: &str) -> DataResult; - - /// Reads the contents as a stream of chunks. - async fn read_stream( - &self, - ctx: &InputContext, - path: &str, - ) -> DataResult> + Send + Unpin>>; - - /// Checks if a path exists. - async fn exists(&self, ctx: &InputContext, path: &str) -> DataResult; - - /// Lists paths under the given prefix. - async fn list(&self, ctx: &InputContext, prefix: &str) -> DataResult>; -} diff --git a/crates/nvisy-data/src/lib.rs b/crates/nvisy-data/src/lib.rs deleted file mode 100644 index 376591a..0000000 --- a/crates/nvisy-data/src/lib.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Foundational traits for data I/O and vector operations. -//! -//! This crate provides the core abstractions for: -//! - Data input/output operations (storage backends) -//! - Vector store operations (embeddings storage) -//! - Common types used across integrations - -#![forbid(unsafe_code)] - -mod error; -mod input; -mod output; -mod types; -mod vector; - -pub use error::{DataError, DataErrorKind, DataResult}; -pub use input::{DataInput, InputContext}; -pub use output::{DataOutput, OutputContext}; -pub use types::{Metadata, VectorData, VectorSearchResult}; -pub use vector::{VectorContext, VectorOutput, VectorSearchOptions}; diff --git a/crates/nvisy-data/src/output.rs b/crates/nvisy-data/src/output.rs deleted file mode 100644 index 672f659..0000000 --- a/crates/nvisy-data/src/output.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! Data output trait for writing to storage backends. - -use async_trait::async_trait; -use bytes::Bytes; -use futures::Stream; - -use crate::error::DataResult; - -/// Context for data output operations. -#[derive(Debug, Clone, Default)] -pub struct OutputContext { - /// The bucket or container name (for object storage). - pub bucket: Option, - /// Content type for the data being written. - pub content_type: Option, - /// Additional options as key-value pairs. - pub options: std::collections::HashMap, -} - -impl OutputContext { - /// Creates a new empty context. - pub fn new() -> Self { - Self::default() - } - - /// Sets the bucket/container. - pub fn with_bucket(mut self, bucket: impl Into) -> Self { - self.bucket = Some(bucket.into()); - self - } - - /// Sets the content type. - pub fn with_content_type(mut self, content_type: impl Into) -> Self { - self.content_type = Some(content_type.into()); - self - } - - /// Adds an option. - pub fn with_option(mut self, key: impl Into, value: impl Into) -> Self { - self.options.insert(key.into(), value.into()); - self - } -} - -/// Trait for writing data to storage backends. -#[async_trait] -pub trait DataOutput: Send + Sync { - /// Writes data to the given path. - async fn write(&self, ctx: &OutputContext, path: &str, data: Bytes) -> DataResult<()>; - - /// Writes data from a stream to the given path. - async fn write_stream( - &self, - ctx: &OutputContext, - path: &str, - stream: Box> + Send + Unpin>, - ) -> DataResult<()>; - - /// Deletes the data at the given path. 
- async fn delete(&self, ctx: &OutputContext, path: &str) -> DataResult<()>; -} diff --git a/crates/nvisy-data/src/types.rs b/crates/nvisy-data/src/types.rs deleted file mode 100644 index 0410f2a..0000000 --- a/crates/nvisy-data/src/types.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! Common types used across integrations. - -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; - -/// Metadata associated with data or vectors. -pub type Metadata = HashMap; - -/// A vector with its ID and metadata. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct VectorData { - /// Unique identifier for this vector. - pub id: String, - /// The embedding vector. - pub vector: Vec, - /// Optional metadata associated with this vector. - #[serde(default)] - pub metadata: Metadata, -} - -impl VectorData { - /// Creates a new vector data with the given ID and vector. - pub fn new(id: impl Into, vector: Vec) -> Self { - Self { - id: id.into(), - vector, - metadata: Metadata::new(), - } - } - - /// Adds metadata to this vector. - pub fn with_metadata(mut self, metadata: Metadata) -> Self { - self.metadata = metadata; - self - } -} - -/// Result from a vector similarity search. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct VectorSearchResult { - /// The ID of the matched vector. - pub id: String, - /// Similarity score (interpretation depends on distance metric). - pub score: f32, - /// The vector data, if requested. - #[serde(skip_serializing_if = "Option::is_none")] - pub vector: Option>, - /// Metadata associated with this vector. - #[serde(default)] - pub metadata: Metadata, -} diff --git a/crates/nvisy-data/src/vector.rs b/crates/nvisy-data/src/vector.rs deleted file mode 100644 index b268338..0000000 --- a/crates/nvisy-data/src/vector.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! Vector output trait for inserting into vector stores. - -use async_trait::async_trait; - -use crate::error::DataResult; -use crate::types::{VectorData, VectorSearchResult}; - -/// Context for vector operations. -#[derive(Debug, Clone, Default)] -pub struct VectorContext { - /// The collection/index/namespace to operate on. - pub collection: String, - /// Additional options as key-value pairs. - pub options: std::collections::HashMap, -} - -impl VectorContext { - /// Creates a new context with the given collection name. - pub fn new(collection: impl Into) -> Self { - Self { - collection: collection.into(), - options: std::collections::HashMap::new(), - } - } - - /// Adds an option. - pub fn with_option(mut self, key: impl Into, value: impl Into) -> Self { - self.options.insert(key.into(), value.into()); - self - } -} - -/// Options for vector search operations. -#[derive(Debug, Clone, Default)] -pub struct VectorSearchOptions { - /// Whether to include the vector data in results. - pub include_vectors: bool, - /// Whether to include metadata in results. - pub include_metadata: bool, - /// Optional filter (backend-specific format). - pub filter: Option, -} - -impl VectorSearchOptions { - /// Creates new search options. - pub fn new() -> Self { - Self::default() - } - - /// Include vectors in the results. - pub fn with_vectors(mut self) -> Self { - self.include_vectors = true; - self - } - - /// Include metadata in the results. - pub fn with_metadata(mut self) -> Self { - self.include_metadata = true; - self - } - - /// Set a filter for the search. 
- pub fn with_filter(mut self, filter: serde_json::Value) -> Self { - self.filter = Some(filter); - self - } -} - -/// Trait for inserting vectors into vector stores. -#[async_trait] -pub trait VectorOutput: Send + Sync { - /// Inserts vectors into the specified collection. - /// - /// If vectors with the same IDs already exist, they may be overwritten - /// (behavior depends on the backend). - async fn insert(&self, ctx: &VectorContext, vectors: Vec) -> DataResult<()>; - - /// Searches for similar vectors. - async fn search( - &self, - ctx: &VectorContext, - query: Vec, - limit: usize, - options: VectorSearchOptions, - ) -> DataResult>; -} diff --git a/crates/nvisy-opendal/Cargo.toml b/crates/nvisy-opendal/Cargo.toml deleted file mode 100644 index 47406aa..0000000 --- a/crates/nvisy-opendal/Cargo.toml +++ /dev/null @@ -1,54 +0,0 @@ -# https://doc.rust-lang.org/cargo/reference/manifest.html - -[package] -name = "nvisy-opendal" -version = { workspace = true } -rust-version = { workspace = true } -edition = { workspace = true } -license = { workspace = true } -publish = { workspace = true } -readme = "./README.md" - -authors = { workspace = true } -repository = { workspace = true } -homepage = { workspace = true } -documentation = { workspace = true } - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[dependencies] -# Internal crates -nvisy-data = { workspace = true } - -# Async runtime -tokio = { workspace = true, features = ["rt", "sync", "io-util"] } -futures = { workspace = true, features = [] } - -# Storage -opendal = { workspace = true, features = [ - "services-s3", - "services-gcs", - "services-azblob", - "services-gdrive", - "services-dropbox", - "services-onedrive", -] } - -# Observability -tracing = { workspace = true, features = [] } - -# (De)serialization -serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true, features = [] } - -# Derive macros & utilities -async-trait = { workspace = true, features = [] } -bytes = { workspace = true, features = [] } - -# Data types -jiff = { workspace = true, features = ["serde"] } - -[dev-dependencies] -tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/nvisy-opendal/README.md b/crates/nvisy-opendal/README.md deleted file mode 100644 index bf01e5b..0000000 --- a/crates/nvisy-opendal/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# nvisy-opendal - -Storage abstraction layer for the Nvisy platform using OpenDAL for unified access -to multiple cloud storage backends. 
- -## Features - -- **Unified API** - Single interface for multiple storage backends -- **Cloud-Native** - Support for major cloud storage providers -- **Async Operations** - Non-blocking I/O with Tokio runtime -- **Feature Flags** - Enable only the backends you need - -## Supported Backends - -| Backend | Feature Flag | Description | -|---------|--------------|-------------| -| Amazon S3 | `s3` | S3-compatible object storage | -| Google Cloud Storage | `gcs` | GCS bucket storage | -| Azure Blob Storage | `azblob` | Azure container storage | -| Google Drive | `gdrive` | Google Drive file storage | -| Dropbox | `dropbox` | Dropbox cloud storage | -| OneDrive | `onedrive` | Microsoft OneDrive storage | - -## Usage - -Enable the backends you need in `Cargo.toml`: - -```toml -[dependencies] -nvisy-opendal = { path = "../nvisy-opendal", features = ["s3", "gcs"] } -``` - -Or enable all backends: - -```toml -[dependencies] -nvisy-opendal = { path = "../nvisy-opendal", features = ["all-backends"] } -``` - -## Key Dependencies - -- `opendal` - Unified data access layer for multiple storage services -- `tokio` - Async runtime for non-blocking I/O operations -- `jiff` - Modern date/time handling for file metadata diff --git a/crates/nvisy-opendal/src/azblob/mod.rs b/crates/nvisy-opendal/src/azblob/mod.rs deleted file mode 100644 index e13cd1e..0000000 --- a/crates/nvisy-opendal/src/azblob/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Azure Blob Storage backend. - -mod config; - -pub use config::AzureBlobConfig; diff --git a/crates/nvisy-opendal/src/backend.rs b/crates/nvisy-opendal/src/backend.rs deleted file mode 100644 index 80d54c6..0000000 --- a/crates/nvisy-opendal/src/backend.rs +++ /dev/null @@ -1,405 +0,0 @@ -//! Storage backend implementation. - -use async_trait::async_trait; -use bytes::Bytes; -use futures::Stream; -use nvisy_data::{DataError, DataInput, DataOutput, DataResult, InputContext, OutputContext}; -use opendal::{Operator, services}; - -use crate::TRACING_TARGET; -use crate::azblob::AzureBlobConfig; -use crate::config::StorageConfig; -use crate::dropbox::DropboxConfig; -use crate::gcs::GcsConfig; -use crate::gdrive::GoogleDriveConfig; -use crate::onedrive::OneDriveConfig; -use crate::s3::S3Config; - -/// Unified storage backend that wraps OpenDAL operators. -#[derive(Clone)] -pub struct StorageBackend { - operator: Operator, - config: StorageConfig, -} - -impl StorageBackend { - /// Creates a new storage backend from configuration. - pub async fn new(config: StorageConfig) -> DataResult { - let operator = Self::create_operator(&config)?; - - tracing::info!( - target: TRACING_TARGET, - backend = %config.backend_name(), - "Storage backend initialized" - ); - - Ok(Self { operator, config }) - } - - /// Returns the configuration for this backend. - pub fn config(&self) -> &StorageConfig { - &self.config - } - - /// Returns the backend name. - pub fn backend_name(&self) -> &'static str { - self.config.backend_name() - } - - /// Gets metadata for a file. - pub async fn stat(&self, path: &str) -> DataResult { - let meta = self - .operator - .stat(path) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - let last_modified = meta - .last_modified() - .and_then(|dt| jiff::Timestamp::from_second(dt.timestamp()).ok()); - - Ok(FileMetadata { - size: meta.content_length(), - last_modified, - content_type: meta.content_type().map(|s| s.to_string()), - }) - } - - /// Copies a file from one path to another. 
- pub async fn copy(&self, from: &str, to: &str) -> DataResult<()> { - tracing::debug!( - target: TRACING_TARGET, - from = %from, - to = %to, - "Copying file" - ); - - self.operator - .copy(from, to) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - Ok(()) - } - - /// Moves a file from one path to another. - pub async fn rename(&self, from: &str, to: &str) -> DataResult<()> { - tracing::debug!( - target: TRACING_TARGET, - from = %from, - to = %to, - "Moving file" - ); - - self.operator - .rename(from, to) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - Ok(()) - } - - /// Creates an OpenDAL operator based on configuration. - fn create_operator(config: &StorageConfig) -> DataResult { - match config { - StorageConfig::S3(cfg) => Self::create_s3_operator(cfg), - StorageConfig::Gcs(cfg) => Self::create_gcs_operator(cfg), - StorageConfig::AzureBlob(cfg) => Self::create_azblob_operator(cfg), - StorageConfig::GoogleDrive(cfg) => Self::create_gdrive_operator(cfg), - StorageConfig::Dropbox(cfg) => Self::create_dropbox_operator(cfg), - StorageConfig::OneDrive(cfg) => Self::create_onedrive_operator(cfg), - } - } - - fn create_s3_operator(cfg: &S3Config) -> DataResult { - let mut builder = services::S3::default() - .bucket(&cfg.bucket) - .region(&cfg.region); - - if let Some(ref endpoint) = cfg.endpoint { - builder = builder.endpoint(endpoint); - } - - if let Some(ref access_key_id) = cfg.access_key_id { - builder = builder.access_key_id(access_key_id); - } - - if let Some(ref secret_access_key) = cfg.secret_access_key { - builder = builder.secret_access_key(secret_access_key); - } - - if let Some(ref prefix) = cfg.prefix { - builder = builder.root(prefix); - } - - Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| DataError::backend(e.to_string())) - } - - fn create_gcs_operator(cfg: &GcsConfig) -> DataResult { - let mut builder = services::Gcs::default().bucket(&cfg.bucket); - - if let Some(ref credentials) = cfg.credentials { - builder = builder.credential(credentials); - } - - if let Some(ref prefix) = cfg.prefix { - builder = builder.root(prefix); - } - - Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| DataError::backend(e.to_string())) - } - - fn create_azblob_operator(cfg: &AzureBlobConfig) -> DataResult { - let mut builder = services::Azblob::default() - .container(&cfg.container) - .account_name(&cfg.account_name); - - if let Some(ref account_key) = cfg.account_key { - builder = builder.account_key(account_key); - } - - if let Some(ref prefix) = cfg.prefix { - builder = builder.root(prefix); - } - - Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| DataError::backend(e.to_string())) - } - - fn create_gdrive_operator(cfg: &GoogleDriveConfig) -> DataResult { - let mut builder = services::Gdrive::default().root(&cfg.root); - - if let Some(ref access_token) = cfg.access_token { - builder = builder.access_token(access_token); - } - - Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| DataError::backend(e.to_string())) - } - - fn create_dropbox_operator(cfg: &DropboxConfig) -> DataResult { - let mut builder = services::Dropbox::default().root(&cfg.root); - - if let Some(ref access_token) = cfg.access_token { - builder = builder.access_token(access_token); - } - - if let Some(ref refresh_token) = cfg.refresh_token { - builder = builder.refresh_token(refresh_token); - } - - if let Some(ref client_id) = cfg.client_id { - builder = builder.client_id(client_id); - } - - if let Some(ref client_secret) = cfg.client_secret { 
- builder = builder.client_secret(client_secret); - } - - Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| DataError::backend(e.to_string())) - } - - fn create_onedrive_operator(cfg: &OneDriveConfig) -> DataResult { - let mut builder = services::Onedrive::default().root(&cfg.root); - - if let Some(ref access_token) = cfg.access_token { - builder = builder.access_token(access_token); - } - - Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| DataError::backend(e.to_string())) - } -} - -#[async_trait] -impl DataInput for StorageBackend { - async fn read(&self, _ctx: &InputContext, path: &str) -> DataResult { - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "Reading file" - ); - - let data = self - .operator - .read(path) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - size = data.len(), - "File read complete" - ); - - Ok(data.to_bytes()) - } - - async fn read_stream( - &self, - _ctx: &InputContext, - path: &str, - ) -> DataResult> + Send + Unpin>> { - use futures::StreamExt; - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "Reading file as stream" - ); - - let reader = self - .operator - .reader(path) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - let stream = reader - .into_bytes_stream(0..u64::MAX) - .await - .map_err(|e| DataError::backend(e.to_string()))? - .map(|result| result.map_err(|e| DataError::backend(e.to_string()))); - - Ok(Box::new(stream)) - } - - async fn exists(&self, _ctx: &InputContext, path: &str) -> DataResult { - self.operator - .exists(path) - .await - .map_err(|e| DataError::backend(e.to_string())) - } - - async fn list(&self, _ctx: &InputContext, prefix: &str) -> DataResult> { - use futures::TryStreamExt; - - let entries: Vec<_> = self - .operator - .lister(prefix) - .await - .map_err(|e| DataError::backend(e.to_string()))? 
- .try_collect() - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - Ok(entries.into_iter().map(|e| e.path().to_string()).collect()) - } -} - -#[async_trait] -impl DataOutput for StorageBackend { - async fn write(&self, _ctx: &OutputContext, path: &str, data: Bytes) -> DataResult<()> { - tracing::debug!( - target: TRACING_TARGET, - path = %path, - size = data.len(), - "Writing file" - ); - - self.operator - .write(path, data) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "File write complete" - ); - - Ok(()) - } - - async fn write_stream( - &self, - _ctx: &OutputContext, - path: &str, - stream: Box> + Send + Unpin>, - ) -> DataResult<()> { - use futures::StreamExt; - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "Writing file from stream" - ); - - let mut writer = self - .operator - .writer(path) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - let mut stream = stream; - while let Some(result) = stream.next().await { - let chunk = result?; - writer - .write(chunk) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - } - - writer - .close() - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "File stream write complete" - ); - - Ok(()) - } - - async fn delete(&self, _ctx: &OutputContext, path: &str) -> DataResult<()> { - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "Deleting file" - ); - - self.operator - .delete(path) - .await - .map_err(|e| DataError::backend(e.to_string()))?; - - tracing::debug!( - target: TRACING_TARGET, - path = %path, - "File deleted" - ); - - Ok(()) - } -} - -/// File metadata. -#[derive(Debug, Clone)] -pub struct FileMetadata { - /// File size in bytes. - pub size: u64, - /// Last modification time. - pub last_modified: Option, - /// Content type / MIME type. - pub content_type: Option, -} - -impl std::fmt::Debug for StorageBackend { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("StorageBackend") - .field("backend", &self.config.backend_name()) - .finish() - } -} diff --git a/crates/nvisy-opendal/src/config.rs b/crates/nvisy-opendal/src/config.rs deleted file mode 100644 index 5c75eaa..0000000 --- a/crates/nvisy-opendal/src/config.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Storage configuration types. - -use serde::{Deserialize, Serialize}; - -// Re-export configs from backend modules -pub use crate::azblob::AzureBlobConfig; -pub use crate::dropbox::DropboxConfig; -pub use crate::gcs::GcsConfig; -pub use crate::gdrive::GoogleDriveConfig; -pub use crate::onedrive::OneDriveConfig; -pub use crate::s3::S3Config; - -/// Storage backend configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -#[non_exhaustive] -pub enum StorageConfig { - /// Amazon S3 compatible storage. - S3(S3Config), - /// Google Cloud Storage. - Gcs(GcsConfig), - /// Azure Blob Storage. - AzureBlob(AzureBlobConfig), - /// Google Drive. - GoogleDrive(GoogleDriveConfig), - /// Dropbox. - Dropbox(DropboxConfig), - /// OneDrive. - OneDrive(OneDriveConfig), -} - -impl StorageConfig { - /// Returns the backend name as a static string. 
- pub fn backend_name(&self) -> &'static str { - match self { - Self::S3(_) => "s3", - Self::Gcs(_) => "gcs", - Self::AzureBlob(_) => "azblob", - Self::GoogleDrive(_) => "gdrive", - Self::Dropbox(_) => "dropbox", - Self::OneDrive(_) => "onedrive", - } - } -} diff --git a/crates/nvisy-opendal/src/dropbox/config.rs b/crates/nvisy-opendal/src/dropbox/config.rs deleted file mode 100644 index 2f435b1..0000000 --- a/crates/nvisy-opendal/src/dropbox/config.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Dropbox configuration. - -use serde::{Deserialize, Serialize}; - -/// Dropbox configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct DropboxConfig { - /// Root folder path. - pub root: String, - /// OAuth client ID. - #[serde(skip_serializing_if = "Option::is_none")] - pub client_id: Option, - /// OAuth client secret. - #[serde(skip_serializing_if = "Option::is_none")] - pub client_secret: Option, - /// OAuth access token. - #[serde(skip_serializing_if = "Option::is_none")] - pub access_token: Option, - /// OAuth refresh token. - #[serde(skip_serializing_if = "Option::is_none")] - pub refresh_token: Option, -} - -impl DropboxConfig { - /// Creates a new Dropbox configuration. - pub fn new(root: impl Into) -> Self { - Self { - root: root.into(), - client_id: None, - client_secret: None, - access_token: None, - refresh_token: None, - } - } - - /// Sets the OAuth client credentials. - pub fn with_client_credentials( - mut self, - client_id: impl Into, - client_secret: impl Into, - ) -> Self { - self.client_id = Some(client_id.into()); - self.client_secret = Some(client_secret.into()); - self - } - - /// Sets the access token. - pub fn with_access_token(mut self, access_token: impl Into) -> Self { - self.access_token = Some(access_token.into()); - self - } - - /// Sets the refresh token. - pub fn with_refresh_token(mut self, refresh_token: impl Into) -> Self { - self.refresh_token = Some(refresh_token.into()); - self - } -} diff --git a/crates/nvisy-opendal/src/dropbox/mod.rs b/crates/nvisy-opendal/src/dropbox/mod.rs deleted file mode 100644 index 9389461..0000000 --- a/crates/nvisy-opendal/src/dropbox/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Dropbox storage backend. - -mod config; - -pub use config::DropboxConfig; diff --git a/crates/nvisy-opendal/src/gcs/mod.rs b/crates/nvisy-opendal/src/gcs/mod.rs deleted file mode 100644 index 0279e42..0000000 --- a/crates/nvisy-opendal/src/gcs/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Google Cloud Storage backend. - -mod config; - -pub use config::GcsConfig; diff --git a/crates/nvisy-opendal/src/gdrive/config.rs b/crates/nvisy-opendal/src/gdrive/config.rs deleted file mode 100644 index f03449d..0000000 --- a/crates/nvisy-opendal/src/gdrive/config.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Google Drive configuration. - -use serde::{Deserialize, Serialize}; - -/// Google Drive configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct GoogleDriveConfig { - /// Root folder path or ID. - pub root: String, - /// OAuth client ID. - #[serde(skip_serializing_if = "Option::is_none")] - pub client_id: Option, - /// OAuth client secret. - #[serde(skip_serializing_if = "Option::is_none")] - pub client_secret: Option, - /// OAuth access token. - #[serde(skip_serializing_if = "Option::is_none")] - pub access_token: Option, - /// OAuth refresh token. - #[serde(skip_serializing_if = "Option::is_none")] - pub refresh_token: Option, -} - -impl GoogleDriveConfig { - /// Creates a new Google Drive configuration. 
- pub fn new(root: impl Into) -> Self { - Self { - root: root.into(), - client_id: None, - client_secret: None, - access_token: None, - refresh_token: None, - } - } - - /// Sets the OAuth client credentials. - pub fn with_client_credentials( - mut self, - client_id: impl Into, - client_secret: impl Into, - ) -> Self { - self.client_id = Some(client_id.into()); - self.client_secret = Some(client_secret.into()); - self - } - - /// Sets the access token. - pub fn with_access_token(mut self, access_token: impl Into) -> Self { - self.access_token = Some(access_token.into()); - self - } - - /// Sets the refresh token. - pub fn with_refresh_token(mut self, refresh_token: impl Into) -> Self { - self.refresh_token = Some(refresh_token.into()); - self - } -} diff --git a/crates/nvisy-opendal/src/gdrive/mod.rs b/crates/nvisy-opendal/src/gdrive/mod.rs deleted file mode 100644 index 9f4259f..0000000 --- a/crates/nvisy-opendal/src/gdrive/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Google Drive storage backend. - -mod config; - -pub use config::GoogleDriveConfig; diff --git a/crates/nvisy-opendal/src/lib.rs b/crates/nvisy-opendal/src/lib.rs deleted file mode 100644 index a6c8d7c..0000000 --- a/crates/nvisy-opendal/src/lib.rs +++ /dev/null @@ -1,28 +0,0 @@ -//! Storage backends using OpenDAL. -//! -//! This crate provides storage implementations that implement the -//! [`DataInput`] and [`DataOutput`] traits from `nvisy-data`. - -#![forbid(unsafe_code)] -#![cfg_attr(docsrs, feature(doc_cfg))] - -pub mod azblob; -pub mod dropbox; -pub mod gcs; -pub mod gdrive; -pub mod onedrive; -pub mod s3; - -mod backend; -mod config; - -pub use backend::{FileMetadata, StorageBackend}; -pub use config::{ - AzureBlobConfig, DropboxConfig, GcsConfig, GoogleDriveConfig, OneDriveConfig, S3Config, - StorageConfig, -}; -// Re-export types from nvisy-data for convenience -pub use nvisy_data::{DataError, DataInput, DataOutput, DataResult, InputContext, OutputContext}; - -/// Tracing target for storage operations. -pub const TRACING_TARGET: &str = "nvisy_opendal"; diff --git a/crates/nvisy-opendal/src/onedrive/config.rs b/crates/nvisy-opendal/src/onedrive/config.rs deleted file mode 100644 index a34c79c..0000000 --- a/crates/nvisy-opendal/src/onedrive/config.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! OneDrive configuration. - -use serde::{Deserialize, Serialize}; - -/// OneDrive configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OneDriveConfig { - /// Root folder path. - pub root: String, - /// OAuth client ID. - #[serde(skip_serializing_if = "Option::is_none")] - pub client_id: Option, - /// OAuth client secret. - #[serde(skip_serializing_if = "Option::is_none")] - pub client_secret: Option, - /// OAuth access token. - #[serde(skip_serializing_if = "Option::is_none")] - pub access_token: Option, - /// OAuth refresh token. - #[serde(skip_serializing_if = "Option::is_none")] - pub refresh_token: Option, -} - -impl OneDriveConfig { - /// Creates a new OneDrive configuration. - pub fn new(root: impl Into) -> Self { - Self { - root: root.into(), - client_id: None, - client_secret: None, - access_token: None, - refresh_token: None, - } - } - - /// Sets the OAuth client credentials. - pub fn with_client_credentials( - mut self, - client_id: impl Into, - client_secret: impl Into, - ) -> Self { - self.client_id = Some(client_id.into()); - self.client_secret = Some(client_secret.into()); - self - } - - /// Sets the access token. 
- pub fn with_access_token(mut self, access_token: impl Into) -> Self { - self.access_token = Some(access_token.into()); - self - } - - /// Sets the refresh token. - pub fn with_refresh_token(mut self, refresh_token: impl Into) -> Self { - self.refresh_token = Some(refresh_token.into()); - self - } -} diff --git a/crates/nvisy-opendal/src/onedrive/mod.rs b/crates/nvisy-opendal/src/onedrive/mod.rs deleted file mode 100644 index 98b0365..0000000 --- a/crates/nvisy-opendal/src/onedrive/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! OneDrive storage backend. - -mod config; - -pub use config::OneDriveConfig; diff --git a/crates/nvisy-opendal/src/s3/mod.rs b/crates/nvisy-opendal/src/s3/mod.rs deleted file mode 100644 index 243ae8c..0000000 --- a/crates/nvisy-opendal/src/s3/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -//! Amazon S3 storage backend. - -mod config; - -pub use config::S3Config; diff --git a/crates/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml index 591a5ce..ef1f90b 100644 --- a/crates/nvisy-runtime/Cargo.toml +++ b/crates/nvisy-runtime/Cargo.toml @@ -21,7 +21,7 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] # Internal crates nvisy-core = { workspace = true } -nvisy-opendal = { workspace = true } +nvisy-dal = { workspace = true } # Runtime crates nvisy-rt-core = { workspace = true } @@ -39,6 +39,7 @@ serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = [] } # Derive macros & utilities +async-trait = { workspace = true, features = [] } thiserror = { workspace = true, features = [] } derive_more = { workspace = true, features = ["debug", "display", "from", "into"] } derive_builder = { workspace = true, features = [] } diff --git a/crates/nvisy-runtime/src/error.rs b/crates/nvisy-runtime/src/error.rs index b809d69..9791324 100644 --- a/crates/nvisy-runtime/src/error.rs +++ b/crates/nvisy-runtime/src/error.rs @@ -1,6 +1,7 @@ //! Workflow error types. use thiserror::Error; +use uuid::Uuid; use crate::node::NodeId; @@ -40,9 +41,17 @@ pub enum WorkflowError { #[error("workflow execution timed out")] Timeout, + /// Failed to construct credentials registry. + #[error("failed to construct credentials registry: {0}")] + CredentialsRegistry(#[source] serde_json::Error), + + /// Credentials not found. + #[error("credentials not found: {0}")] + CredentialsNotFound(Uuid), + /// Storage operation failed. #[error("storage error: {0}")] - Storage(#[from] nvisy_opendal::DataError), + Storage(#[from] nvisy_dal::StorageError), /// Serialization/deserialization error. #[error("serialization error: {0}")] diff --git a/crates/nvisy-runtime/src/node/input/config.rs b/crates/nvisy-runtime/src/node/input/config.rs deleted file mode 100644 index 21c1edf..0000000 --- a/crates/nvisy-runtime/src/node/input/config.rs +++ /dev/null @@ -1,3 +0,0 @@ -//! Input node configuration types. - -pub use nvisy_opendal::StorageConfig as InputConfig; diff --git a/crates/nvisy-runtime/src/node/input/mod.rs b/crates/nvisy-runtime/src/node/input/mod.rs index c8ebb78..e08a1a0 100644 --- a/crates/nvisy-runtime/src/node/input/mod.rs +++ b/crates/nvisy-runtime/src/node/input/mod.rs @@ -1,10 +1,10 @@ //! Input node types for reading data from storage backends. -mod config; - -pub use config::InputConfig; +use nvisy_dal::DataTypeId; use serde::{Deserialize, Serialize}; +use super::provider::ProviderParams; + /// A data input node that reads or produces data. 
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct InputNode { @@ -14,20 +14,25 @@ pub struct InputNode { /// Description of what this input does. #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, - /// Input configuration. - pub config: InputConfig, + /// Provider parameters (credentials referenced by ID). + pub provider: ProviderParams, } impl InputNode { /// Creates a new input node. - pub fn new(config: InputConfig) -> Self { + pub fn new(provider: ProviderParams) -> Self { Self { name: None, description: None, - config, + provider, } } + /// Returns the output data type based on the provider kind. + pub const fn output_type(&self) -> DataTypeId { + self.provider.output_type() + } + /// Sets the display name. pub fn with_name(mut self, name: impl Into) -> Self { self.name = Some(name.into()); @@ -41,8 +46,8 @@ impl InputNode { } } -impl From for InputNode { - fn from(config: InputConfig) -> Self { - Self::new(config) +impl From for InputNode { + fn from(provider: ProviderParams) -> Self { + Self::new(provider) } } diff --git a/crates/nvisy-runtime/src/node/mod.rs b/crates/nvisy-runtime/src/node/mod.rs index 5c80bab..3f3f984 100644 --- a/crates/nvisy-runtime/src/node/mod.rs +++ b/crates/nvisy-runtime/src/node/mod.rs @@ -8,10 +8,16 @@ mod data; mod id; pub mod input; pub mod output; +pub mod provider; pub mod transformer; pub use data::NodeData; pub use id::NodeId; -pub use input::{InputConfig, InputNode}; -pub use output::{OutputConfig, OutputNode}; +pub use input::InputNode; +pub use output::OutputNode; +pub use provider::{ + AzblobCredentials, AzblobParams, CredentialsRegistry, GcsCredentials, GcsParams, + MysqlCredentials, MysqlParams, PostgresCredentials, PostgresParams, ProviderCredentials, + ProviderParams, S3Credentials, S3Params, +}; pub use transformer::{TransformerConfig, TransformerNode}; diff --git a/crates/nvisy-runtime/src/node/output/config.rs b/crates/nvisy-runtime/src/node/output/config.rs deleted file mode 100644 index c4af3e4..0000000 --- a/crates/nvisy-runtime/src/node/output/config.rs +++ /dev/null @@ -1,30 +0,0 @@ -//! Output node configuration types. - -use serde::{Deserialize, Serialize}; - -/// Output node configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub enum OutputConfig { - /// Storage backend output (S3, GCS, Azure, etc.). - Storage(nvisy_opendal::StorageConfig), - /// Send to webhook. - Webhook(WebhookConfig), -} - -/// Webhook output configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct WebhookConfig { - /// Webhook URL. - pub url: String, - /// HTTP method. - #[serde(default = "default_post")] - pub method: String, - /// Additional headers. - #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")] - pub headers: std::collections::HashMap, -} - -fn default_post() -> String { - "POST".to_string() -} diff --git a/crates/nvisy-runtime/src/node/output/mod.rs b/crates/nvisy-runtime/src/node/output/mod.rs index b49252d..f955966 100644 --- a/crates/nvisy-runtime/src/node/output/mod.rs +++ b/crates/nvisy-runtime/src/node/output/mod.rs @@ -1,10 +1,9 @@ //! Output node types for writing data to storage backends. -mod config; - -pub use config::{OutputConfig, WebhookConfig}; use serde::{Deserialize, Serialize}; +use super::provider::ProviderParams; + /// A data output node that writes or consumes data. 
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct OutputNode { @@ -14,17 +13,17 @@ pub struct OutputNode { /// Description of what this output does. #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, - /// Output configuration. - pub config: OutputConfig, + /// Provider parameters (credentials referenced by ID). + pub provider: ProviderParams, } impl OutputNode { /// Creates a new output node. - pub fn new(config: OutputConfig) -> Self { + pub fn new(provider: ProviderParams) -> Self { Self { name: None, description: None, - config, + provider, } } @@ -41,8 +40,8 @@ impl OutputNode { } } -impl From for OutputNode { - fn from(config: OutputConfig) -> Self { - Self::new(config) +impl From for OutputNode { + fn from(provider: ProviderParams) -> Self { + Self::new(provider) } } diff --git a/crates/nvisy-runtime/src/node/provider/credentials.rs b/crates/nvisy-runtime/src/node/provider/credentials.rs new file mode 100644 index 0000000..03026be --- /dev/null +++ b/crates/nvisy-runtime/src/node/provider/credentials.rs @@ -0,0 +1,68 @@ +//! Provider credentials (sensitive, stored per workspace). + +use derive_more::From; +use serde::{Deserialize, Serialize}; + +/// Provider credentials (sensitive). +#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum ProviderCredentials { + /// Amazon S3 credentials. + S3(S3Credentials), + /// Google Cloud Storage credentials. + Gcs(GcsCredentials), + /// Azure Blob Storage credentials. + Azblob(AzblobCredentials), + /// PostgreSQL credentials. + Postgres(PostgresCredentials), + /// MySQL credentials. + Mysql(MysqlCredentials), +} + +/// Amazon S3 credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct S3Credentials { + /// AWS region. + pub region: String, + /// Access key ID. + pub access_key_id: String, + /// Secret access key. + pub secret_access_key: String, + /// Custom endpoint URL (for S3-compatible storage like MinIO, R2). + #[serde(skip_serializing_if = "Option::is_none")] + pub endpoint: Option, +} + +/// Google Cloud Storage credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GcsCredentials { + /// Service account credentials JSON. + pub credentials_json: String, +} + +/// Azure Blob Storage credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AzblobCredentials { + /// Storage account name. + pub account_name: String, + /// Account key for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub account_key: Option, + /// SAS token for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub sas_token: Option, +} + +/// PostgreSQL credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PostgresCredentials { + /// Connection string (e.g., "postgresql://user:pass@host:5432/db"). + pub connection_string: String, +} + +/// MySQL credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MysqlCredentials { + /// Connection string (e.g., "mysql://user:pass@host:3306/db"). + pub connection_string: String, +} diff --git a/crates/nvisy-runtime/src/node/provider/mod.rs b/crates/nvisy-runtime/src/node/provider/mod.rs new file mode 100644 index 0000000..5f60aba --- /dev/null +++ b/crates/nvisy-runtime/src/node/provider/mod.rs @@ -0,0 +1,69 @@ +//! Provider params, credentials, and registry. +//! +//! This module separates provider configuration into: +//! - [`ProviderParams`]: Non-sensitive parameters (part of node definition) +//! 
- [`ProviderCredentials`]: Sensitive credentials (stored per workspace) +//! - [`CredentialsRegistry`]: In-memory registry for credentials lookup + +mod credentials; +mod params; + +use std::collections::HashMap; + +pub use credentials::{ + AzblobCredentials, GcsCredentials, MysqlCredentials, PostgresCredentials, ProviderCredentials, + S3Credentials, +}; +pub use params::{AzblobParams, GcsParams, MysqlParams, PostgresParams, ProviderParams, S3Params}; +use uuid::Uuid; + +use crate::error::{WorkflowError, WorkflowResult}; + +/// In-memory credentials registry. +/// +/// Stores credentials by UUID for lookup during workflow execution. +#[derive(Debug, Clone, Default)] +pub struct CredentialsRegistry { + credentials: HashMap, +} + +impl CredentialsRegistry { + /// Creates a new registry from a JSON value. + /// + /// Expects a JSON object with UUID keys and credential objects as values. + pub fn new(value: serde_json::Value) -> WorkflowResult { + let map: HashMap = + serde_json::from_value(value).map_err(WorkflowError::CredentialsRegistry)?; + Ok(Self { credentials: map }) + } + + /// Retrieves credentials by ID. + pub fn get(&self, credentials_id: Uuid) -> WorkflowResult<&ProviderCredentials> { + self.credentials + .get(&credentials_id) + .ok_or(WorkflowError::CredentialsNotFound(credentials_id)) + } + + /// Inserts credentials with a new UUID v4. + /// + /// Generates a unique UUID that doesn't conflict with existing entries. + pub fn insert(&mut self, credentials: ProviderCredentials) -> Uuid { + loop { + let id = Uuid::new_v4(); + if !self.credentials.contains_key(&id) { + self.credentials.insert(id, credentials); + return id; + } + } + } + + /// Removes credentials by ID. + pub fn remove(&mut self, credentials_id: Uuid) -> Option { + self.credentials.remove(&credentials_id) + } + + /// Lists all credential IDs. + pub fn list(&self) -> Vec { + self.credentials.keys().copied().collect() + } +} diff --git a/crates/nvisy-runtime/src/node/provider/params.rs b/crates/nvisy-runtime/src/node/provider/params.rs new file mode 100644 index 0000000..50938b3 --- /dev/null +++ b/crates/nvisy-runtime/src/node/provider/params.rs @@ -0,0 +1,203 @@ +//! Provider parameters (non-sensitive, part of node definition). + +use derive_more::From; +use nvisy_dal::DataTypeId; +use nvisy_dal::provider::{ + AzblobConfig, GcsConfig, MysqlConfig, PostgresConfig, ProviderConfig, S3Config, +}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::{ + AzblobCredentials, GcsCredentials, MysqlCredentials, PostgresCredentials, ProviderCredentials, + S3Credentials, +}; + +/// Provider parameters with credentials reference. +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum ProviderParams { + /// Amazon S3 storage. + S3(S3Params), + /// Google Cloud Storage. + Gcs(GcsParams), + /// Azure Blob Storage. + Azblob(AzblobParams), + /// PostgreSQL database. + Postgres(PostgresParams), + /// MySQL database. + Mysql(MysqlParams), +} + +impl ProviderParams { + /// Returns the credentials ID for this provider. + pub fn credentials_id(&self) -> Uuid { + match self { + Self::S3(p) => p.credentials_id, + Self::Gcs(p) => p.credentials_id, + Self::Azblob(p) => p.credentials_id, + Self::Postgres(p) => p.credentials_id, + Self::Mysql(p) => p.credentials_id, + } + } + + /// Returns the output data type for this provider. 
+ pub const fn output_type(&self) -> DataTypeId { + match self { + Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, + Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, + } + } + + /// Combines params with credentials to create a full provider config. + /// + /// # Panics + /// + /// Panics if the credentials type doesn't match the params type. + pub fn into_config(self, credentials: ProviderCredentials) -> ProviderConfig { + match (self, credentials) { + (Self::S3(p), ProviderCredentials::S3(c)) => ProviderConfig::S3(p.into_config(c)), + (Self::Gcs(p), ProviderCredentials::Gcs(c)) => ProviderConfig::Gcs(p.into_config(c)), + (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { + ProviderConfig::Azblob(p.into_config(c)) + } + (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { + ProviderConfig::Postgres(p.into_config(c)) + } + (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { + ProviderConfig::Mysql(p.into_config(c)) + } + _ => panic!("credentials type mismatch"), + } + } +} + +/// Amazon S3 parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct S3Params { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Bucket name. + pub bucket: String, + /// Path prefix within the bucket. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option, +} + +impl S3Params { + fn into_config(self, credentials: S3Credentials) -> S3Config { + let mut config = S3Config::new(self.bucket, credentials.region) + .with_credentials(credentials.access_key_id, credentials.secret_access_key); + + if let Some(endpoint) = credentials.endpoint { + config = config.with_endpoint(endpoint); + } + if let Some(prefix) = self.prefix { + config = config.with_prefix(prefix); + } + + config + } +} + +/// Google Cloud Storage parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct GcsParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Bucket name. + pub bucket: String, + /// Path prefix within the bucket. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option, +} + +impl GcsParams { + fn into_config(self, credentials: GcsCredentials) -> GcsConfig { + let mut config = GcsConfig::new(self.bucket).with_credentials(credentials.credentials_json); + + if let Some(prefix) = self.prefix { + config = config.with_prefix(prefix); + } + + config + } +} + +/// Azure Blob Storage parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct AzblobParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Container name. + pub container: String, + /// Path prefix within the container. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option, +} + +impl AzblobParams { + fn into_config(self, credentials: AzblobCredentials) -> AzblobConfig { + let mut config = AzblobConfig::new(credentials.account_name, self.container); + + if let Some(account_key) = credentials.account_key { + config = config.with_account_key(account_key); + } + if let Some(sas_token) = credentials.sas_token { + config = config.with_sas_token(sas_token); + } + if let Some(prefix) = self.prefix { + config = config.with_prefix(prefix); + } + + config + } +} + +/// PostgreSQL parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PostgresParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Table name. + pub table: String, + /// Schema name. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub schema: Option, +} + +impl PostgresParams { + fn into_config(self, credentials: PostgresCredentials) -> PostgresConfig { + let mut config = PostgresConfig::new(credentials.connection_string).with_table(self.table); + + if let Some(schema) = self.schema { + config = config.with_schema(schema); + } + + config + } +} + +/// MySQL parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MysqlParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Table name. + pub table: String, + /// Database name. + #[serde(skip_serializing_if = "Option::is_none")] + pub database: Option, +} + +impl MysqlParams { + fn into_config(self, credentials: MysqlCredentials) -> MysqlConfig { + let mut config = MysqlConfig::new(credentials.connection_string).with_table(self.table); + + if let Some(database) = self.database { + config = config.with_database(database); + } + + config + } +} diff --git a/crates/nvisy-vector/README.md b/crates/nvisy-vector/README.md deleted file mode 100644 index 80e64e1..0000000 --- a/crates/nvisy-vector/README.md +++ /dev/null @@ -1,42 +0,0 @@ -# nvisy-vector - -Vector store abstraction layer for Nvisy Server. - -## Supported Backends - -- **Qdrant** - High-performance vector similarity search engine -- **Milvus** - Open-source vector database for AI applications -- **Pinecone** - Managed vector database service -- **pgvector** - PostgreSQL extension for vector similarity search - -## Features - -Enable specific backends via Cargo features: - -```toml -[dependencies] -nvisy-vector = { version = "0.1", features = ["qdrant"] } -``` - -Available features: -- `qdrant` - Qdrant support -- `milvus` - Milvus support -- `pinecone` - Pinecone support -- `pgvector` - PostgreSQL pgvector support -- `all-backends` - All backends - -## Usage - -```rust -use nvisy_vector::{VectorStore, VectorStoreConfig}; - -// Create a store from configuration -let config = VectorStoreConfig::Qdrant(QdrantConfig::new("http://localhost:6334")); -let store = VectorStore::new(config).await?; - -// Upsert vectors -store.upsert("collection", vectors).await?; - -// Search for similar vectors -let results = store.search("collection", query_vector, 10).await?; -``` diff --git a/crates/nvisy-vector/src/config.rs b/crates/nvisy-vector/src/config.rs deleted file mode 100644 index 1c3983c..0000000 --- a/crates/nvisy-vector/src/config.rs +++ /dev/null @@ -1,36 +0,0 @@ -//! Vector store configuration types. - -use serde::{Deserialize, Serialize}; - -// Re-export configs from backend modules -pub use crate::milvus::MilvusConfig; -pub use crate::pgvector::{PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType}; -pub use crate::pinecone::PineconeConfig; -pub use crate::qdrant::QdrantConfig; - -/// Vector store backend configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -#[non_exhaustive] -pub enum VectorStoreConfig { - /// Qdrant vector database. - Qdrant(QdrantConfig), - /// Milvus vector database. - Milvus(MilvusConfig), - /// Pinecone managed vector database. - Pinecone(PineconeConfig), - /// PostgreSQL with pgvector extension. - PgVector(PgVectorConfig), -} - -impl VectorStoreConfig { - /// Returns the backend name as a static string. 
- pub fn backend_name(&self) -> &'static str { - match self { - Self::Qdrant(_) => "qdrant", - Self::Milvus(_) => "milvus", - Self::Pinecone(_) => "pinecone", - Self::PgVector(_) => "pgvector", - } - } -} diff --git a/crates/nvisy-vector/src/lib.rs b/crates/nvisy-vector/src/lib.rs deleted file mode 100644 index 8733072..0000000 --- a/crates/nvisy-vector/src/lib.rs +++ /dev/null @@ -1,29 +0,0 @@ -//! Vector store backends for nvisy. -//! -//! This crate provides vector store implementations that implement the -//! [`VectorOutput`] trait from `nvisy-data`. - -#![forbid(unsafe_code)] -#![cfg_attr(docsrs, feature(doc_cfg))] - -pub mod milvus; -pub mod pgvector; -pub mod pinecone; -pub mod qdrant; - -mod config; -mod store; - -pub use config::{ - MilvusConfig, PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType, PineconeConfig, - QdrantConfig, VectorStoreConfig, -}; -// Re-export types from nvisy-data for convenience -pub use nvisy_data::{ - DataError, DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, - VectorSearchResult, -}; -pub use store::VectorStore; - -/// Tracing target for vector store operations. -pub const TRACING_TARGET: &str = "nvisy_vector"; diff --git a/crates/nvisy-vector/src/milvus/mod.rs b/crates/nvisy-vector/src/milvus/mod.rs deleted file mode 100644 index dc3b2f0..0000000 --- a/crates/nvisy-vector/src/milvus/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Milvus vector store backend. - -mod backend; -mod config; - -pub use backend::MilvusBackend; -pub use config::MilvusConfig; diff --git a/crates/nvisy-vector/src/pgvector/mod.rs b/crates/nvisy-vector/src/pgvector/mod.rs deleted file mode 100644 index 0755b83..0000000 --- a/crates/nvisy-vector/src/pgvector/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! PostgreSQL pgvector backend. - -mod backend; -mod config; - -pub use backend::PgVectorBackend; -pub use config::{PgVectorConfig, PgVectorDistanceMetric, PgVectorIndexType}; diff --git a/crates/nvisy-vector/src/pinecone/mod.rs b/crates/nvisy-vector/src/pinecone/mod.rs deleted file mode 100644 index 7e618ef..0000000 --- a/crates/nvisy-vector/src/pinecone/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Pinecone vector store backend. - -mod backend; -mod config; - -pub use backend::PineconeBackend; -pub use config::PineconeConfig; diff --git a/crates/nvisy-vector/src/qdrant/mod.rs b/crates/nvisy-vector/src/qdrant/mod.rs deleted file mode 100644 index 929807c..0000000 --- a/crates/nvisy-vector/src/qdrant/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Qdrant vector store backend. - -mod backend; -mod config; - -pub use backend::QdrantBackend; -pub use config::QdrantConfig; diff --git a/crates/nvisy-vector/src/store.rs b/crates/nvisy-vector/src/store.rs deleted file mode 100644 index b136f5a..0000000 --- a/crates/nvisy-vector/src/store.rs +++ /dev/null @@ -1,95 +0,0 @@ -//! Vector store wrapper and unified API. - -use nvisy_data::{ - DataResult, VectorContext, VectorData, VectorOutput, VectorSearchOptions, VectorSearchResult, -}; - -use crate::TRACING_TARGET; -use crate::config::VectorStoreConfig; -use crate::milvus::MilvusBackend; -use crate::pgvector::PgVectorBackend; -use crate::pinecone::PineconeBackend; -use crate::qdrant::QdrantBackend; - -/// Unified vector store that wraps backend implementations. -pub struct VectorStore { - #[allow(dead_code)] - config: VectorStoreConfig, - backend: Box, -} - -impl VectorStore { - /// Creates a new vector store from configuration. 
- pub async fn new(config: VectorStoreConfig) -> DataResult { - let backend: Box = match &config { - VectorStoreConfig::Qdrant(cfg) => Box::new(QdrantBackend::new(cfg).await?), - VectorStoreConfig::Milvus(cfg) => Box::new(MilvusBackend::new(cfg).await?), - VectorStoreConfig::Pinecone(cfg) => Box::new(PineconeBackend::new(cfg).await?), - VectorStoreConfig::PgVector(cfg) => Box::new(PgVectorBackend::new(cfg).await?), - }; - - tracing::info!( - target: TRACING_TARGET, - backend = %config.backend_name(), - "Vector store initialized" - ); - - Ok(Self { config, backend }) - } - - /// Inserts vectors into a collection. - pub async fn insert(&self, collection: &str, vectors: Vec) -> DataResult<()> { - tracing::debug!( - target: TRACING_TARGET, - collection = %collection, - count = %vectors.len(), - "Inserting vectors" - ); - - let ctx = VectorContext::new(collection); - self.backend.insert(&ctx, vectors).await - } - - /// Searches for similar vectors. - pub async fn search( - &self, - collection: &str, - query: Vec, - limit: usize, - ) -> DataResult> { - self.search_with_options(collection, query, limit, VectorSearchOptions::default()) - .await - } - - /// Searches for similar vectors with options. - pub async fn search_with_options( - &self, - collection: &str, - query: Vec, - limit: usize, - options: VectorSearchOptions, - ) -> DataResult> { - tracing::debug!( - target: TRACING_TARGET, - collection = %collection, - limit = %limit, - "Searching vectors" - ); - - let ctx = VectorContext::new(collection); - self.backend.search(&ctx, query, limit, options).await - } - - /// Returns a reference to the underlying backend. - pub fn backend(&self) -> &dyn VectorOutput { - self.backend.as_ref() - } -} - -impl std::fmt::Debug for VectorStore { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("VectorStore") - .field("backend", &self.config.backend_name()) - .finish() - } -} From 2cc3aed1d0a8d7581d7333d984144df033114afa Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Tue, 20 Jan 2026 10:34:14 +0100 Subject: [PATCH 11/28] docs: add crates README --- crates/README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 crates/README.md diff --git a/crates/README.md b/crates/README.md new file mode 100644 index 0000000..a5ae878 --- /dev/null +++ b/crates/README.md @@ -0,0 +1,32 @@ +# Crates + +This directory contains the workspace crates for Nvisy Server. 
+ +## Core Crates + +| Crate | Description | +|-------|-------------| +| `nvisy-cli` | Server entry point and CLI configuration | +| `nvisy-core` | Shared types, errors, and utilities | +| `nvisy-server` | HTTP API handlers and middleware | + +## Data Layer + +| Crate | Description | +|-------|-------------| +| `nvisy-postgres` | PostgreSQL ORM layer (Diesel async) | +| `nvisy-nats` | NATS client (JetStream, KV, object storage) | +| `nvisy-dal` | Data Abstraction Layer for workflow I/O | + +## AI & Workflows + +| Crate | Description | +|-------|-------------| +| `nvisy-rig` | AI services (chat, RAG, embeddings) | +| `nvisy-runtime` | Workflow execution engine | + +## Integration + +| Crate | Description | +|-------|-------------| +| `nvisy-webhook` | Webhook delivery traits and types | From 04bcd366007b573d6b7520c629bc13ae05dba373 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 21 Jan 2026 14:15:22 +0100 Subject: [PATCH 12/28] feat(runtime): reorganize provider module structure and fix webhook secret MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move node types (input, output, transformer, data, id) into graph module - Create provider module with backend/, inputs.rs, outputs.rs, registry.rs - Split provider files into backend/ subdirectory (s3, gcs, azblob, postgres, mysql, qdrant, pinecone, milvus, pgvector) - Add read_data!/write_data! macros for DRY helper functions - Update ExecutionContext to use Vec for 1→N transformations - Add secret column to workspace_webhooks migration --- crates/README.md | 53 +- crates/nvisy-runtime/src/engine/context.rs | 84 +++ crates/nvisy-runtime/src/engine/executor.rs | 191 ++++++- crates/nvisy-runtime/src/engine/mod.rs | 3 + crates/nvisy-runtime/src/error.rs | 2 +- .../nvisy-runtime/src/{node => graph}/data.rs | 0 crates/nvisy-runtime/src/graph/edge.rs | 2 +- .../nvisy-runtime/src/{node => graph}/id.rs | 0 .../src/{node => graph}/input/mod.rs | 22 +- crates/nvisy-runtime/src/graph/mod.rs | 14 +- .../src/{node => graph}/output/mod.rs | 26 +- .../{node => graph}/transformer/chunking.rs | 0 .../src/{node => graph}/transformer/config.rs | 0 .../{node => graph}/transformer/document.rs | 0 .../{node => graph}/transformer/embedding.rs | 0 .../{node => graph}/transformer/extraction.rs | 0 .../src/{node => graph}/transformer/mod.rs | 0 .../{node => graph}/transformer/processing.rs | 0 .../{node => graph}/transformer/quality.rs | 0 .../{node => graph}/transformer/routing.rs | 0 crates/nvisy-runtime/src/graph/workflow.rs | 3 +- crates/nvisy-runtime/src/lib.rs | 3 +- crates/nvisy-runtime/src/node/mod.rs | 23 - .../src/node/provider/credentials.rs | 68 --- .../nvisy-runtime/src/node/provider/params.rs | 203 ------- .../src/provider/backend/azblob.rs | 49 ++ .../nvisy-runtime/src/provider/backend/gcs.rs | 37 ++ .../src/provider/backend/milvus.rs | 62 ++ .../nvisy-runtime/src/provider/backend/mod.rs | 32 ++ .../src/provider/backend/mysql.rs | 37 ++ .../src/provider/backend/pgvector.rs | 30 + .../src/provider/backend/pinecone.rs | 46 ++ .../src/provider/backend/postgres.rs | 37 ++ .../src/provider/backend/qdrant.rs | 43 ++ .../nvisy-runtime/src/provider/backend/s3.rs | 48 ++ crates/nvisy-runtime/src/provider/inputs.rs | 183 ++++++ crates/nvisy-runtime/src/provider/mod.rs | 73 +++ crates/nvisy-runtime/src/provider/outputs.rs | 241 ++++++++ .../provider/mod.rs => provider/registry.rs} | 16 +- .../src/{ => provider}/runtime/config.rs | 0 .../src/{ => provider}/runtime/mod.rs | 0 .../src/{ => 
provider}/runtime/service.rs | 0 crates/nvisy-runtime/stream/event.rs | 118 ++++ crates/nvisy-runtime/stream/event_pub.rs | 76 +++ crates/nvisy-runtime/stream/event_stream.rs | 74 +++ crates/nvisy-runtime/stream/event_sub.rs | 63 +++ crates/nvisy-runtime/stream/mod.rs | 20 + crates/nvisy-runtime/stream/stream_pub.rs | 232 ++++++++ crates/nvisy-runtime/stream/stream_sub.rs | 535 ++++++++++++++++++ .../2025-05-21-222840_workspaces/up.sql | 2 + 50 files changed, 2356 insertions(+), 395 deletions(-) create mode 100644 crates/nvisy-runtime/src/engine/context.rs rename crates/nvisy-runtime/src/{node => graph}/data.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/id.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/input/mod.rs (62%) rename crates/nvisy-runtime/src/{node => graph}/output/mod.rs (53%) rename crates/nvisy-runtime/src/{node => graph}/transformer/chunking.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/transformer/config.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/transformer/document.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/transformer/embedding.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/transformer/extraction.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/transformer/mod.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/transformer/processing.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/transformer/quality.rs (100%) rename crates/nvisy-runtime/src/{node => graph}/transformer/routing.rs (100%) delete mode 100644 crates/nvisy-runtime/src/node/mod.rs delete mode 100644 crates/nvisy-runtime/src/node/provider/credentials.rs delete mode 100644 crates/nvisy-runtime/src/node/provider/params.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/azblob.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/gcs.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/milvus.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/mod.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/mysql.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/pgvector.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/pinecone.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/postgres.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/qdrant.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/s3.rs create mode 100644 crates/nvisy-runtime/src/provider/inputs.rs create mode 100644 crates/nvisy-runtime/src/provider/mod.rs create mode 100644 crates/nvisy-runtime/src/provider/outputs.rs rename crates/nvisy-runtime/src/{node/provider/mod.rs => provider/registry.rs} (74%) rename crates/nvisy-runtime/src/{ => provider}/runtime/config.rs (100%) rename crates/nvisy-runtime/src/{ => provider}/runtime/mod.rs (100%) rename crates/nvisy-runtime/src/{ => provider}/runtime/service.rs (100%) create mode 100644 crates/nvisy-runtime/stream/event.rs create mode 100644 crates/nvisy-runtime/stream/event_pub.rs create mode 100644 crates/nvisy-runtime/stream/event_stream.rs create mode 100644 crates/nvisy-runtime/stream/event_sub.rs create mode 100644 crates/nvisy-runtime/stream/mod.rs create mode 100644 crates/nvisy-runtime/stream/stream_pub.rs create mode 100644 crates/nvisy-runtime/stream/stream_sub.rs diff --git a/crates/README.md b/crates/README.md index a5ae878..f0892c6 100644 --- a/crates/README.md +++ b/crates/README.md @@ -2,31 +2,46 @@ This directory contains the workspace crates for Nvisy Server. 
-## Core Crates +## Core -| Crate | Description | -|-------|-------------| -| `nvisy-cli` | Server entry point and CLI configuration | -| `nvisy-core` | Shared types, errors, and utilities | -| `nvisy-server` | HTTP API handlers and middleware | +### nvisy-cli + +Server entry point and CLI configuration. Parses command-line arguments, loads environment configuration, and bootstraps the application by initializing all services and starting the HTTP server. + +### nvisy-core + +Shared foundation used across all crates. Contains common error types with retry support, utility functions, and base traits. Provides the `Error` and `ErrorKind` types used throughout the application. + +### nvisy-server + +HTTP API layer built on Axum. Implements REST endpoints for documents, workspaces, accounts, and studio sessions. Includes middleware for authentication (JWT/Ed25519), request validation, and OpenAPI documentation via Aide. ## Data Layer -| Crate | Description | -|-------|-------------| -| `nvisy-postgres` | PostgreSQL ORM layer (Diesel async) | -| `nvisy-nats` | NATS client (JetStream, KV, object storage) | -| `nvisy-dal` | Data Abstraction Layer for workflow I/O | +### nvisy-postgres + +PostgreSQL persistence layer using Diesel async. Defines ORM models, query builders, and repository patterns for all database entities. Handles connection pooling via deadpool and compile-time SQL validation. + +### nvisy-nats -## AI & Workflows +NATS messaging client for real-time features. Provides JetStream for durable message streams, KV store for distributed state, and object storage for large files. Used for pub/sub events and cross-service communication. -| Crate | Description | -|-------|-------------| -| `nvisy-rig` | AI services (chat, RAG, embeddings) | -| `nvisy-runtime` | Workflow execution engine | +## Workflows + +### nvisy-dal + +Data Abstraction Layer for workflow inputs and outputs. Provides unified interfaces for reading/writing data across storage backends (S3, GCS, Azure Blob, PostgreSQL, MySQL) and vector databases (Qdrant, Pinecone, Milvus, pgvector). Defines core data types: Blob, Document, Embedding, Graph, Record, Message. + +### nvisy-runtime + +Workflow execution engine. Defines workflow graphs with input, transformer, and output nodes. Manages provider credentials, node execution, and data flow between pipeline stages. Integrates with nvisy-dal for storage operations. + +### nvisy-rig + +AI services powered by rig-core. Provides chat completions, RAG pipelines with pgvector embeddings, and document processing. Supports multiple LLM providers (OpenAI, Anthropic, OpenRouter) for studio sessions. ## Integration -| Crate | Description | -|-------|-------------| -| `nvisy-webhook` | Webhook delivery traits and types | +### nvisy-webhook + +Webhook delivery system. Defines traits and types for sending HTTP callbacks on events. Used to notify external systems about document processing completion, workflow status changes, and other application events. diff --git a/crates/nvisy-runtime/src/engine/context.rs b/crates/nvisy-runtime/src/engine/context.rs new file mode 100644 index 0000000..1f66c93 --- /dev/null +++ b/crates/nvisy-runtime/src/engine/context.rs @@ -0,0 +1,84 @@ +//! Execution context for workflow runs. + +use nvisy_dal::AnyDataValue; + +use crate::provider::CredentialsRegistry; + +/// Execution context for a workflow run. +/// +/// Manages the current data items flowing through the pipeline and holds +/// credentials for provider access. 
Execution is pipe-based: each input item +/// flows through the entire pipeline before the next item is processed. +/// +/// A single input can produce multiple outputs (e.g., 1 document → 1000 embeddings), +/// so the context holds a `Vec` of values at each stage. +#[derive(Debug)] +pub struct ExecutionContext { + /// Credentials registry for provider authentication. + credentials: CredentialsRegistry, + /// Current data items being processed (can expand: 1 input → N outputs). + current: Vec<AnyDataValue>, + /// Total input items processed in this execution. + items_processed: usize, +} + +impl ExecutionContext { + /// Creates a new execution context with the given credentials. + pub fn new(credentials: CredentialsRegistry) -> Self { + Self { + credentials, + current: Vec::new(), + items_processed: 0, + } + } + + /// Returns a reference to the credentials registry. + pub fn credentials(&self) -> &CredentialsRegistry { + &self.credentials + } + + /// Sets the current data items being processed. + pub fn set_current(&mut self, data: Vec<AnyDataValue>) { + self.current = data; + } + + /// Sets a single item as current (convenience for input stage). + pub fn set_current_single(&mut self, data: AnyDataValue) { + self.current = vec![data]; + } + + /// Takes the current data items, leaving an empty vec in its place. + pub fn take_current(&mut self) -> Vec<AnyDataValue> { + std::mem::take(&mut self.current) + } + + /// Returns a reference to the current data items. + pub fn current(&self) -> &[AnyDataValue] { + &self.current + } + + /// Returns whether there are any current data items. + pub fn has_current(&self) -> bool { + !self.current.is_empty() + } + + /// Returns the number of current data items. + pub fn current_len(&self) -> usize { + self.current.len() + } + + /// Increments the processed items counter. + pub fn mark_processed(&mut self) { + self.items_processed += 1; + } + + /// Returns the number of input items processed. + pub fn items_processed(&self) -> usize { + self.items_processed + } + + /// Clears the current data items. + pub fn clear(&mut self) { + self.current.clear(); + } +} diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index 246f8e0..4fef41f 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -2,11 +2,14 @@ use std::sync::Arc; +use nvisy_dal::core::Context; use tokio::sync::Semaphore; use super::EngineConfig; -use crate::error::WorkflowResult; -use crate::graph::WorkflowGraph; +use super::context::ExecutionContext; +use crate::error::{WorkflowError, WorkflowResult}; +use crate::graph::{NodeData, NodeId, WorkflowGraph}; +use crate::provider::{CredentialsRegistry, InputProvider, OutputProvider}; /// Tracing target for engine operations. const TRACING_TARGET: &str = "nvisy_workflow::engine"; @@ -14,6 +17,8 @@ const TRACING_TARGET: &str = "nvisy_workflow::engine"; /// The workflow execution engine. /// /// Manages workflow execution, concurrency, and resource allocation. +/// Executes workflows in a pipe-based streaming manner: each data item +/// flows through the entire pipeline before the next item is processed. pub struct Engine { config: EngineConfig, semaphore: Arc<Semaphore>, @@ -49,22 +54,24 @@ impl Engine { workflow.validate() } - /// Executes a workflow graph. + /// Executes a workflow graph with the given credentials. /// - /// This will: - /// 1. Acquire a semaphore permit for concurrency control - /// 2. Validate the workflow - /// 3. Execute nodes in topological order - /// 4.
Handle errors and retries - pub async fn execute(&self, workflow: &WorkflowGraph) -> WorkflowResult<()> { - let _permit = self.semaphore.acquire().await.map_err(|e| { - crate::error::WorkflowError::Internal(format!("semaphore closed: {}", e)) - })?; - - // Validate the workflow first + /// Execution is pipe-based: items are read from inputs one at a time, + /// flow through all transformers, and are written to outputs before + /// the next item is processed. + pub async fn execute( + &self, + workflow: &WorkflowGraph, + credentials: CredentialsRegistry, + ) -> WorkflowResult { + let _permit = self + .semaphore + .acquire() + .await + .map_err(|e| WorkflowError::Internal(format!("semaphore closed: {}", e)))?; + workflow.validate()?; - // Get execution order let order = workflow.topological_order()?; tracing::debug!( @@ -73,23 +80,145 @@ impl Engine { "Starting workflow execution" ); - // TODO: Execute each node in order - // For now, just log the execution plan - for (idx, node_id) in order.iter().enumerate() { - if let Some(node) = workflow.get_node(*node_id) { - tracing::trace!( - target: TRACING_TARGET, - step = idx + 1, - node_id = %node_id, - node_name = node.name(), - "Would execute node" - ); + let mut ctx = ExecutionContext::new(credentials); + + // Build the pipeline: create providers for input and output nodes + let pipeline = self.build_pipeline(workflow, &order, &ctx).await?; + + // Execute the pipeline: stream items through + self.execute_pipeline(workflow, &order, &pipeline, &mut ctx) + .await?; + + tracing::debug!( + target: TRACING_TARGET, + items_processed = ctx.items_processed(), + "Workflow execution completed" + ); + + Ok(ctx) + } + + /// Builds the pipeline by creating providers for input and output nodes. + async fn build_pipeline( + &self, + workflow: &WorkflowGraph, + order: &[NodeId], + ctx: &ExecutionContext, + ) -> WorkflowResult { + let mut input_providers = Vec::new(); + let mut output_providers = Vec::new(); + + for node_id in order { + let Some(node) = workflow.get_node(*node_id) else { + continue; + }; + + match node { + NodeData::Input(input_node) => { + let credentials_id = input_node.provider.credentials_id(); + let credentials = ctx.credentials().get(credentials_id)?.clone(); + let config = input_node.provider.clone().into_config(credentials)?; + let provider = config.into_provider()?; + input_providers.push((*node_id, provider)); + } + NodeData::Output(output_node) => { + let credentials_id = output_node.provider.credentials_id(); + let credentials = ctx.credentials().get(credentials_id)?.clone(); + let config = output_node.provider.clone().into_config(credentials)?; + let provider = config.into_provider().await?; + output_providers.push((*node_id, provider)); + } + NodeData::Transformer(_) => { + // Transformers don't need pre-built providers + } } } - tracing::debug!( + Ok(Pipeline { + input_providers, + output_providers, + }) + } + + /// Executes the pipeline by streaming items through. + /// + /// For each input item: + /// 1. Set as current (single item) + /// 2. Run through transformers (can expand: 1 item → N items) + /// 3. 
Write all resulting items to outputs + async fn execute_pipeline( + &self, + workflow: &WorkflowGraph, + order: &[NodeId], + pipeline: &Pipeline, + ctx: &mut ExecutionContext, + ) -> WorkflowResult<()> { + // For each input provider, stream items through the pipeline + for (input_node_id, input_provider) in &pipeline.input_providers { + tracing::debug!( + target: TRACING_TARGET, + node_id = %input_node_id, + "Reading from input provider" + ); + + let dal_ctx = Context::default(); + let items = input_provider.read(&dal_ctx).await?; + + // Process each input item through the pipeline + for item in items { + // Start with single input item + ctx.set_current_single(item); + + // Execute transformers in order (each can expand 1→N) + for node_id in order { + let Some(node) = workflow.get_node(*node_id) else { + continue; + }; + + if let NodeData::Transformer(transformer_node) = node { + self.execute_transformer(*node_id, transformer_node, ctx)?; + } + } + + // Write all resulting items to output providers + let output_data = ctx.take_current(); + if !output_data.is_empty() { + for (output_node_id, output_provider) in &pipeline.output_providers { + tracing::trace!( + target: TRACING_TARGET, + node_id = %output_node_id, + item_count = output_data.len(), + "Writing to output provider" + ); + + let dal_ctx = Context::default(); + output_provider.write(&dal_ctx, output_data.clone()).await?; + } + } + + ctx.mark_processed(); + ctx.clear(); + } + } + + Ok(()) + } + + /// Executes a transformer node on the current data. + fn execute_transformer( + &self, + node_id: NodeId, + _transformer_node: &crate::graph::TransformerNode, + ctx: &mut ExecutionContext, + ) -> WorkflowResult<()> { + // TODO: Apply transformation based on transformer_node.config + // For now, pass through data unchanged + + tracing::trace!( target: TRACING_TARGET, - "Workflow execution completed (placeholder)" + node_id = %node_id, + has_data = ctx.has_current(), + "Transformer node executed (passthrough)" ); Ok(()) @@ -101,6 +230,12 @@ impl Engine { } } +/// Pre-built pipeline with providers ready for execution. +struct Pipeline { + input_providers: Vec<(NodeId, InputProvider)>, + output_providers: Vec<(NodeId, OutputProvider)>, +} + impl std::fmt::Debug for Engine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Engine") diff --git a/crates/nvisy-runtime/src/engine/mod.rs b/crates/nvisy-runtime/src/engine/mod.rs index 5d64245..e07449a 100644 --- a/crates/nvisy-runtime/src/engine/mod.rs +++ b/crates/nvisy-runtime/src/engine/mod.rs @@ -3,9 +3,12 @@ //! This module provides the runtime for executing workflows: //! - [`Engine`]: The main execution engine //! - [`EngineConfig`]: Configuration options +//! - [`ExecutionContext`]: Runtime context for workflow execution mod config; +mod context; mod executor; pub use config::EngineConfig; +pub use context::ExecutionContext; pub use executor::Engine; diff --git a/crates/nvisy-runtime/src/error.rs b/crates/nvisy-runtime/src/error.rs index 9791324..4c23a88 100644 --- a/crates/nvisy-runtime/src/error.rs +++ b/crates/nvisy-runtime/src/error.rs @@ -3,7 +3,7 @@ use thiserror::Error; use uuid::Uuid; -use crate::node::NodeId; +use crate::graph::NodeId; /// Result type for workflow operations. 
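// A minimal usage sketch for the pipe-based `execute(workflow, credentials)`
// entry point above. `CredentialsRegistry::new()` and `insert()` are assumed
// constructors not shown in this patch (registry.rs is omitted here), while
// `Engine::execute` and `ExecutionContext::items_processed` match the diff.
use uuid::Uuid;

use crate::engine::Engine;
use crate::error::WorkflowResult;
use crate::graph::WorkflowGraph;
use crate::provider::{CredentialsRegistry, ProviderCredentials};

async fn run_workflow_example(
    engine: &Engine,
    workflow: &WorkflowGraph,
    credentials_id: Uuid,
    credentials: ProviderCredentials,
) -> WorkflowResult<()> {
    // Register the credentials referenced by the workflow's input/output nodes.
    let mut registry = CredentialsRegistry::new(); // assumed constructor
    registry.insert(credentials_id, credentials); // assumed method

    // Pipe-based execution: each input item flows through every transformer
    // and output before the next item is read.
    let ctx = engine.execute(workflow, registry).await?;
    tracing::info!(items = ctx.items_processed(), "workflow finished");
    Ok(())
}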
pub type WorkflowResult = Result; diff --git a/crates/nvisy-runtime/src/node/data.rs b/crates/nvisy-runtime/src/graph/data.rs similarity index 100% rename from crates/nvisy-runtime/src/node/data.rs rename to crates/nvisy-runtime/src/graph/data.rs diff --git a/crates/nvisy-runtime/src/graph/edge.rs b/crates/nvisy-runtime/src/graph/edge.rs index 6c6b401..c67f2a9 100644 --- a/crates/nvisy-runtime/src/graph/edge.rs +++ b/crates/nvisy-runtime/src/graph/edge.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; -use crate::node::NodeId; +use super::NodeId; /// An edge connecting two nodes in the workflow graph. #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] diff --git a/crates/nvisy-runtime/src/node/id.rs b/crates/nvisy-runtime/src/graph/id.rs similarity index 100% rename from crates/nvisy-runtime/src/node/id.rs rename to crates/nvisy-runtime/src/graph/id.rs diff --git a/crates/nvisy-runtime/src/node/input/mod.rs b/crates/nvisy-runtime/src/graph/input/mod.rs similarity index 62% rename from crates/nvisy-runtime/src/node/input/mod.rs rename to crates/nvisy-runtime/src/graph/input/mod.rs index e08a1a0..289a12b 100644 --- a/crates/nvisy-runtime/src/node/input/mod.rs +++ b/crates/nvisy-runtime/src/graph/input/mod.rs @@ -3,7 +3,7 @@ use nvisy_dal::DataTypeId; use serde::{Deserialize, Serialize}; -use super::provider::ProviderParams; +use crate::provider::InputProviderParams; /// A data input node that reads or produces data. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -15,12 +15,12 @@ pub struct InputNode { #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, /// Provider parameters (credentials referenced by ID). - pub provider: ProviderParams, + pub provider: InputProviderParams, } impl InputNode { /// Creates a new input node. - pub fn new(provider: ProviderParams) -> Self { + pub fn new(provider: InputProviderParams) -> Self { Self { name: None, description: None, @@ -32,22 +32,10 @@ impl InputNode { pub const fn output_type(&self) -> DataTypeId { self.provider.output_type() } - - /// Sets the display name. - pub fn with_name(mut self, name: impl Into) -> Self { - self.name = Some(name.into()); - self - } - - /// Sets the description. - pub fn with_description(mut self, description: impl Into) -> Self { - self.description = Some(description.into()); - self - } } -impl From for InputNode { - fn from(provider: ProviderParams) -> Self { +impl From for InputNode { + fn from(provider: InputProviderParams) -> Self { Self::new(provider) } } diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index 1d44843..b316fbc 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -1,13 +1,25 @@ -//! Workflow graph structures. +//! Workflow graph structures and node types. //! //! This module provides the graph representation for workflows: //! - [`WorkflowGraph`]: The main graph structure containing nodes and edges //! - [`WorkflowMetadata`]: Metadata about the workflow //! - [`Edge`]: Connections between nodes //! - [`EdgeData`]: Data stored on edges in the underlying petgraph +//! - [`NodeId`]: Unique identifier for nodes +//! 
- [`NodeData`]: Data associated with each node (Input, Transformer, Output) +mod data; mod edge; +mod id; +pub mod input; +pub mod output; +pub mod transformer; mod workflow; +pub use data::NodeData; pub use edge::Edge; +pub use id::NodeId; +pub use input::InputNode; +pub use output::OutputNode; +pub use transformer::{TransformerConfig, TransformerNode}; pub use workflow::{EdgeData, WorkflowGraph, WorkflowMetadata}; diff --git a/crates/nvisy-runtime/src/node/output/mod.rs b/crates/nvisy-runtime/src/graph/output/mod.rs similarity index 53% rename from crates/nvisy-runtime/src/node/output/mod.rs rename to crates/nvisy-runtime/src/graph/output/mod.rs index f955966..8890ea3 100644 --- a/crates/nvisy-runtime/src/node/output/mod.rs +++ b/crates/nvisy-runtime/src/graph/output/mod.rs @@ -1,8 +1,9 @@ -//! Output node types for writing data to storage backends. +//! Output node types for writing data to storage backends and vector databases. +use nvisy_dal::DataTypeId; use serde::{Deserialize, Serialize}; -use super::provider::ProviderParams; +use crate::provider::OutputProviderParams; /// A data output node that writes or consumes data. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -14,12 +15,12 @@ pub struct OutputNode { #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, /// Provider parameters (credentials referenced by ID). - pub provider: ProviderParams, + pub provider: OutputProviderParams, } impl OutputNode { /// Creates a new output node. - pub fn new(provider: ProviderParams) -> Self { + pub fn new(provider: OutputProviderParams) -> Self { Self { name: None, description: None, @@ -27,21 +28,14 @@ impl OutputNode { } } - /// Sets the display name. - pub fn with_name(mut self, name: impl Into) -> Self { - self.name = Some(name.into()); - self - } - - /// Sets the description. - pub fn with_description(mut self, description: impl Into) -> Self { - self.description = Some(description.into()); - self + /// Returns the expected input data type based on the provider kind. 
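// A minimal sketch of building the node types re-exported above from provider
// params, assuming the `S3Params` fields shown elsewhere in this patch; the
// bucket, prefix, and credentials id are placeholder values.
use uuid::Uuid;

use crate::graph::InputNode;
use crate::provider::{InputProviderParams, S3Params};

fn example_input_node(credentials_id: Uuid) -> InputNode {
    let params = InputProviderParams::S3(S3Params {
        credentials_id,
        bucket: "raw-documents".to_string(),
        prefix: Some("incoming/".to_string()),
    });
    // Nodes carry only the non-sensitive params; the matching S3Credentials
    // are looked up by `credentials_id` from the registry at execution time.
    debug_assert_eq!(params.output_type(), nvisy_dal::DataTypeId::Blob);
    InputNode::new(params)
}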
+ pub const fn input_type(&self) -> DataTypeId { + self.provider.output_type() } } -impl From for OutputNode { - fn from(provider: ProviderParams) -> Self { +impl From for OutputNode { + fn from(provider: OutputProviderParams) -> Self { Self::new(provider) } } diff --git a/crates/nvisy-runtime/src/node/transformer/chunking.rs b/crates/nvisy-runtime/src/graph/transformer/chunking.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/chunking.rs rename to crates/nvisy-runtime/src/graph/transformer/chunking.rs diff --git a/crates/nvisy-runtime/src/node/transformer/config.rs b/crates/nvisy-runtime/src/graph/transformer/config.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/config.rs rename to crates/nvisy-runtime/src/graph/transformer/config.rs diff --git a/crates/nvisy-runtime/src/node/transformer/document.rs b/crates/nvisy-runtime/src/graph/transformer/document.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/document.rs rename to crates/nvisy-runtime/src/graph/transformer/document.rs diff --git a/crates/nvisy-runtime/src/node/transformer/embedding.rs b/crates/nvisy-runtime/src/graph/transformer/embedding.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/embedding.rs rename to crates/nvisy-runtime/src/graph/transformer/embedding.rs diff --git a/crates/nvisy-runtime/src/node/transformer/extraction.rs b/crates/nvisy-runtime/src/graph/transformer/extraction.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/extraction.rs rename to crates/nvisy-runtime/src/graph/transformer/extraction.rs diff --git a/crates/nvisy-runtime/src/node/transformer/mod.rs b/crates/nvisy-runtime/src/graph/transformer/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/mod.rs rename to crates/nvisy-runtime/src/graph/transformer/mod.rs diff --git a/crates/nvisy-runtime/src/node/transformer/processing.rs b/crates/nvisy-runtime/src/graph/transformer/processing.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/processing.rs rename to crates/nvisy-runtime/src/graph/transformer/processing.rs diff --git a/crates/nvisy-runtime/src/node/transformer/quality.rs b/crates/nvisy-runtime/src/graph/transformer/quality.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/quality.rs rename to crates/nvisy-runtime/src/graph/transformer/quality.rs diff --git a/crates/nvisy-runtime/src/node/transformer/routing.rs b/crates/nvisy-runtime/src/graph/transformer/routing.rs similarity index 100% rename from crates/nvisy-runtime/src/node/transformer/routing.rs rename to crates/nvisy-runtime/src/graph/transformer/routing.rs diff --git a/crates/nvisy-runtime/src/graph/workflow.rs b/crates/nvisy-runtime/src/graph/workflow.rs index 19b94f0..4d6ea2a 100644 --- a/crates/nvisy-runtime/src/graph/workflow.rs +++ b/crates/nvisy-runtime/src/graph/workflow.rs @@ -9,9 +9,8 @@ use petgraph::visit::EdgeRef; use semver::Version; use serde::{Deserialize, Serialize}; -use super::Edge; +use super::{Edge, NodeData, NodeId}; use crate::error::{WorkflowError, WorkflowResult}; -use crate::node::{NodeData, NodeId}; /// Workflow metadata. 
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] diff --git a/crates/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs index a01375e..1edc6fe 100644 --- a/crates/nvisy-runtime/src/lib.rs +++ b/crates/nvisy-runtime/src/lib.rs @@ -5,8 +5,7 @@ pub mod engine; mod error; pub mod graph; -pub mod node; -pub mod runtime; +pub mod provider; pub use error::{WorkflowError, WorkflowResult}; diff --git a/crates/nvisy-runtime/src/node/mod.rs b/crates/nvisy-runtime/src/node/mod.rs deleted file mode 100644 index 3f3f984..0000000 --- a/crates/nvisy-runtime/src/node/mod.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Node types for workflow graphs. -//! -//! This module provides the core node abstractions: -//! - [`NodeId`]: Unique identifier for nodes -//! - [`NodeData`]: Data associated with each node (Input, Transformer, Output) - -mod data; -mod id; -pub mod input; -pub mod output; -pub mod provider; -pub mod transformer; - -pub use data::NodeData; -pub use id::NodeId; -pub use input::InputNode; -pub use output::OutputNode; -pub use provider::{ - AzblobCredentials, AzblobParams, CredentialsRegistry, GcsCredentials, GcsParams, - MysqlCredentials, MysqlParams, PostgresCredentials, PostgresParams, ProviderCredentials, - ProviderParams, S3Credentials, S3Params, -}; -pub use transformer::{TransformerConfig, TransformerNode}; diff --git a/crates/nvisy-runtime/src/node/provider/credentials.rs b/crates/nvisy-runtime/src/node/provider/credentials.rs deleted file mode 100644 index 03026be..0000000 --- a/crates/nvisy-runtime/src/node/provider/credentials.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! Provider credentials (sensitive, stored per workspace). - -use derive_more::From; -use serde::{Deserialize, Serialize}; - -/// Provider credentials (sensitive). -#[derive(Debug, Clone, From, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub enum ProviderCredentials { - /// Amazon S3 credentials. - S3(S3Credentials), - /// Google Cloud Storage credentials. - Gcs(GcsCredentials), - /// Azure Blob Storage credentials. - Azblob(AzblobCredentials), - /// PostgreSQL credentials. - Postgres(PostgresCredentials), - /// MySQL credentials. - Mysql(MysqlCredentials), -} - -/// Amazon S3 credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct S3Credentials { - /// AWS region. - pub region: String, - /// Access key ID. - pub access_key_id: String, - /// Secret access key. - pub secret_access_key: String, - /// Custom endpoint URL (for S3-compatible storage like MinIO, R2). - #[serde(skip_serializing_if = "Option::is_none")] - pub endpoint: Option, -} - -/// Google Cloud Storage credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GcsCredentials { - /// Service account credentials JSON. - pub credentials_json: String, -} - -/// Azure Blob Storage credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AzblobCredentials { - /// Storage account name. - pub account_name: String, - /// Account key for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub account_key: Option, - /// SAS token for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub sas_token: Option, -} - -/// PostgreSQL credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PostgresCredentials { - /// Connection string (e.g., "postgresql://user:pass@host:5432/db"). - pub connection_string: String, -} - -/// MySQL credentials. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MysqlCredentials { - /// Connection string (e.g., "mysql://user:pass@host:3306/db"). - pub connection_string: String, -} diff --git a/crates/nvisy-runtime/src/node/provider/params.rs b/crates/nvisy-runtime/src/node/provider/params.rs deleted file mode 100644 index 50938b3..0000000 --- a/crates/nvisy-runtime/src/node/provider/params.rs +++ /dev/null @@ -1,203 +0,0 @@ -//! Provider parameters (non-sensitive, part of node definition). - -use derive_more::From; -use nvisy_dal::DataTypeId; -use nvisy_dal::provider::{ - AzblobConfig, GcsConfig, MysqlConfig, PostgresConfig, ProviderConfig, S3Config, -}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::{ - AzblobCredentials, GcsCredentials, MysqlCredentials, PostgresCredentials, ProviderCredentials, - S3Credentials, -}; - -/// Provider parameters with credentials reference. -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub enum ProviderParams { - /// Amazon S3 storage. - S3(S3Params), - /// Google Cloud Storage. - Gcs(GcsParams), - /// Azure Blob Storage. - Azblob(AzblobParams), - /// PostgreSQL database. - Postgres(PostgresParams), - /// MySQL database. - Mysql(MysqlParams), -} - -impl ProviderParams { - /// Returns the credentials ID for this provider. - pub fn credentials_id(&self) -> Uuid { - match self { - Self::S3(p) => p.credentials_id, - Self::Gcs(p) => p.credentials_id, - Self::Azblob(p) => p.credentials_id, - Self::Postgres(p) => p.credentials_id, - Self::Mysql(p) => p.credentials_id, - } - } - - /// Returns the output data type for this provider. - pub const fn output_type(&self) -> DataTypeId { - match self { - Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, - Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, - } - } - - /// Combines params with credentials to create a full provider config. - /// - /// # Panics - /// - /// Panics if the credentials type doesn't match the params type. - pub fn into_config(self, credentials: ProviderCredentials) -> ProviderConfig { - match (self, credentials) { - (Self::S3(p), ProviderCredentials::S3(c)) => ProviderConfig::S3(p.into_config(c)), - (Self::Gcs(p), ProviderCredentials::Gcs(c)) => ProviderConfig::Gcs(p.into_config(c)), - (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { - ProviderConfig::Azblob(p.into_config(c)) - } - (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { - ProviderConfig::Postgres(p.into_config(c)) - } - (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { - ProviderConfig::Mysql(p.into_config(c)) - } - _ => panic!("credentials type mismatch"), - } - } -} - -/// Amazon S3 parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct S3Params { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Bucket name. - pub bucket: String, - /// Path prefix within the bucket. - #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} - -impl S3Params { - fn into_config(self, credentials: S3Credentials) -> S3Config { - let mut config = S3Config::new(self.bucket, credentials.region) - .with_credentials(credentials.access_key_id, credentials.secret_access_key); - - if let Some(endpoint) = credentials.endpoint { - config = config.with_endpoint(endpoint); - } - if let Some(prefix) = self.prefix { - config = config.with_prefix(prefix); - } - - config - } -} - -/// Google Cloud Storage parameters. 
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct GcsParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Bucket name. - pub bucket: String, - /// Path prefix within the bucket. - #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} - -impl GcsParams { - fn into_config(self, credentials: GcsCredentials) -> GcsConfig { - let mut config = GcsConfig::new(self.bucket).with_credentials(credentials.credentials_json); - - if let Some(prefix) = self.prefix { - config = config.with_prefix(prefix); - } - - config - } -} - -/// Azure Blob Storage parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct AzblobParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Container name. - pub container: String, - /// Path prefix within the container. - #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} - -impl AzblobParams { - fn into_config(self, credentials: AzblobCredentials) -> AzblobConfig { - let mut config = AzblobConfig::new(credentials.account_name, self.container); - - if let Some(account_key) = credentials.account_key { - config = config.with_account_key(account_key); - } - if let Some(sas_token) = credentials.sas_token { - config = config.with_sas_token(sas_token); - } - if let Some(prefix) = self.prefix { - config = config.with_prefix(prefix); - } - - config - } -} - -/// PostgreSQL parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PostgresParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Table name. - pub table: String, - /// Schema name. - #[serde(skip_serializing_if = "Option::is_none")] - pub schema: Option, -} - -impl PostgresParams { - fn into_config(self, credentials: PostgresCredentials) -> PostgresConfig { - let mut config = PostgresConfig::new(credentials.connection_string).with_table(self.table); - - if let Some(schema) = self.schema { - config = config.with_schema(schema); - } - - config - } -} - -/// MySQL parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MysqlParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Table name. - pub table: String, - /// Database name. - #[serde(skip_serializing_if = "Option::is_none")] - pub database: Option, -} - -impl MysqlParams { - fn into_config(self, credentials: MysqlCredentials) -> MysqlConfig { - let mut config = MysqlConfig::new(credentials.connection_string).with_table(self.table); - - if let Some(database) = self.database { - config = config.with_database(database); - } - - config - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/azblob.rs b/crates/nvisy-runtime/src/provider/backend/azblob.rs new file mode 100644 index 0000000..cdeb0b7 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/azblob.rs @@ -0,0 +1,49 @@ +//! Azure Blob Storage provider. + +use nvisy_dal::provider::AzblobConfig; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Azure Blob Storage credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AzblobCredentials { + /// Storage account name. + pub account_name: String, + /// Account key for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub account_key: Option, + /// SAS token for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub sas_token: Option, +} + +/// Azure Blob Storage parameters. 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct AzblobParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Container name. + pub container: String, + /// Path prefix within the container. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option, +} + +impl AzblobParams { + /// Combines params with credentials to create a full provider config. + pub fn into_config(self, credentials: AzblobCredentials) -> AzblobConfig { + let mut config = AzblobConfig::new(credentials.account_name, self.container); + + if let Some(account_key) = credentials.account_key { + config = config.with_account_key(account_key); + } + if let Some(sas_token) = credentials.sas_token { + config = config.with_sas_token(sas_token); + } + if let Some(prefix) = self.prefix { + config = config.with_prefix(prefix); + } + + config + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/gcs.rs b/crates/nvisy-runtime/src/provider/backend/gcs.rs new file mode 100644 index 0000000..8984f84 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/gcs.rs @@ -0,0 +1,37 @@ +//! Google Cloud Storage provider. + +use nvisy_dal::provider::GcsConfig; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Google Cloud Storage credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GcsCredentials { + /// Service account credentials JSON. + pub credentials_json: String, +} + +/// Google Cloud Storage parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct GcsParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Bucket name. + pub bucket: String, + /// Path prefix within the bucket. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option, +} + +impl GcsParams { + /// Combines params with credentials to create a full provider config. + pub fn into_config(self, credentials: GcsCredentials) -> GcsConfig { + let mut config = GcsConfig::new(self.bucket).with_credentials(credentials.credentials_json); + + if let Some(prefix) = self.prefix { + config = config.with_prefix(prefix); + } + + config + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/milvus.rs b/crates/nvisy-runtime/src/provider/backend/milvus.rs new file mode 100644 index 0000000..5eefc42 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/milvus.rs @@ -0,0 +1,62 @@ +//! Milvus vector database provider. + +use nvisy_dal::provider::MilvusConfig; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Default Milvus port. +fn default_milvus_port() -> u16 { + 19530 +} + +/// Milvus credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MilvusCredentials { + /// Milvus server host. + pub host: String, + /// Milvus server port. + #[serde(default = "default_milvus_port")] + pub port: u16, + /// Username for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub username: Option, + /// Password for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub password: Option, +} + +/// Milvus parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MilvusParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Collection name. + pub collection: String, + /// Database name. + #[serde(skip_serializing_if = "Option::is_none")] + pub database: Option, + /// Vector dimensions. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub dimensions: Option, +} + +impl MilvusParams { + /// Combines params with credentials to create a full provider config. + pub fn into_config(self, credentials: MilvusCredentials) -> MilvusConfig { + let mut config = MilvusConfig::new(credentials.host) + .with_port(credentials.port) + .with_collection(self.collection); + + if let Some((username, password)) = credentials.username.zip(credentials.password) { + config = config.with_credentials(username, password); + } + if let Some(database) = self.database { + config = config.with_database(database); + } + if let Some(dimensions) = self.dimensions { + config = config.with_dimensions(dimensions); + } + + config + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/mod.rs b/crates/nvisy-runtime/src/provider/backend/mod.rs new file mode 100644 index 0000000..6da1d7c --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/mod.rs @@ -0,0 +1,32 @@ +//! Backend provider implementations. +//! +//! Each provider file contains credentials and params for a specific backend: +//! - [`s3`]: Amazon S3 +//! - [`gcs`]: Google Cloud Storage +//! - [`azblob`]: Azure Blob Storage +//! - [`postgres`]: PostgreSQL +//! - [`mysql`]: MySQL +//! - [`qdrant`]: Qdrant vector database +//! - [`pinecone`]: Pinecone vector database +//! - [`milvus`]: Milvus vector database +//! - [`pgvector`]: pgvector (PostgreSQL extension) + +mod azblob; +mod gcs; +mod milvus; +mod mysql; +mod pgvector; +mod pinecone; +mod postgres; +mod qdrant; +mod s3; + +pub use azblob::{AzblobCredentials, AzblobParams}; +pub use gcs::{GcsCredentials, GcsParams}; +pub use milvus::{MilvusCredentials, MilvusParams}; +pub use mysql::{MysqlCredentials, MysqlParams}; +pub use pgvector::{PgVectorCredentials, PgVectorParams}; +pub use pinecone::{PineconeCredentials, PineconeParams}; +pub use postgres::{PostgresCredentials, PostgresParams}; +pub use qdrant::{QdrantCredentials, QdrantParams}; +pub use s3::{S3Credentials, S3Params}; diff --git a/crates/nvisy-runtime/src/provider/backend/mysql.rs b/crates/nvisy-runtime/src/provider/backend/mysql.rs new file mode 100644 index 0000000..51e1b95 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/mysql.rs @@ -0,0 +1,37 @@ +//! MySQL provider. + +use nvisy_dal::provider::MysqlConfig; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// MySQL credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MysqlCredentials { + /// Connection string (e.g., "mysql://user:pass@host:3306/db"). + pub connection_string: String, +} + +/// MySQL parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MysqlParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Table name. + pub table: String, + /// Database name. + #[serde(skip_serializing_if = "Option::is_none")] + pub database: Option, +} + +impl MysqlParams { + /// Combines params with credentials to create a full provider config. + pub fn into_config(self, credentials: MysqlCredentials) -> MysqlConfig { + let mut config = MysqlConfig::new(credentials.connection_string).with_table(self.table); + + if let Some(database) = self.database { + config = config.with_database(database); + } + + config + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/pgvector.rs b/crates/nvisy-runtime/src/provider/backend/pgvector.rs new file mode 100644 index 0000000..e051306 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/pgvector.rs @@ -0,0 +1,30 @@ +//! 
pgvector (PostgreSQL extension) provider. + +use nvisy_dal::provider::PgVectorConfig; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// pgvector credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PgVectorCredentials { + /// PostgreSQL connection URL. + pub connection_url: String, +} + +/// pgvector parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PgVectorParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Table name. + pub table: String, + /// Vector dimensions. + pub dimensions: usize, +} + +impl PgVectorParams { + /// Combines params with credentials to create a full provider config. + pub fn into_config(self, credentials: PgVectorCredentials) -> PgVectorConfig { + PgVectorConfig::new(credentials.connection_url, self.dimensions).with_table(self.table) + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/pinecone.rs b/crates/nvisy-runtime/src/provider/backend/pinecone.rs new file mode 100644 index 0000000..f09fb99 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/pinecone.rs @@ -0,0 +1,46 @@ +//! Pinecone vector database provider. + +use nvisy_dal::provider::PineconeConfig; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Pinecone credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PineconeCredentials { + /// Pinecone API key. + pub api_key: String, + /// Environment (e.g., "us-east-1-aws"). + pub environment: String, +} + +/// Pinecone parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PineconeParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Index name. + pub index: String, + /// Namespace. + #[serde(skip_serializing_if = "Option::is_none")] + pub namespace: Option, + /// Vector dimensions. + #[serde(skip_serializing_if = "Option::is_none")] + pub dimensions: Option, +} + +impl PineconeParams { + /// Combines params with credentials to create a full provider config. + pub fn into_config(self, credentials: PineconeCredentials) -> PineconeConfig { + let mut config = + PineconeConfig::new(credentials.api_key, credentials.environment, self.index); + + if let Some(namespace) = self.namespace { + config = config.with_namespace(namespace); + } + if let Some(dimensions) = self.dimensions { + config = config.with_dimensions(dimensions); + } + + config + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/postgres.rs b/crates/nvisy-runtime/src/provider/backend/postgres.rs new file mode 100644 index 0000000..93c79d9 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/postgres.rs @@ -0,0 +1,37 @@ +//! PostgreSQL provider. + +use nvisy_dal::provider::PostgresConfig; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// PostgreSQL credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PostgresCredentials { + /// Connection string (e.g., "postgresql://user:pass@host:5432/db"). + pub connection_string: String, +} + +/// PostgreSQL parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PostgresParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Table name. + pub table: String, + /// Schema name. + #[serde(skip_serializing_if = "Option::is_none")] + pub schema: Option, +} + +impl PostgresParams { + /// Combines params with credentials to create a full provider config. 
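// A small sketch of the params/credentials split for the pgvector backend
// above; only `into_config` and the field names come from this file, and the
// connection URL, table, and dimensions are placeholder values.
use uuid::Uuid;

use crate::provider::{PgVectorCredentials, PgVectorParams};

fn example_pgvector_config() -> nvisy_dal::provider::PgVectorConfig {
    let params = PgVectorParams {
        credentials_id: Uuid::new_v4(),
        table: "document_embeddings".to_string(),
        dimensions: 1536,
    };
    // Credentials live in the workspace store and are only combined with the
    // node's params at execution time.
    let credentials = PgVectorCredentials {
        connection_url: "postgresql://user:pass@localhost:5432/vectors".to_string(),
    };
    params.into_config(credentials)
}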
+ pub fn into_config(self, credentials: PostgresCredentials) -> PostgresConfig { + let mut config = PostgresConfig::new(credentials.connection_string).with_table(self.table); + + if let Some(schema) = self.schema { + config = config.with_schema(schema); + } + + config + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/qdrant.rs b/crates/nvisy-runtime/src/provider/backend/qdrant.rs new file mode 100644 index 0000000..0a115c3 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/qdrant.rs @@ -0,0 +1,43 @@ +//! Qdrant vector database provider. + +use nvisy_dal::provider::QdrantConfig; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Qdrant credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QdrantCredentials { + /// Qdrant server URL. + pub url: String, + /// API key for authentication. + #[serde(skip_serializing_if = "Option::is_none")] + pub api_key: Option, +} + +/// Qdrant parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct QdrantParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Collection name. + pub collection: String, + /// Vector dimensions. + #[serde(skip_serializing_if = "Option::is_none")] + pub dimensions: Option, +} + +impl QdrantParams { + /// Combines params with credentials to create a full provider config. + pub fn into_config(self, credentials: QdrantCredentials) -> QdrantConfig { + let mut config = QdrantConfig::new(credentials.url).with_collection(self.collection); + + if let Some(api_key) = credentials.api_key { + config = config.with_api_key(api_key); + } + if let Some(dimensions) = self.dimensions { + config = config.with_dimensions(dimensions); + } + + config + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/s3.rs b/crates/nvisy-runtime/src/provider/backend/s3.rs new file mode 100644 index 0000000..ec1218a --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/s3.rs @@ -0,0 +1,48 @@ +//! Amazon S3 provider. + +use nvisy_dal::provider::S3Config; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Amazon S3 credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct S3Credentials { + /// AWS region. + pub region: String, + /// Access key ID. + pub access_key_id: String, + /// Secret access key. + pub secret_access_key: String, + /// Custom endpoint URL (for S3-compatible storage like MinIO, R2). + #[serde(skip_serializing_if = "Option::is_none")] + pub endpoint: Option, +} + +/// Amazon S3 parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct S3Params { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Bucket name. + pub bucket: String, + /// Path prefix within the bucket. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option, +} + +impl S3Params { + /// Combines params with credentials to create a full provider config. 
+ pub fn into_config(self, credentials: S3Credentials) -> S3Config { + let mut config = S3Config::new(self.bucket, credentials.region) + .with_credentials(credentials.access_key_id, credentials.secret_access_key); + + if let Some(endpoint) = credentials.endpoint { + config = config.with_endpoint(endpoint); + } + if let Some(prefix) = self.prefix { + config = config.with_prefix(prefix); + } + + config + } +} diff --git a/crates/nvisy-runtime/src/provider/inputs.rs b/crates/nvisy-runtime/src/provider/inputs.rs new file mode 100644 index 0000000..e34707b --- /dev/null +++ b/crates/nvisy-runtime/src/provider/inputs.rs @@ -0,0 +1,183 @@ +//! Input provider types and implementations. + +use derive_more::From; +use nvisy_dal::core::Context; +use nvisy_dal::provider::{ + AzblobConfig, AzblobProvider, GcsConfig, GcsProvider, MysqlConfig, MysqlProvider, + PostgresConfig, PostgresProvider, S3Config, S3Provider, +}; +use nvisy_dal::{AnyDataValue, DataTypeId}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::ProviderCredentials; +use super::backend::{AzblobParams, GcsParams, MysqlParams, PostgresParams, S3Params}; +use crate::error::{WorkflowError, WorkflowResult}; + +/// Input provider parameters (storage backends only, no vector DBs). +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum InputProviderParams { + /// Amazon S3 storage. + S3(S3Params), + /// Google Cloud Storage. + Gcs(GcsParams), + /// Azure Blob Storage. + Azblob(AzblobParams), + /// PostgreSQL database. + Postgres(PostgresParams), + /// MySQL database. + Mysql(MysqlParams), +} + +impl InputProviderParams { + /// Returns the credentials ID for this provider. + pub fn credentials_id(&self) -> Uuid { + match self { + Self::S3(p) => p.credentials_id, + Self::Gcs(p) => p.credentials_id, + Self::Azblob(p) => p.credentials_id, + Self::Postgres(p) => p.credentials_id, + Self::Mysql(p) => p.credentials_id, + } + } + + /// Returns the provider kind as a string. + pub const fn kind(&self) -> &'static str { + match self { + Self::S3(_) => "s3", + Self::Gcs(_) => "gcs", + Self::Azblob(_) => "azblob", + Self::Postgres(_) => "postgres", + Self::Mysql(_) => "mysql", + } + } + + /// Returns the output data type for this provider. + pub const fn output_type(&self) -> DataTypeId { + match self { + Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, + Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, + } + } + + /// Combines params with credentials to create a full provider config. + /// + /// Returns an error if the credentials type doesn't match the params type. 
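// A short sketch of the error path documented above: pairing S3 params with
// Postgres credentials should return an error instead of panicking like the
// old `ProviderParams::into_config` did. All values are placeholders.
use uuid::Uuid;

use crate::provider::{InputProviderParams, PostgresCredentials, ProviderCredentials, S3Params};

fn example_credentials_mismatch() {
    let params = InputProviderParams::S3(S3Params {
        credentials_id: Uuid::new_v4(),
        bucket: "raw-documents".to_string(),
        prefix: None,
    });
    let wrong_credentials = ProviderCredentials::Postgres(PostgresCredentials {
        connection_string: "postgresql://user:pass@localhost:5432/app".to_string(),
    });
    // The kinds ("s3" vs "postgres") do not match, so `into_config` errors.
    assert!(params.into_config(wrong_credentials).is_err());
}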
+    pub fn into_config(
+        self,
+        credentials: ProviderCredentials,
+    ) -> WorkflowResult<InputProviderConfig> {
+        match (self, credentials) {
+            (Self::S3(p), ProviderCredentials::S3(c)) => {
+                Ok(InputProviderConfig::S3(p.into_config(c)))
+            }
+            (Self::Gcs(p), ProviderCredentials::Gcs(c)) => {
+                Ok(InputProviderConfig::Gcs(p.into_config(c)))
+            }
+            (Self::Azblob(p), ProviderCredentials::Azblob(c)) => {
+                Ok(InputProviderConfig::Azblob(p.into_config(c)))
+            }
+            (Self::Postgres(p), ProviderCredentials::Postgres(c)) => {
+                Ok(InputProviderConfig::Postgres(p.into_config(c)))
+            }
+            (Self::Mysql(p), ProviderCredentials::Mysql(c)) => {
+                Ok(InputProviderConfig::Mysql(p.into_config(c)))
+            }
+            (params, creds) => Err(WorkflowError::Internal(format!(
+                "credentials type mismatch: expected '{}', got '{}'",
+                params.kind(),
+                creds.kind()
+            ))),
+        }
+    }
+}
+
+/// Resolved input provider config (params + credentials combined).
+#[derive(Debug, Clone)]
+pub enum InputProviderConfig {
+    S3(S3Config),
+    Gcs(GcsConfig),
+    Azblob(AzblobConfig),
+    Postgres(PostgresConfig),
+    Mysql(MysqlConfig),
+}
+
+impl InputProviderConfig {
+    /// Creates an input provider from this config.
+    pub fn into_provider(self) -> WorkflowResult<InputProvider> {
+        match self {
+            Self::S3(config) => S3Provider::new(&config)
+                .map(InputProvider::S3)
+                .map_err(|e| WorkflowError::Internal(e.to_string())),
+            Self::Gcs(config) => GcsProvider::new(&config)
+                .map(InputProvider::Gcs)
+                .map_err(|e| WorkflowError::Internal(e.to_string())),
+            Self::Azblob(config) => AzblobProvider::new(&config)
+                .map(InputProvider::Azblob)
+                .map_err(|e| WorkflowError::Internal(e.to_string())),
+            Self::Postgres(config) => PostgresProvider::new(&config)
+                .map(InputProvider::Postgres)
+                .map_err(|e| WorkflowError::Internal(e.to_string())),
+            Self::Mysql(config) => MysqlProvider::new(&config)
+                .map(InputProvider::Mysql)
+                .map_err(|e| WorkflowError::Internal(e.to_string())),
+        }
+    }
+}
+
+/// Input provider instance (created from config).
+#[derive(Debug, Clone)]
+pub enum InputProvider {
+    S3(S3Provider),
+    Gcs(GcsProvider),
+    Azblob(AzblobProvider),
+    Postgres(PostgresProvider),
+    Mysql(MysqlProvider),
+}
+
+impl InputProvider {
+    /// Returns the output data type for this provider.
+    pub const fn output_type(&self) -> DataTypeId {
+        match self {
+            Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob,
+            Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record,
+        }
+    }
+
+    /// Reads data from the provider, returning type-erased values.
+    pub async fn read(&self, ctx: &Context) -> WorkflowResult<Vec<AnyDataValue>> {
+        match self {
+            Self::S3(p) => read_data!(p, ctx, Blob),
+            Self::Gcs(p) => read_data!(p, ctx, Blob),
+            Self::Azblob(p) => read_data!(p, ctx, Blob),
+            Self::Postgres(p) => read_data!(p, ctx, Record),
+            Self::Mysql(p) => read_data!(p, ctx, Record),
+        }
+    }
+}
+
+/// Helper macro to read data from a provider and convert to AnyDataValue.
+macro_rules! read_data {
+    ($provider:expr, $ctx:expr, $variant:ident) => {{
+        use futures::StreamExt;
+        use nvisy_dal::core::DataInput;
+        use nvisy_dal::datatype::$variant;
+
+        let stream = $provider
+            .read($ctx)
+            .await
+            .map_err(|e| WorkflowError::Internal(e.to_string()))?;
+
+        let items: Vec<$variant> = stream
+            .collect::<Vec<_>>()
+            .await
+            .into_iter()
+            .collect::<Result<Vec<_>, _>>()
+            .map_err(|e| WorkflowError::Internal(e.to_string()))?;
+
+        Ok(items.into_iter().map(AnyDataValue::$variant).collect())
+    }};
+}
+
+use read_data;
diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs
new file mode 100644
index 0000000..36fae48
--- /dev/null
+++ b/crates/nvisy-runtime/src/provider/mod.rs
@@ -0,0 +1,73 @@
+//! Provider params, credentials, and registry.
+//!
+//! This module separates provider configuration into:
+//! - [`ProviderCredentials`]: Sensitive credentials (stored per workspace)
+//! - [`InputProviderParams`] / [`OutputProviderParams`]: Non-sensitive parameters (part of node definition)
+//! - [`CredentialsRegistry`]: In-memory registry for credentials lookup
+//!
+//! # Module Structure
+//!
+//! - [`backend`]: Individual provider implementations (credentials + params)
+//! - [`inputs`]: Input provider types and read operations
+//! - [`outputs`]: Output provider types and write operations
+//! - [`registry`]: Credentials registry for workflow execution
+
+pub mod backend;
+mod inputs;
+mod outputs;
+mod registry;
+pub mod runtime;
+
+use derive_more::From;
+use serde::{Deserialize, Serialize};
+
+pub use backend::{
+    AzblobCredentials, AzblobParams, GcsCredentials, GcsParams, MilvusCredentials, MilvusParams,
+    MysqlCredentials, MysqlParams, PgVectorCredentials, PgVectorParams, PineconeCredentials,
+    PineconeParams, PostgresCredentials, PostgresParams, QdrantCredentials, QdrantParams,
+    S3Credentials, S3Params,
+};
+pub use inputs::{InputProvider, InputProviderConfig, InputProviderParams};
+pub use outputs::{OutputProvider, OutputProviderConfig, OutputProviderParams};
+pub use registry::CredentialsRegistry;
+
+/// Provider credentials (sensitive).
+#[derive(Debug, Clone, From, Serialize, Deserialize)]
+#[serde(tag = "provider", rename_all = "snake_case")]
+pub enum ProviderCredentials {
+    /// Amazon S3 credentials.
+    S3(S3Credentials),
+    /// Google Cloud Storage credentials.
+    Gcs(GcsCredentials),
+    /// Azure Blob Storage credentials.
+    Azblob(AzblobCredentials),
+    /// PostgreSQL credentials.
+    Postgres(PostgresCredentials),
+    /// MySQL credentials.
+    Mysql(MysqlCredentials),
+    /// Qdrant credentials.
+    Qdrant(QdrantCredentials),
+    /// Pinecone credentials.
+    Pinecone(PineconeCredentials),
+    /// Milvus credentials.
+    Milvus(MilvusCredentials),
+    /// pgvector credentials.
+    PgVector(PgVectorCredentials),
+}
+
+impl ProviderCredentials {
+    /// Returns the provider kind as a string.
+    pub const fn kind(&self) -> &'static str {
+        match self {
+            Self::S3(_) => "s3",
+            Self::Gcs(_) => "gcs",
+            Self::Azblob(_) => "azblob",
+            Self::Postgres(_) => "postgres",
+            Self::Mysql(_) => "mysql",
+            Self::Qdrant(_) => "qdrant",
+            Self::Pinecone(_) => "pinecone",
+            Self::Milvus(_) => "milvus",
+            Self::PgVector(_) => "pgvector",
+        }
+    }
+}
diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs
new file mode 100644
index 0000000..ef8fc1d
--- /dev/null
+++ b/crates/nvisy-runtime/src/provider/outputs.rs
@@ -0,0 +1,241 @@
+//! Output provider types and implementations.
+ +use derive_more::From; +use nvisy_dal::core::Context; +use nvisy_dal::provider::{ + AzblobConfig, AzblobProvider, GcsConfig, GcsProvider, MilvusConfig, MilvusProvider, + MysqlConfig, MysqlProvider, PgVectorConfig, PgVectorProvider, PineconeConfig, PineconeProvider, + PostgresConfig, PostgresProvider, QdrantConfig, QdrantProvider, S3Config, S3Provider, +}; +use nvisy_dal::{AnyDataValue, DataTypeId}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::ProviderCredentials; +use super::backend::{ + AzblobParams, GcsParams, MilvusParams, MysqlParams, PgVectorParams, PineconeParams, + PostgresParams, QdrantParams, S3Params, +}; +use crate::error::{WorkflowError, WorkflowResult}; + +/// Output provider parameters (storage backends + vector DBs). +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum OutputProviderParams { + /// Amazon S3 storage. + S3(S3Params), + /// Google Cloud Storage. + Gcs(GcsParams), + /// Azure Blob Storage. + Azblob(AzblobParams), + /// PostgreSQL database. + Postgres(PostgresParams), + /// MySQL database. + Mysql(MysqlParams), + /// Qdrant vector database. + Qdrant(QdrantParams), + /// Pinecone vector database. + Pinecone(PineconeParams), + /// Milvus vector database. + Milvus(MilvusParams), + /// pgvector (PostgreSQL extension). + PgVector(PgVectorParams), +} + +impl OutputProviderParams { + /// Returns the credentials ID for this provider. + pub fn credentials_id(&self) -> Uuid { + match self { + Self::S3(p) => p.credentials_id, + Self::Gcs(p) => p.credentials_id, + Self::Azblob(p) => p.credentials_id, + Self::Postgres(p) => p.credentials_id, + Self::Mysql(p) => p.credentials_id, + Self::Qdrant(p) => p.credentials_id, + Self::Pinecone(p) => p.credentials_id, + Self::Milvus(p) => p.credentials_id, + Self::PgVector(p) => p.credentials_id, + } + } + + /// Returns the provider kind as a string. + pub const fn kind(&self) -> &'static str { + match self { + Self::S3(_) => "s3", + Self::Gcs(_) => "gcs", + Self::Azblob(_) => "azblob", + Self::Postgres(_) => "postgres", + Self::Mysql(_) => "mysql", + Self::Qdrant(_) => "qdrant", + Self::Pinecone(_) => "pinecone", + Self::Milvus(_) => "milvus", + Self::PgVector(_) => "pgvector", + } + } + + /// Returns the output data type for this provider. + pub const fn output_type(&self) -> DataTypeId { + match self { + Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, + Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, + Self::Qdrant(_) | Self::Pinecone(_) | Self::Milvus(_) | Self::PgVector(_) => { + DataTypeId::Embedding + } + } + } + + /// Combines params with credentials to create a full provider config. + /// + /// Returns an error if the credentials type doesn't match the params type. 
+ pub fn into_config( + self, + credentials: ProviderCredentials, + ) -> WorkflowResult { + match (self, credentials) { + (Self::S3(p), ProviderCredentials::S3(c)) => { + Ok(OutputProviderConfig::S3(p.into_config(c))) + } + (Self::Gcs(p), ProviderCredentials::Gcs(c)) => { + Ok(OutputProviderConfig::Gcs(p.into_config(c))) + } + (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { + Ok(OutputProviderConfig::Azblob(p.into_config(c))) + } + (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { + Ok(OutputProviderConfig::Postgres(p.into_config(c))) + } + (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { + Ok(OutputProviderConfig::Mysql(p.into_config(c))) + } + (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => { + Ok(OutputProviderConfig::Qdrant(p.into_config(c))) + } + (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => { + Ok(OutputProviderConfig::Pinecone(p.into_config(c))) + } + (Self::Milvus(p), ProviderCredentials::Milvus(c)) => { + Ok(OutputProviderConfig::Milvus(p.into_config(c))) + } + (Self::PgVector(p), ProviderCredentials::PgVector(c)) => { + Ok(OutputProviderConfig::PgVector(p.into_config(c))) + } + (params, creds) => Err(WorkflowError::Internal(format!( + "credentials type mismatch: expected '{}', got '{}'", + params.kind(), + creds.kind() + ))), + } + } +} + +/// Resolved output provider config (params + credentials combined). +#[derive(Debug, Clone)] +pub enum OutputProviderConfig { + S3(S3Config), + Gcs(GcsConfig), + Azblob(AzblobConfig), + Postgres(PostgresConfig), + Mysql(MysqlConfig), + Qdrant(QdrantConfig), + Pinecone(PineconeConfig), + Milvus(MilvusConfig), + PgVector(PgVectorConfig), +} + +impl OutputProviderConfig { + /// Creates an output provider from this config. + pub async fn into_provider(self) -> WorkflowResult { + match self { + Self::S3(config) => S3Provider::new(&config) + .map(OutputProvider::S3) + .map_err(|e| WorkflowError::Internal(e.to_string())), + Self::Gcs(config) => GcsProvider::new(&config) + .map(OutputProvider::Gcs) + .map_err(|e| WorkflowError::Internal(e.to_string())), + Self::Azblob(config) => AzblobProvider::new(&config) + .map(OutputProvider::Azblob) + .map_err(|e| WorkflowError::Internal(e.to_string())), + Self::Postgres(config) => PostgresProvider::new(&config) + .map(OutputProvider::Postgres) + .map_err(|e| WorkflowError::Internal(e.to_string())), + Self::Mysql(config) => MysqlProvider::new(&config) + .map(OutputProvider::Mysql) + .map_err(|e| WorkflowError::Internal(e.to_string())), + Self::Qdrant(config) => QdrantProvider::new(&config) + .await + .map(OutputProvider::Qdrant) + .map_err(|e| WorkflowError::Internal(e.to_string())), + Self::Pinecone(config) => PineconeProvider::new(&config) + .await + .map(OutputProvider::Pinecone) + .map_err(|e| WorkflowError::Internal(e.to_string())), + Self::Milvus(config) => MilvusProvider::new(&config) + .await + .map(OutputProvider::Milvus) + .map_err(|e| WorkflowError::Internal(e.to_string())), + Self::PgVector(config) => PgVectorProvider::new(&config) + .await + .map(OutputProvider::PgVector) + .map_err(|e| WorkflowError::Internal(e.to_string())), + } + } +} + +/// Output provider instance (created from config). +#[derive(Debug)] +pub enum OutputProvider { + S3(S3Provider), + Gcs(GcsProvider), + Azblob(AzblobProvider), + Postgres(PostgresProvider), + Mysql(MysqlProvider), + Qdrant(QdrantProvider), + Pinecone(PineconeProvider), + Milvus(MilvusProvider), + PgVector(PgVectorProvider), +} + +impl OutputProvider { + /// Returns the input data type expected by this provider. 
+ pub const fn input_type(&self) -> DataTypeId { + match self { + Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, + Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, + Self::Qdrant(_) | Self::Pinecone(_) | Self::Milvus(_) | Self::PgVector(_) => { + DataTypeId::Embedding + } + } + } + + /// Writes data to the provider, accepting type-erased values. + pub async fn write(&self, ctx: &Context, data: Vec) -> WorkflowResult<()> { + match self { + Self::S3(p) => write_data!(p, ctx, data, Blob, into_blob), + Self::Gcs(p) => write_data!(p, ctx, data, Blob, into_blob), + Self::Azblob(p) => write_data!(p, ctx, data, Blob, into_blob), + Self::Postgres(p) => write_data!(p, ctx, data, Record, into_record), + Self::Mysql(p) => write_data!(p, ctx, data, Record, into_record), + Self::Qdrant(p) => write_data!(p, ctx, data, Embedding, into_embedding), + Self::Pinecone(p) => write_data!(p, ctx, data, Embedding, into_embedding), + Self::Milvus(p) => write_data!(p, ctx, data, Embedding, into_embedding), + Self::PgVector(p) => write_data!(p, ctx, data, Embedding, into_embedding), + } + } +} + +/// Helper macro to write data to a provider from AnyDataValue. +macro_rules! write_data { + ($provider:expr, $ctx:expr, $data:expr, $type:ident, $converter:ident) => {{ + use nvisy_dal::core::DataOutput; + use nvisy_dal::datatype::$type; + + let items: Vec<$type> = $data.into_iter().filter_map(|v| v.$converter()).collect(); + + $provider + .write($ctx, items) + .await + .map_err(|e| WorkflowError::Internal(e.to_string())) + }}; +} + +use write_data; diff --git a/crates/nvisy-runtime/src/node/provider/mod.rs b/crates/nvisy-runtime/src/provider/registry.rs similarity index 74% rename from crates/nvisy-runtime/src/node/provider/mod.rs rename to crates/nvisy-runtime/src/provider/registry.rs index 5f60aba..7567781 100644 --- a/crates/nvisy-runtime/src/node/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/registry.rs @@ -1,22 +1,10 @@ -//! Provider params, credentials, and registry. -//! -//! This module separates provider configuration into: -//! - [`ProviderParams`]: Non-sensitive parameters (part of node definition) -//! - [`ProviderCredentials`]: Sensitive credentials (stored per workspace) -//! - [`CredentialsRegistry`]: In-memory registry for credentials lookup - -mod credentials; -mod params; +//! Credentials registry for workflow execution. use std::collections::HashMap; -pub use credentials::{ - AzblobCredentials, GcsCredentials, MysqlCredentials, PostgresCredentials, ProviderCredentials, - S3Credentials, -}; -pub use params::{AzblobParams, GcsParams, MysqlParams, PostgresParams, ProviderParams, S3Params}; use uuid::Uuid; +use super::ProviderCredentials; use crate::error::{WorkflowError, WorkflowResult}; /// In-memory credentials registry. 
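Reading the three new provider files together, the intended flow is: a node's `InputProviderParams` carries only a `credentials_id`, the matching `ProviderCredentials` is resolved (normally via the `CredentialsRegistry`), and the pair is combined into a config and then a concrete provider. The wrapper function below is a minimal sketch using only the APIs added in this patch; it is illustrative and not part of the change itself.

use nvisy_dal::AnyDataValue;
use nvisy_dal::core::Context;

use crate::error::WorkflowResult;
use crate::provider::{InputProviderParams, ProviderCredentials};

/// Sketch: resolve one input node and read its data as type-erased values.
/// The credentials would typically be looked up from the CredentialsRegistry
/// by `params.credentials_id()` before calling this.
async fn read_input(
    params: InputProviderParams,
    credentials: ProviderCredentials,
    ctx: &Context,
) -> WorkflowResult<Vec<AnyDataValue>> {
    // Fails if the credentials variant does not match the params variant.
    let config = params.into_config(credentials)?;
    // Builds the concrete backend (S3, GCS, Azblob, Postgres, MySQL).
    let provider = config.into_provider()?;
    // Blob for object stores, Record for SQL databases.
    provider.read(ctx).await
}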
diff --git a/crates/nvisy-runtime/src/runtime/config.rs b/crates/nvisy-runtime/src/provider/runtime/config.rs
similarity index 100%
rename from crates/nvisy-runtime/src/runtime/config.rs
rename to crates/nvisy-runtime/src/provider/runtime/config.rs
diff --git a/crates/nvisy-runtime/src/runtime/mod.rs b/crates/nvisy-runtime/src/provider/runtime/mod.rs
similarity index 100%
rename from crates/nvisy-runtime/src/runtime/mod.rs
rename to crates/nvisy-runtime/src/provider/runtime/mod.rs
diff --git a/crates/nvisy-runtime/src/runtime/service.rs b/crates/nvisy-runtime/src/provider/runtime/service.rs
similarity index 100%
rename from crates/nvisy-runtime/src/runtime/service.rs
rename to crates/nvisy-runtime/src/provider/runtime/service.rs
diff --git a/crates/nvisy-runtime/stream/event.rs b/crates/nvisy-runtime/stream/event.rs
new file mode 100644
index 0000000..164e3d7
--- /dev/null
+++ b/crates/nvisy-runtime/stream/event.rs
@@ -0,0 +1,118 @@
+//! Event types for stream processing.
+//!
+//! This module contains common event types and the file job type
+//! used in processing pipelines.
+
+use jiff::Timestamp;
+#[cfg(feature = "schema")]
+use schemars::JsonSchema;
+use serde::de::DeserializeOwned;
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+/// File processing job.
+///
+/// Represents a unit of work in a file processing pipeline.
+/// Each job targets a specific file and carries a generic payload
+/// that defines the processing parameters.
+///
+/// The generic parameter `T` is the job-specific data payload.
+/// Callers define their own payload types for different pipeline stages.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[cfg_attr(feature = "schema", derive(JsonSchema))]
+#[serde(bound = "T: Serialize + DeserializeOwned")]
+pub struct FileJob<T> {
+    /// Unique job identifier (UUID v7 for time-ordering).
+    pub id: Uuid,
+    /// Database file ID to process.
+    pub file_id: Uuid,
+    /// Storage path in NATS object store (DocumentKey encoded).
+    pub object_key: String,
+    /// File extension for format detection.
+    pub file_extension: String,
+    /// Job-specific data payload.
+    pub data: T,
+    /// When the job was created.
+    pub created_at: Timestamp,
+    /// NATS subject to publish result to (for internal job chaining).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub callback_subject: Option<String>,
+    /// Idempotency key to prevent duplicate job processing.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub idempotency_key: Option<String>,
+}
+
+impl<T> FileJob<T> {
+    /// Creates a new file job with the given data payload.
+    pub fn new(file_id: Uuid, object_key: String, file_extension: String, data: T) -> Self {
+        Self {
+            id: Uuid::now_v7(),
+            file_id,
+            object_key,
+            file_extension,
+            data,
+            created_at: Timestamp::now(),
+            callback_subject: None,
+            idempotency_key: None,
+        }
+    }
+
+    /// Sets a callback subject for job chaining.
+    pub fn with_callback(mut self, subject: impl Into<String>) -> Self {
+        self.callback_subject = Some(subject.into());
+        self
+    }
+
+    /// Sets an idempotency key.
+    pub fn with_idempotency_key(mut self, key: impl Into<String>) -> Self {
+        self.idempotency_key = Some(key.into());
+        self
+    }
+
+    /// Returns job age since creation.
+    pub fn age(&self) -> std::time::Duration {
+        let now = Timestamp::now();
+        let signed_dur = now.duration_since(self.created_at);
+        std::time::Duration::from_secs(signed_dur.as_secs().max(0) as u64)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+    struct TestPayload {
+        message: String,
+    }
+
+    #[test]
+    fn test_serialization_roundtrip() {
+        let file_id = Uuid::now_v7();
+        let job = FileJob::new(
+            file_id,
+            "path".to_string(),
+            "pdf".to_string(),
+            TestPayload {
+                message: "hello".to_string(),
+            },
+        );
+
+        let json = serde_json::to_string(&job).unwrap();
+        let parsed: FileJob<TestPayload> = serde_json::from_str(&json).unwrap();
+
+        assert_eq!(job.file_id, parsed.file_id);
+        assert_eq!(job.data, parsed.data);
+    }
+
+    #[test]
+    fn test_with_unit_payload() {
+        let file_id = Uuid::now_v7();
+        let job: FileJob<()> = FileJob::new(file_id, "path".to_string(), "pdf".to_string(), ());
+
+        let json = serde_json::to_string(&job).unwrap();
+        let parsed: FileJob<()> = serde_json::from_str(&json).unwrap();
+
+        assert_eq!(job.file_id, parsed.file_id);
+    }
+}
diff --git a/crates/nvisy-runtime/stream/event_pub.rs b/crates/nvisy-runtime/stream/event_pub.rs
new file mode 100644
index 0000000..ee826b7
--- /dev/null
+++ b/crates/nvisy-runtime/stream/event_pub.rs
@@ -0,0 +1,76 @@
+//! Generic event stream publisher.
+
+use std::marker::PhantomData;
+
+use async_nats::jetstream::Context;
+use derive_more::{Deref, DerefMut};
+use serde::Serialize;
+
+use super::event_stream::EventStream;
+use super::stream_pub::StreamPublisher;
+use crate::Result;
+
+/// Generic event publisher for delivering typed events to workers.
+///
+/// This publisher is generic over:
+/// - `T`: The event/message type to publish
+/// - `S`: The stream configuration (determines stream name, subject, etc.)
+#[derive(Debug, Clone, Deref, DerefMut)]
+pub struct EventPublisher<T, S>
+where
+    T: Serialize + Send + Sync + 'static,
+    S: EventStream,
+{
+    #[deref]
+    #[deref_mut]
+    publisher: StreamPublisher<T>,
+    _stream: PhantomData<S>,
+}
+
+impl<T, S> EventPublisher<T, S>
+where
+    T: Serialize + Send + Sync + 'static,
+    S: EventStream,
+{
+    /// Create a new event publisher for the stream type.
+    pub(crate) async fn new(jetstream: &Context) -> Result<Self> {
+        let publisher = StreamPublisher::new(jetstream, S::NAME).await?;
+        Ok(Self {
+            publisher,
+            _stream: PhantomData,
+        })
+    }
+
+    /// Publish an event to the stream's configured subject.
+    pub async fn publish(&self, event: &T) -> Result<()> {
+        self.publisher.publish(S::SUBJECT, event).await
+    }
+
+    /// Publish an event with a sub-subject appended to the stream subject.
+    ///
+    /// Events are published to `{stream_subject}.{sub_subject}`.
+    pub async fn publish_to(&self, sub_subject: &str, event: &T) -> Result<()> {
+        let subject = format!("{}.{}", S::SUBJECT, sub_subject);
+        self.publisher.publish(&subject, event).await
+    }
+
+    /// Publish multiple events to the stream's configured subject.
+    pub async fn publish_batch(&self, events: &[T]) -> Result<()>
+    where
+        T: Clone,
+    {
+        self.publisher.publish_batch(S::SUBJECT, events).await
+    }
+
+    /// Returns the stream name.
+    #[inline]
+    pub fn stream_name(&self) -> &'static str {
+        S::NAME
+    }
+
+    /// Returns the subject.
+    #[inline]
+    pub fn subject(&self) -> &'static str {
+        S::SUBJECT
+    }
+}
diff --git a/crates/nvisy-runtime/stream/event_stream.rs b/crates/nvisy-runtime/stream/event_stream.rs
new file mode 100644
index 0000000..3fb0efb
--- /dev/null
+++ b/crates/nvisy-runtime/stream/event_stream.rs
@@ -0,0 +1,74 @@
+//! Event stream configuration for NATS JetStream.
+
+use std::time::Duration;
+
+/// Marker trait for event streams.
+///
+/// This trait defines the configuration for a NATS JetStream stream.
+pub trait EventStream: Clone + Send + Sync + 'static {
+    /// Stream name used in NATS JetStream.
+    const NAME: &'static str;
+
+    /// Subject pattern for publishing/subscribing to this stream.
+    const SUBJECT: &'static str;
+
+    /// Maximum age for messages in this stream.
+    /// Returns `None` for streams where messages should not expire.
+    const MAX_AGE: Option<Duration>;
+
+    /// Default consumer name for this stream.
+    const CONSUMER_NAME: &'static str;
+}
+
+/// Stream for file processing jobs.
+///
+/// Messages expire after 7 days.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
+pub struct FileStream;
+
+impl EventStream for FileStream {
+    const CONSUMER_NAME: &'static str = "file-worker";
+    const MAX_AGE: Option<Duration> = Some(Duration::from_secs(7 * 24 * 60 * 60));
+    const NAME: &'static str = "FILE_JOBS";
+    const SUBJECT: &'static str = "file.jobs";
+}
+
+/// Stream for webhook delivery.
+///
+/// Messages expire after 1 day.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
+pub struct WebhookStream;
+
+impl EventStream for WebhookStream {
+    const CONSUMER_NAME: &'static str = "webhook-worker";
+    const MAX_AGE: Option<Duration> = Some(Duration::from_secs(24 * 60 * 60));
+    const NAME: &'static str = "WEBHOOKS";
+    const SUBJECT: &'static str = "webhooks";
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_file_stream() {
+        assert_eq!(FileStream::NAME, "FILE_JOBS");
+        assert_eq!(FileStream::SUBJECT, "file.jobs");
+        assert_eq!(
+            FileStream::MAX_AGE,
+            Some(Duration::from_secs(7 * 24 * 60 * 60))
+        );
+        assert_eq!(FileStream::CONSUMER_NAME, "file-worker");
+    }
+
+    #[test]
+    fn test_webhook_stream() {
+        assert_eq!(WebhookStream::NAME, "WEBHOOKS");
+        assert_eq!(WebhookStream::SUBJECT, "webhooks");
+        assert_eq!(
+            WebhookStream::MAX_AGE,
+            Some(Duration::from_secs(24 * 60 * 60))
+        );
+        assert_eq!(WebhookStream::CONSUMER_NAME, "webhook-worker");
+    }
+}
diff --git a/crates/nvisy-runtime/stream/event_sub.rs b/crates/nvisy-runtime/stream/event_sub.rs
new file mode 100644
index 0000000..974fd59
--- /dev/null
+++ b/crates/nvisy-runtime/stream/event_sub.rs
@@ -0,0 +1,63 @@
+//! Generic event stream subscriber.
+
+use std::marker::PhantomData;
+
+use async_nats::jetstream::Context;
+use derive_more::{Deref, DerefMut};
+use serde::de::DeserializeOwned;
+
+use super::event_stream::EventStream;
+use super::stream_sub::StreamSubscriber;
+use crate::Result;
+
+/// Generic event subscriber for consuming typed events.
+///
+/// This subscriber is generic over:
+/// - `T`: The event/message type to consume
+/// - `S`: The stream configuration (determines stream name, subject, consumer name)
+#[derive(Debug, Deref, DerefMut)]
+pub struct EventSubscriber<T, S>
+where
+    T: DeserializeOwned + Send + Sync + 'static,
+    S: EventStream,
+{
+    #[deref]
+    #[deref_mut]
+    subscriber: StreamSubscriber<T>,
+    _stream: PhantomData<S>,
+}
+
+impl<T, S> EventSubscriber<T, S>
+where
+    T: DeserializeOwned + Send + Sync + 'static,
+    S: EventStream,
+{
+    /// Create a new event subscriber using the stream's default consumer name.
+ pub(crate) async fn new(jetstream: &Context) -> Result { + let subscriber = StreamSubscriber::new(jetstream, S::NAME, S::CONSUMER_NAME) + .await? + .with_filter_subject(format!("{}.>", S::NAME)); + Ok(Self { + subscriber, + _stream: PhantomData, + }) + } + + /// Returns the stream name. + #[inline] + pub fn stream_name(&self) -> &'static str { + S::NAME + } + + /// Returns the subject. + #[inline] + pub fn subject(&self) -> &'static str { + S::SUBJECT + } + + /// Returns the consumer name. + #[inline] + pub fn consumer_name(&self) -> &'static str { + S::CONSUMER_NAME + } +} diff --git a/crates/nvisy-runtime/stream/mod.rs b/crates/nvisy-runtime/stream/mod.rs new file mode 100644 index 0000000..1a939c2 --- /dev/null +++ b/crates/nvisy-runtime/stream/mod.rs @@ -0,0 +1,20 @@ +//! JetStream streams for real-time updates and distributed job processing. +//! +//! This module provides type-safe streaming capabilities for: +//! +//! - File processing jobs via [`FileJob`], [`EventPublisher`], [`EventSubscriber`] +//! - Generic event publishing and subscribing with stream configuration via [`EventStream`] + +mod event; +mod event_pub; +mod event_stream; +mod event_sub; +mod stream_pub; +mod stream_sub; + +pub use event::FileJob; +pub use event_pub::EventPublisher; +pub use event_stream::{EventStream, FileStream, WebhookStream}; +pub use event_sub::EventSubscriber; +pub use stream_pub::StreamPublisher; +pub use stream_sub::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; diff --git a/crates/nvisy-runtime/stream/stream_pub.rs b/crates/nvisy-runtime/stream/stream_pub.rs new file mode 100644 index 0000000..4dde9e0 --- /dev/null +++ b/crates/nvisy-runtime/stream/stream_pub.rs @@ -0,0 +1,232 @@ +//! Type-safe publisher for JetStream streams. + +use std::marker::PhantomData; +use std::sync::Arc; + +use async_nats::jetstream::{Context, stream}; +use serde::Serialize; +use tokio::sync::Semaphore; +use tracing::{debug, instrument}; + +use crate::{Error, Result, TRACING_TARGET_STREAM}; + +/// Inner data for StreamPublisher +#[derive(Debug)] +struct StreamPublisherInner { + jetstream: Context, + stream_name: String, +} + +/// Type-safe stream publisher with compile-time guarantees +/// +/// This publisher provides a generic interface over JetStream for a specific +/// serializable data type T, ensuring compile-time type safety for all publish +/// operations. The type parameter prevents mixing different message types. 
+#[derive(Debug, Clone)] +pub struct StreamPublisher { + inner: Arc, + _marker: PhantomData, +} + +impl StreamPublisher +where + T: Serialize + Send + Sync + 'static, +{ + /// Create a new type-safe stream publisher + #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] + pub(crate) async fn new(jetstream: &Context, stream_name: &str) -> Result { + let stream_config = stream::Config { + name: stream_name.to_string(), + description: Some(format!("Type-safe stream: {}", stream_name)), + subjects: vec![format!("{}.>", stream_name)], + max_age: std::time::Duration::from_secs(3600), // Keep messages for 1 hour + ..Default::default() + }; + + // Try to get existing stream first + match jetstream.get_stream(stream_name).await { + Ok(_) => { + debug!( + target: TRACING_TARGET_STREAM, + stream = %stream_name, + type_name = std::any::type_name::(), + "Using existing stream" + ); + } + Err(_) => { + // Stream doesn't exist, create it + debug!( + target: TRACING_TARGET_STREAM, + stream = %stream_name, + type_name = std::any::type_name::(), + max_age_secs = 3600, + "Creating new stream" + ); + jetstream + .create_stream(stream_config) + .await + .map_err(|e| Error::operation("stream_create", e.to_string()))?; + } + } + + Ok(Self { + inner: Arc::new(StreamPublisherInner { + jetstream: jetstream.clone(), + stream_name: stream_name.to_string(), + }), + _marker: PhantomData, + }) + } + + /// Publish an event to the stream + #[instrument(skip(self, event), target = TRACING_TARGET_STREAM)] + pub async fn publish(&self, subject: &str, event: &T) -> Result<()> { + let full_subject = format!("{}.{}", self.inner.stream_name, subject); + let payload = serde_json::to_vec(event).map_err(Error::Serialization)?; + let payload_size = payload.len(); + + self.inner + .jetstream + .publish(full_subject.clone(), payload.into()) + .await + .map_err(|e| Error::delivery_failed(&full_subject, e.to_string()))? 
+ .await + .map_err(|e| Error::operation("stream_publish", e.to_string()))?; + + debug!( + target: TRACING_TARGET_STREAM, + subject = %full_subject, + payload_size = payload_size, + type_name = std::any::type_name::(), + "Published typed event" + ); + Ok(()) + } + + /// Publish multiple events in batch with parallel processing + #[instrument(skip(self, events), target = TRACING_TARGET_STREAM)] + pub async fn publish_batch(&self, subject: &str, events: &[T]) -> Result<()> + where + T: Clone, + { + self.publish_batch_parallel(subject, events, 10).await + } + + /// Publish multiple events in batch with configurable parallelism + #[instrument(skip(self, events), target = TRACING_TARGET_STREAM)] + pub async fn publish_batch_parallel( + &self, + subject: &str, + events: &[T], + parallelism: usize, + ) -> Result<()> + where + T: Clone, + { + if events.is_empty() { + return Ok(()); + } + + let count = events.len(); + let semaphore = Arc::new(Semaphore::new(parallelism)); + let mut tasks = Vec::with_capacity(events.len()); + + for event in events.iter() { + let event = event.clone(); + let subject = subject.to_string(); + let publisher = self.clone(); + let permit = semaphore.clone(); + + let task = tokio::spawn(async move { + let _permit = permit + .acquire() + .await + .map_err(|_| Error::operation("semaphore", "Failed to acquire permit"))?; + publisher.publish(&subject, &event).await + }); + + tasks.push(task); + } + + // Wait for all tasks and collect errors + let mut errors = Vec::new(); + for task in tasks { + match task.await { + Ok(Ok(())) => {} // Success + Ok(Err(e)) => errors.push(e), + Err(e) => errors.push(Error::operation("task_join", e.to_string())), + } + } + + if !errors.is_empty() { + return Err(Error::operation( + "batch_publish", + format!("Failed to publish {} out of {} events", errors.len(), count), + )); + } + + debug!( + target: TRACING_TARGET_STREAM, + count = count, + parallelism = parallelism, + stream = %self.inner.stream_name, + subject = %subject, + "Published batch of typed events in parallel" + ); + Ok(()) + } + + /// Get the stream name + pub fn stream_name(&self) -> &str { + &self.inner.stream_name + } + + /// Check if the stream is healthy and accessible + #[instrument(skip(self), target = TRACING_TARGET_STREAM)] + pub async fn health_check(&self) -> Result { + match self + .inner + .jetstream + .get_stream(&self.inner.stream_name) + .await + { + Ok(_) => { + debug!( + target: TRACING_TARGET_STREAM, + stream = %self.inner.stream_name, + "Stream health check passed" + ); + Ok(true) + } + Err(e) => { + debug!( + target: TRACING_TARGET_STREAM, + stream = %self.inner.stream_name, + error = %e, + "Stream health check failed" + ); + Ok(false) + } + } + } + + /// Get stream information + #[instrument(skip(self), target = TRACING_TARGET_STREAM)] + pub async fn stream_info(&self) -> Result { + let mut stream = self + .inner + .jetstream + .get_stream(&self.inner.stream_name) + .await + .map_err(|e| Error::stream_error(&self.inner.stream_name, e.to_string()))?; + + stream + .info() + .await + .map_err(|e| Error::operation("stream_info", e.to_string())) + .map(|info| (*info).clone()) + } +} + +#[cfg(test)] +mod tests {} diff --git a/crates/nvisy-runtime/stream/stream_sub.rs b/crates/nvisy-runtime/stream/stream_sub.rs new file mode 100644 index 0000000..458448d --- /dev/null +++ b/crates/nvisy-runtime/stream/stream_sub.rs @@ -0,0 +1,535 @@ +//! Type-safe subscriber for JetStream streams. 
+ +use std::marker::PhantomData; +use std::sync::Arc; + +use async_nats::jetstream::consumer::{self, Consumer}; +use async_nats::jetstream::{self, Context, Message}; +use futures::StreamExt; +use serde::de::DeserializeOwned; +use tracing::{debug, instrument, warn}; + +use crate::{Error, Result, TRACING_TARGET_STREAM}; + +/// Inner data for StreamSubscriber. +#[derive(Debug, Clone)] +struct StreamSubscriberInner { + jetstream: Context, + stream_name: String, + consumer_name: String, + filter_subject: Option, +} + +/// Type-safe stream subscriber with compile-time guarantees. +/// +/// This subscriber provides a generic interface over JetStream for a specific +/// deserializable data type T, ensuring compile-time type safety for all receive +/// operations. The type parameter prevents mixing different message types. +#[derive(Debug, Clone)] +pub struct StreamSubscriber { + inner: Arc, + _marker: PhantomData, +} + +impl StreamSubscriber +where + T: DeserializeOwned + Send + Sync + 'static, +{ + /// Create a new type-safe stream subscriber. + #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] + pub(crate) async fn new( + jetstream: &Context, + stream_name: &str, + consumer_name: &str, + ) -> Result { + // Verify stream exists + jetstream + .get_stream(stream_name) + .await + .map_err(|e| Error::stream_error(stream_name, e.to_string()))?; + + debug!( + target: TRACING_TARGET_STREAM, + stream = %stream_name, + consumer = %consumer_name, + type_name = std::any::type_name::(), + "Created type-safe stream subscriber" + ); + + Ok(Self { + inner: Arc::new(StreamSubscriberInner { + jetstream: jetstream.clone(), + stream_name: stream_name.to_string(), + consumer_name: consumer_name.to_string(), + filter_subject: None, + }), + _marker: PhantomData, + }) + } + + /// Add a subject filter to the subscriber (builder pattern). + pub fn with_filter_subject(self, filter: impl Into) -> Self { + let mut inner = Arc::try_unwrap(self.inner).unwrap_or_else(|arc| (*arc).clone()); + inner.filter_subject = Some(filter.into()); + Self { + inner: Arc::new(inner), + _marker: PhantomData, + } + } + + /// Subscribe to the stream and get a typed message stream. + #[instrument(skip(self), target = TRACING_TARGET_STREAM)] + pub async fn subscribe(&self) -> Result> { + let mut consumer_config = consumer::pull::Config { + durable_name: Some(self.inner.consumer_name.clone()), + description: Some(format!("Consumer for stream {}", self.inner.stream_name)), + ack_policy: consumer::AckPolicy::Explicit, + ..Default::default() + }; + + if let Some(filter) = &self.inner.filter_subject { + consumer_config.filter_subject = filter.clone(); + } + + // Get or create consumer + let stream = self + .inner + .jetstream + .get_stream(&self.inner.stream_name) + .await + .map_err(|e| { + Error::stream_error( + &self.inner.stream_name, + format!("Failed to get stream: {}", e), + ) + })?; + + let consumer = stream + .get_or_create_consumer(&self.inner.consumer_name, consumer_config) + .await + .map_err(|e| { + Error::consumer_error( + &self.inner.consumer_name, + format!("Failed to create consumer: {}", e), + ) + })?; + + debug!( + target: TRACING_TARGET_STREAM, + stream = %self.inner.stream_name, + consumer = %self.inner.consumer_name, + "Subscribed to stream" + ); + + Ok(TypedMessageStream { + consumer, + _marker: PhantomData, + }) + } + + /// Subscribe with a batch size for fetching messages. 
+ #[instrument(skip(self), target = TRACING_TARGET_STREAM)] + pub async fn subscribe_batch(&self, batch_size: usize) -> Result> { + let mut consumer_config = consumer::pull::Config { + durable_name: Some(self.inner.consumer_name.clone()), + description: Some(format!( + "Batch consumer for stream {}", + self.inner.stream_name + )), + ack_policy: consumer::AckPolicy::Explicit, + ..Default::default() + }; + + if let Some(filter) = &self.inner.filter_subject { + consumer_config.filter_subject = filter.clone(); + } + + let stream = self + .inner + .jetstream + .get_stream(&self.inner.stream_name) + .await + .map_err(|e| { + Error::stream_error( + &self.inner.stream_name, + format!("Failed to get stream: {}", e), + ) + })?; + + let consumer = stream + .get_or_create_consumer(&self.inner.consumer_name, consumer_config) + .await + .map_err(|e| { + Error::consumer_error( + &self.inner.consumer_name, + format!("Failed to create consumer: {}", e), + ) + })?; + + debug!( + target: TRACING_TARGET_STREAM, + stream = %self.inner.stream_name, + consumer = %self.inner.consumer_name, + batch_size = batch_size, + "Subscribed to stream with batching" + ); + + Ok(TypedBatchStream { + consumer, + batch_size, + _marker: PhantomData, + }) + } + + /// Get the stream name. + #[inline] + pub fn stream_name(&self) -> &str { + &self.inner.stream_name + } + + /// Get the consumer name. + #[inline] + pub fn consumer_name(&self) -> &str { + &self.inner.consumer_name + } + + /// Check if the stream and consumer are healthy and accessible. + #[instrument(skip(self), target = TRACING_TARGET_STREAM)] + pub async fn health_check(&self) -> Result { + match self + .inner + .jetstream + .get_stream(&self.inner.stream_name) + .await + { + Ok(stream) => match stream + .get_consumer::(&self.inner.consumer_name) + .await + { + Ok(_) => { + debug!( + target: TRACING_TARGET_STREAM, + stream = %self.inner.stream_name, + consumer = %self.inner.consumer_name, + "Subscriber health check passed" + ); + Ok(true) + } + Err(e) => { + debug!( + target: TRACING_TARGET_STREAM, + stream = %self.inner.stream_name, + consumer = %self.inner.consumer_name, + error = %e, + "Consumer health check failed" + ); + Ok(false) + } + }, + Err(e) => { + debug!( + target: TRACING_TARGET_STREAM, + stream = %self.inner.stream_name, + error = %e, + "Stream health check failed" + ); + Ok(false) + } + } + } + + /// Get consumer information. + #[instrument(skip(self), target = TRACING_TARGET_STREAM)] + pub async fn consumer_info(&self) -> Result { + let stream = self + .inner + .jetstream + .get_stream(&self.inner.stream_name) + .await + .map_err(|e| Error::stream_error(&self.inner.stream_name, e.to_string()))?; + + let mut consumer = stream + .get_consumer::(&self.inner.consumer_name) + .await + .map_err(|e| Error::consumer_error(&self.inner.consumer_name, e.to_string()))?; + + consumer + .info() + .await + .map_err(|e| Error::operation("consumer_info", e.to_string())) + .map(|info| (*info).clone()) + } +} + +/// Type-safe message stream wrapper. +pub struct TypedMessageStream { + consumer: Consumer, + _marker: PhantomData, +} + +impl TypedMessageStream +where + T: DeserializeOwned + Send + 'static, +{ + /// Fetch the next message from the stream with timeout. + pub async fn next_with_timeout( + &mut self, + timeout: std::time::Duration, + ) -> Result>> { + let result = tokio::time::timeout(timeout, self.next()).await; + match result { + Ok(msg_result) => msg_result, + Err(_) => Ok(None), // Timeout occurred + } + } + + /// Fetch the next message from the stream. 
+ pub async fn next(&mut self) -> Result>> { + match self.consumer.messages().await { + Ok(mut messages) => { + if let Some(msg) = messages.next().await { + match msg { + Ok(message) => { + let payload: T = serde_json::from_slice(&message.payload)?; + + debug!( + target: TRACING_TARGET_STREAM, + subject = %message.subject, + "Received typed message" + ); + + Ok(Some(TypedMessage { payload, message })) + } + Err(e) => { + warn!( + target: TRACING_TARGET_STREAM, + error = %e, + "Error receiving message" + ); + Err(Error::operation("message_receive", e.to_string())) + } + } + } else { + Ok(None) + } + } + Err(e) => Err(Error::operation("messages_stream", e.to_string())), + } + } +} + +/// Type-safe batch message stream wrapper. +pub struct TypedBatchStream { + consumer: Consumer, + batch_size: usize, + _marker: PhantomData, +} + +impl TypedBatchStream +where + T: DeserializeOwned, +{ + /// Fetch the next batch of messages with timeout. + pub async fn next_batch_with_timeout( + &mut self, + timeout: std::time::Duration, + ) -> Result>> { + let result = tokio::time::timeout(timeout, self.next_batch()).await; + match result { + Ok(batch_result) => batch_result, + Err(_) => Ok(Vec::new()), // Timeout occurred, return empty batch + } + } + + /// Fetch the next batch of messages with custom batch size. + pub async fn next_batch_sized(&mut self, batch_size: usize) -> Result>> { + let mut batch = Vec::with_capacity(batch_size); + + match self + .consumer + .fetch() + .max_messages(batch_size) + .messages() + .await + { + Ok(mut messages) => { + while let Some(msg_result) = messages.next().await { + match msg_result { + Ok(message) => match serde_json::from_slice::(&message.payload) { + Ok(payload) => { + batch.push(TypedMessage { payload, message }); + } + Err(e) => { + warn!( + target: TRACING_TARGET_STREAM, + error = %e, + "Failed to deserialize message payload in custom batch" + ); + // Continue processing other messages + } + }, + Err(e) => { + warn!( + target: TRACING_TARGET_STREAM, + error = %e, + "Error receiving message in custom batch" + ); + } + } + } + + debug!( + target: TRACING_TARGET_STREAM, + batch_size = batch.len(), + requested_size = batch_size, + "Received custom-sized batch of typed messages" + ); + + Ok(batch) + } + Err(e) => Err(Error::operation("custom_batch_fetch", e.to_string())), + } + } + + /// Fetch the next batch of messages. + pub async fn next_batch(&mut self) -> Result>> { + let mut batch = Vec::with_capacity(self.batch_size); + + match self + .consumer + .fetch() + .max_messages(self.batch_size) + .messages() + .await + { + Ok(mut messages) => { + while let Some(msg_result) = messages.next().await { + match msg_result { + Ok(message) => match serde_json::from_slice::(&message.payload) { + Ok(payload) => { + batch.push(TypedMessage { payload, message }); + } + Err(e) => { + warn!( + target: TRACING_TARGET_STREAM, + error = %e, + "Failed to deserialize message payload" + ); + // Continue processing other messages + } + }, + Err(e) => { + warn!( + target: TRACING_TARGET_STREAM, + error = %e, + "Error receiving message in batch" + ); + } + } + } + + debug!( + target: TRACING_TARGET_STREAM, + batch_size = batch.len(), + "Received batch of typed messages" + ); + + Ok(batch) + } + Err(e) => Err(Error::operation("batch_fetch", e.to_string())), + } + } +} + +/// A typed message from the stream. +pub struct TypedMessage { + /// The deserialized payload. + pub payload: T, + /// The underlying NATS message for metadata and acknowledgment. 
+ message: Message, +} + +impl TypedMessage { + /// Get the message subject. + pub fn subject(&self) -> &str { + &self.message.subject + } + + /// Get the message metadata. + pub fn info(&self) -> Result> { + self.message + .info() + .map_err(|e| Error::operation("message_info", e.to_string())) + } + + /// Acknowledge the message. + pub async fn ack(&mut self) -> Result<()> { + self.message + .ack() + .await + .map_err(|e| Error::operation("message_ack", e.to_string())) + } + + /// Negative acknowledge the message (trigger redelivery). + pub async fn nack(&mut self) -> Result<()> { + self.message + .ack_with(jetstream::AckKind::Nak(None)) + .await + .map_err(|e| Error::operation("message_nack", e.to_string())) + } + + /// Get a reference to the typed payload. + pub fn payload(&self) -> &T { + &self.payload + } + + /// Consume the message and return the payload. + pub fn into_payload(self) -> T { + self.payload + } + + /// Get message headers if available. + pub fn headers(&self) -> Option<&async_nats::HeaderMap> { + self.message.headers.as_ref() + } + + /// Get message sequence number. + pub fn sequence(&self) -> Result { + self.info() + .map(|info| info.stream_sequence) + .map_err(|e| Error::operation("get_sequence", e.to_string())) + } + + /// Check if this message is a redelivery. + pub fn is_redelivery(&self) -> Result { + self.info() + .map(|info| info.delivered > 1) + .map_err(|e| Error::operation("check_redelivery", e.to_string())) + } + + /// Get the number of delivery attempts. + pub fn delivery_count(&self) -> Result { + self.info() + .map(|info| info.delivered as usize) + .map_err(|e| Error::operation("get_delivery_count", e.to_string())) + } + + /// Acknowledge with explicit acknowledgment kind. + pub async fn ack_with(&mut self, ack_kind: jetstream::AckKind) -> Result<()> { + self.message + .ack_with(ack_kind) + .await + .map_err(|e| Error::operation("message_ack_with", e.to_string())) + } + + /// Double acknowledge (useful for at-least-once processing). 
+ pub async fn double_ack(&mut self) -> Result<()> { + self.message + .double_ack() + .await + .map_err(|e| Error::operation("message_double_ack", e.to_string())) + } +} + +#[cfg(test)] +mod tests {} diff --git a/migrations/2025-05-21-222840_workspaces/up.sql b/migrations/2025-05-21-222840_workspaces/up.sql index 79d47dd..646dc84 100644 --- a/migrations/2025-05-21-222840_workspaces/up.sql +++ b/migrations/2025-05-21-222840_workspaces/up.sql @@ -498,9 +498,11 @@ CREATE TABLE workspace_webhooks ( -- Event configuration events WEBHOOK_EVENT[] NOT NULL DEFAULT '{}', headers JSONB NOT NULL DEFAULT '{}', + secret TEXT NOT NULL DEFAULT encode(gen_random_bytes(32), 'hex'), CONSTRAINT workspace_webhooks_events_not_empty CHECK (array_length(events, 1) > 0), CONSTRAINT workspace_webhooks_headers_size CHECK (length(headers::TEXT) BETWEEN 2 AND 4096), + CONSTRAINT workspace_webhooks_secret_length CHECK (length(secret) = 64), -- Webhook status status WEBHOOK_STATUS NOT NULL DEFAULT 'active', From 93e2aaf3cfe10d99111e18744a2d2e43e4dcbe26 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Wed, 21 Jan 2026 18:08:49 +0100 Subject: [PATCH 13/28] feat(rig): refactor provider module with type-safe models and feature-gated Ollama - Add provider/embedding module with EmbeddingModel, EmbeddingCredentials, EmbeddingProvider - Add provider/completion module with CompletionModel, CompletionCredentials - Add provider/splitting module (moved from rag/splitter) with improved observability - Feature-gate Ollama support behind 'ollama' feature flag (default enabled) - Remove old config.rs and registry.rs (replaced by type-safe enums) - Simplify error.rs with EmbeddingError variant - Update rig-core to 0.29 - Store rig EmbeddingModel directly in EmbeddingProvider variants (except Ollama) --- Cargo.lock | 7 +- Cargo.toml | 2 +- crates/nvisy-postgres/src/model/pipeline.rs | 31 +- crates/nvisy-postgres/src/query/pipeline.rs | 8 +- .../src/types/enums/pipeline_status.rs | 26 +- crates/nvisy-rig/Cargo.toml | 7 +- crates/nvisy-rig/src/chat/agent/context.rs | 3 +- crates/nvisy-rig/src/chat/agent/executor.rs | 31 +- crates/nvisy-rig/src/chat/agent/mod.rs | 44 +-- crates/nvisy-rig/src/chat/service.rs | 52 +--- crates/nvisy-rig/src/chat/stream.rs | 22 +- crates/nvisy-rig/src/error.rs | 49 +-- .../src/provider/completion/credentials.rs | 21 ++ .../nvisy-rig/src/provider/completion/mod.rs | 10 + .../src/provider/completion/model.rs | 156 ++++++++++ crates/nvisy-rig/src/provider/config.rs | 155 ---------- crates/nvisy-rig/src/provider/embedding.rs | 92 ------ .../src/provider/embedding/credentials.rs | 18 ++ .../nvisy-rig/src/provider/embedding/mod.rs | 11 + .../nvisy-rig/src/provider/embedding/model.rs | 154 ++++++++++ .../src/provider/embedding/provider.rs | 194 ++++++++++++ crates/nvisy-rig/src/provider/mod.rs | 24 +- crates/nvisy-rig/src/provider/registry.rs | 204 ------------- .../nvisy-rig/src/provider/splitting/chunk.rs | 43 +++ .../src/provider/splitting/metadata.rs | 40 +++ .../nvisy-rig/src/provider/splitting/mod.rs | 9 + .../src/provider/splitting/splitter.rs | 179 +++++++++++ crates/nvisy-rig/src/rag/indexer/mod.rs | 48 ++- crates/nvisy-rig/src/rag/mod.rs | 32 +- crates/nvisy-rig/src/rag/searcher/mod.rs | 6 +- crates/nvisy-rig/src/rag/splitter/chunk.rs | 33 -- crates/nvisy-rig/src/rag/splitter/metadata.rs | 32 -- crates/nvisy-rig/src/rag/splitter/mod.rs | 105 ------- crates/nvisy-rig/src/service/config.rs | 30 +- crates/nvisy-rig/src/service/rig.rs | 13 +- crates/nvisy-runtime/Cargo.toml | 2 + 
crates/nvisy-runtime/src/engine/context.rs | 61 +++- crates/nvisy-runtime/src/engine/executor.rs | 99 ++++-- crates/nvisy-runtime/src/graph/data.rs | 60 ---- crates/nvisy-runtime/src/graph/edge.rs | 11 + crates/nvisy-runtime/src/graph/id.rs | 61 ---- crates/nvisy-runtime/src/graph/input/mod.rs | 77 +++-- crates/nvisy-runtime/src/graph/mod.rs | 22 +- crates/nvisy-runtime/src/graph/node.rs | 139 +++++++++ crates/nvisy-runtime/src/graph/output/mod.rs | 77 +++-- crates/nvisy-runtime/src/graph/route/cache.rs | 33 ++ crates/nvisy-runtime/src/graph/route/mod.rs | 11 + .../nvisy-runtime/src/graph/route/switch.rs | 143 +++++++++ .../src/graph/transform/chunk.rs | 63 ++++ .../src/graph/transform/embedding.rs | 12 + .../src/graph/transform/enrich.rs | 14 + .../nvisy-runtime/src/graph/transform/mod.rs | 27 ++ .../src/graph/transform/partition.rs | 10 + .../src/graph/transformer/chunking.rs | 288 ------------------ .../src/graph/transformer/config.rs | 93 ------ .../src/graph/transformer/document.rs | 104 ------- .../src/graph/transformer/embedding.rs | 270 ---------------- .../src/graph/transformer/extraction.rs | 136 --------- .../src/graph/transformer/mod.rs | 88 ------ .../src/graph/transformer/processing.rs | 131 -------- .../src/graph/transformer/quality.rs | 147 --------- .../src/graph/transformer/routing.rs | 134 -------- crates/nvisy-runtime/src/graph/workflow.rs | 30 +- .../src/provider/runtime/config.rs | 4 +- crates/nvisy-server/Cargo.toml | 1 + crates/nvisy-server/src/handler/pipelines.rs | 9 +- .../src/handler/request/pipelines.rs | 24 +- .../src/handler/response/pipelines.rs | 34 +-- .../nvisy-server/src/middleware/constants.rs | 4 +- migrations/2026-01-19-045012_pipelines/up.sql | 2 +- 70 files changed, 1720 insertions(+), 2592 deletions(-) create mode 100644 crates/nvisy-rig/src/provider/completion/credentials.rs create mode 100644 crates/nvisy-rig/src/provider/completion/mod.rs create mode 100644 crates/nvisy-rig/src/provider/completion/model.rs delete mode 100644 crates/nvisy-rig/src/provider/config.rs delete mode 100644 crates/nvisy-rig/src/provider/embedding.rs create mode 100644 crates/nvisy-rig/src/provider/embedding/credentials.rs create mode 100644 crates/nvisy-rig/src/provider/embedding/mod.rs create mode 100644 crates/nvisy-rig/src/provider/embedding/model.rs create mode 100644 crates/nvisy-rig/src/provider/embedding/provider.rs delete mode 100644 crates/nvisy-rig/src/provider/registry.rs create mode 100644 crates/nvisy-rig/src/provider/splitting/chunk.rs create mode 100644 crates/nvisy-rig/src/provider/splitting/metadata.rs create mode 100644 crates/nvisy-rig/src/provider/splitting/mod.rs create mode 100644 crates/nvisy-rig/src/provider/splitting/splitter.rs delete mode 100644 crates/nvisy-rig/src/rag/splitter/chunk.rs delete mode 100644 crates/nvisy-rig/src/rag/splitter/metadata.rs delete mode 100644 crates/nvisy-rig/src/rag/splitter/mod.rs delete mode 100644 crates/nvisy-runtime/src/graph/data.rs delete mode 100644 crates/nvisy-runtime/src/graph/id.rs create mode 100644 crates/nvisy-runtime/src/graph/node.rs create mode 100644 crates/nvisy-runtime/src/graph/route/cache.rs create mode 100644 crates/nvisy-runtime/src/graph/route/mod.rs create mode 100644 crates/nvisy-runtime/src/graph/route/switch.rs create mode 100644 crates/nvisy-runtime/src/graph/transform/chunk.rs create mode 100644 crates/nvisy-runtime/src/graph/transform/embedding.rs create mode 100644 crates/nvisy-runtime/src/graph/transform/enrich.rs create mode 100644 crates/nvisy-runtime/src/graph/transform/mod.rs 
create mode 100644 crates/nvisy-runtime/src/graph/transform/partition.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/chunking.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/config.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/document.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/embedding.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/extraction.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/mod.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/processing.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/quality.rs delete mode 100644 crates/nvisy-runtime/src/graph/transformer/routing.rs diff --git a/Cargo.lock b/Cargo.lock index b2540c4..6a50ed9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3471,8 +3471,10 @@ dependencies = [ "derive_builder", "derive_more", "futures", + "jiff", "nvisy-core", "nvisy-dal", + "nvisy-rig", "nvisy-rt-core", "nvisy-rt-engine", "petgraph 0.8.3", @@ -3508,6 +3510,7 @@ dependencies = [ "jsonwebtoken 10.2.0", "nvisy-nats", "nvisy-postgres", + "nvisy-runtime", "nvisy-webhook", "rand 0.10.0-rc.6", "regex", @@ -4545,9 +4548,9 @@ dependencies = [ [[package]] name = "rig-core" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b1a48121c1ecd6f6ce59d64ec353c791aac6fc07bf4aa353380e8185659e6eb" +checksum = "7207790134ee24d87ac3d022c308e1a7c871219d139acf70d13be76c1f6919c5" dependencies = [ "as-any", "async-stream", diff --git a/Cargo.toml b/Cargo.toml index 576fc6d..8477672 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -136,7 +136,7 @@ text-splitter = { version = "0.29", features = [] } woothee = { version = "0.13", features = [] } # AI/ML frameworks -rig-core = { version = "0.28", default-features = false, features = ["reqwest-rustls"] } +rig-core = { version = "0.29", default-features = false, features = ["reqwest-rustls"] } # Storage abstractions and providers opendal = { version = "0.53", features = [] } diff --git a/crates/nvisy-postgres/src/model/pipeline.rs b/crates/nvisy-postgres/src/model/pipeline.rs index 399e067..b239bb0 100644 --- a/crates/nvisy-postgres/src/model/pipeline.rs +++ b/crates/nvisy-postgres/src/model/pipeline.rs @@ -87,9 +87,9 @@ impl Pipeline { self.status.is_draft() } - /// Returns whether the pipeline is active. - pub fn is_active(&self) -> bool { - self.status.is_active() + /// Returns whether the pipeline is enabled. + pub fn is_enabled(&self) -> bool { + self.status.is_enabled() } /// Returns whether the pipeline is disabled. @@ -97,35 +97,10 @@ impl Pipeline { self.status.is_disabled() } - /// Returns whether the pipeline can be executed. - pub fn is_runnable(&self) -> bool { - self.status.is_runnable() && !self.is_deleted() - } - - /// Returns whether the pipeline can be edited. - pub fn is_editable(&self) -> bool { - self.status.is_editable() && !self.is_deleted() - } - /// Returns whether the pipeline has a description. pub fn has_description(&self) -> bool { self.description.as_ref().is_some_and(|d| !d.is_empty()) } - - /// Returns whether the pipeline has custom metadata. - pub fn has_metadata(&self) -> bool { - !self.metadata.as_object().is_none_or(|obj| obj.is_empty()) - } - - /// Returns the steps from the definition, if any. - pub fn steps(&self) -> Option<&Vec> { - self.definition.get("steps")?.as_array() - } - - /// Returns the number of steps in the pipeline. 
- pub fn step_count(&self) -> usize { - self.steps().map_or(0, |s| s.len()) - } } impl HasCreatedAt for Pipeline { diff --git a/crates/nvisy-postgres/src/query/pipeline.rs b/crates/nvisy-postgres/src/query/pipeline.rs index c45332e..1789cc6 100644 --- a/crates/nvisy-postgres/src/query/pipeline.rs +++ b/crates/nvisy-postgres/src/query/pipeline.rs @@ -60,8 +60,8 @@ pub trait PipelineRepository { pagination: OffsetPagination, ) -> impl Future>> + Send; - /// Lists active pipelines in a workspace. - fn list_active_workspace_pipelines( + /// Lists enabled pipelines in a workspace. + fn list_enabled_workspace_pipelines( &mut self, workspace_id: Uuid, ) -> impl Future>> + Send; @@ -269,7 +269,7 @@ impl PipelineRepository for PgConnection { Ok(pipelines) } - async fn list_active_workspace_pipelines( + async fn list_enabled_workspace_pipelines( &mut self, workspace_id: Uuid, ) -> PgResult> { @@ -277,7 +277,7 @@ impl PipelineRepository for PgConnection { let pipelines = pipelines::table .filter(dsl::workspace_id.eq(workspace_id)) - .filter(dsl::status.eq(PipelineStatus::Active)) + .filter(dsl::status.eq(PipelineStatus::Enabled)) .filter(dsl::deleted_at.is_null()) .order(dsl::name.asc()) .select(Pipeline::as_select()) diff --git a/crates/nvisy-postgres/src/types/enums/pipeline_status.rs b/crates/nvisy-postgres/src/types/enums/pipeline_status.rs index a0e5a84..d41d586 100644 --- a/crates/nvisy-postgres/src/types/enums/pipeline_status.rs +++ b/crates/nvisy-postgres/src/types/enums/pipeline_status.rs @@ -9,7 +9,7 @@ use strum::{Display, EnumIter, EnumString}; /// Defines the lifecycle status of a pipeline definition. /// /// This enumeration corresponds to the `PIPELINE_STATUS` PostgreSQL enum and is used -/// to track whether a pipeline is being configured, active and ready to run, or disabled. +/// to track whether a pipeline is being configured, enabled and ready to run, or disabled. #[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] #[cfg_attr(feature = "schema", derive(JsonSchema))] #[derive(Serialize, Deserialize, DbEnum, Display, EnumIter, EnumString)] @@ -22,9 +22,9 @@ pub enum PipelineStatus { Draft, /// Pipeline is ready to run - #[db_rename = "active"] - #[serde(rename = "active")] - Active, + #[db_rename = "enabled"] + #[serde(rename = "enabled")] + Enabled, /// Pipeline is disabled #[db_rename = "disabled"] @@ -39,10 +39,10 @@ impl PipelineStatus { matches!(self, PipelineStatus::Draft) } - /// Returns whether the pipeline is active. + /// Returns whether the pipeline is enabled. #[inline] - pub fn is_active(self) -> bool { - matches!(self, PipelineStatus::Active) + pub fn is_enabled(self) -> bool { + matches!(self, PipelineStatus::Enabled) } /// Returns whether the pipeline is disabled. @@ -50,16 +50,4 @@ impl PipelineStatus { pub fn is_disabled(self) -> bool { matches!(self, PipelineStatus::Disabled) } - - /// Returns whether the pipeline can be executed. - #[inline] - pub fn is_runnable(self) -> bool { - matches!(self, PipelineStatus::Active) - } - - /// Returns whether the pipeline can be edited. 
- #[inline] - pub fn is_editable(self) -> bool { - matches!(self, PipelineStatus::Draft | PipelineStatus::Disabled) - } } diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index dc85011..e532293 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -15,13 +15,16 @@ keywords = ["rig", "llm", "ai", "client", "rag"] categories = ["api-bindings", "web-programming::http-client"] [features] -## Default feature set (none for minimal dependencies) -default = [] +## Default feature set includes Ollama for local development +default = ["ollama"] ## CLI configuration support: enables clap derives for config types ## This allows config types to be used with command-line argument parsing config = ["dep:clap"] +## Ollama local model support +ollama = [] + [dependencies] # Internal crates nvisy-nats = { workspace = true } diff --git a/crates/nvisy-rig/src/chat/agent/context.rs b/crates/nvisy-rig/src/chat/agent/context.rs index d7468e4..9d9256d 100644 --- a/crates/nvisy-rig/src/chat/agent/context.rs +++ b/crates/nvisy-rig/src/chat/agent/context.rs @@ -1,6 +1,7 @@ //! Agent context for a single request. -use crate::rag::{RetrievedChunk, estimate_tokens}; +use crate::provider::estimate_tokens; +use crate::rag::RetrievedChunk; use crate::session::Session; /// Context for an agent request. diff --git a/crates/nvisy-rig/src/chat/agent/executor.rs b/crates/nvisy-rig/src/chat/agent/executor.rs index 02572fd..4bdb38e 100644 --- a/crates/nvisy-rig/src/chat/agent/executor.rs +++ b/crates/nvisy-rig/src/chat/agent/executor.rs @@ -7,57 +7,38 @@ use futures::stream::BoxStream; use super::{AgentConfig, AgentContext, ChatEvent}; use crate::Result; -use crate::provider::{ModelRef, ProviderRegistry}; +use crate::provider::CompletionModel; use crate::tool::ToolRegistry; /// Executor for running the agent loop. pub struct AgentExecutor { config: AgentConfig, - providers: Arc, tools: Arc, context: AgentContext, - model_override: Option, + model: CompletionModel, } impl AgentExecutor { /// Creates a new executor. pub fn new( config: AgentConfig, - providers: Arc, tools: Arc, context: AgentContext, - model_override: Option, + model: CompletionModel, ) -> Self { Self { config, - providers, tools, context, - model_override, + model, } } /// Runs the agent loop and returns a stream of events. pub async fn run(self) -> Result>> { - // TODO: Implement the actual agent loop: - // 1. Build the prompt with system message, context, and history - // 2. Stream completion from the provider - // 3. Parse tool calls from the response - // 4. Execute tools and collect results - // 5. If tools were called, loop back to step 2 - // 6. Extract proposed edits from tool results - // 7. Apply auto-apply policies - // 8. Emit final Done event + // TODO: Implement the actual agent loop + let _ = (&self.config, &self.tools, &self.context, &self.model); - let _ = ( - &self.config, - &self.providers, - &self.tools, - &self.context, - &self.model_override, - ); - - // For now, return an empty stream let stream = futures::stream::empty(); Ok(stream.boxed()) } diff --git a/crates/nvisy-rig/src/chat/agent/mod.rs b/crates/nvisy-rig/src/chat/agent/mod.rs index e0277e1..929d18d 100644 --- a/crates/nvisy-rig/src/chat/agent/mod.rs +++ b/crates/nvisy-rig/src/chat/agent/mod.rs @@ -1,10 +1,4 @@ //! Agent module for orchestrating AI-powered document processing. -//! -//! The agent is responsible for: -//! - Managing the conversation loop with the LLM -//! - Executing tool calls -//! - Proposing and applying edits -//! 
- Streaming responses back to the client mod context; mod executor; @@ -20,7 +14,7 @@ use uuid::Uuid; use super::ChatEvent; use crate::Result; -use crate::provider::{ModelRef, ProviderRegistry}; +use crate::provider::CompletionModel; use crate::rag::RetrievedChunk; use crate::session::Session; use crate::tool::ToolRegistry; @@ -40,6 +34,9 @@ pub struct AgentConfig { /// Whether to include thinking in output. pub include_thinking: bool, + + /// Default completion model. + pub default_model: CompletionModel, } impl Default for AgentConfig { @@ -49,6 +46,7 @@ impl Default for AgentConfig { max_tokens: 4096, temperature: 0.7, include_thinking: false, + default_model: CompletionModel::Ollama("llama3.2".to_string()), } } } @@ -56,54 +54,34 @@ impl Default for AgentConfig { /// The core agent that processes chat messages. pub struct Agent { config: AgentConfig, - providers: Arc, tools: Arc, } impl Agent { /// Creates a new agent. - pub fn new( - config: AgentConfig, - providers: Arc, - tools: Arc, - ) -> Self { - Self { - config, - providers, - tools, - } + pub fn new(config: AgentConfig, tools: Arc) -> Self { + Self { config, tools } } /// Processes a chat message and returns a stream of events. - /// - /// The `retrieved_chunks` should be pre-fetched using the RAG system. pub async fn process( &self, session: &Session, message: &str, retrieved_chunks: Vec, - model_override: Option<&ModelRef>, + model_override: Option, ) -> Result>> { - // Build context for this request let context = AgentContext::new(session.clone(), message.to_string(), retrieved_chunks); - // Create executor - let executor = AgentExecutor::new( - self.config.clone(), - self.providers.clone(), - self.tools.clone(), - context, - model_override.cloned(), - ); + let model = model_override.unwrap_or_else(|| self.config.default_model.clone()); + + let executor = AgentExecutor::new(self.config.clone(), self.tools.clone(), context, model); - // Run the agent loop executor.run().await } /// Returns proposed edits from an agent run. pub fn extract_edits(&self, _events: &[ChatEvent]) -> Vec { - // Extract proposed edits from the event stream - // This is called after processing to collect all edits Vec::new() } } diff --git a/crates/nvisy-rig/src/chat/service.rs b/crates/nvisy-rig/src/chat/service.rs index 698ad1a..3985ea1 100644 --- a/crates/nvisy-rig/src/chat/service.rs +++ b/crates/nvisy-rig/src/chat/service.rs @@ -7,40 +7,33 @@ use uuid::Uuid; use super::ChatStream; use crate::Result; -use crate::provider::{ModelRef, ProviderRegistry}; +use crate::provider::{CompletionModel, EmbeddingProvider}; use crate::session::{CreateSession, Session, SessionStore}; use crate::tool::ToolRegistry; use crate::tool::edit::ApplyResult; /// Inner state for [`ChatService`]. struct ChatServiceInner { - providers: ProviderRegistry, + embedding_provider: EmbeddingProvider, tools: ToolRegistry, sessions: SessionStore, } /// Chat service for AI-powered document conversations. -/// -/// This type is cheap to clone and can be shared across threads. -/// -/// Provides a high-level API for: -/// - Creating and managing chat sessions -/// - Streaming chat responses with tool use -/// - Approving and applying document edits #[derive(Clone)] pub struct ChatService { inner: Arc, } impl ChatService { - /// Creates a new ChatService with automatic ToolRegistry and SessionStore. - pub async fn new(providers: ProviderRegistry, nats: NatsClient) -> Result { + /// Creates a new ChatService. 
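// Usage sketch (illustrative, not applied by this patch): overriding the agent's default
// completion model instead of relying on `AgentConfig::default()`, which now falls back to
// the local Ollama "llama3.2" model. Import paths for `Agent`, `AgentConfig`, and
// `ToolRegistry` are assumptions; the provider types are re-exported as in provider/mod.rs.
use std::sync::Arc;

use nvisy_rig::provider::{AnthropicModel, CompletionModel};

fn build_agent(tools: Arc<ToolRegistry>) -> Agent {
    let config = AgentConfig {
        default_model: CompletionModel::Anthropic(AnthropicModel::ClaudeSonnet4),
        ..AgentConfig::default()
    };
    Agent::new(config, tools)
}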
+ pub async fn new(embedding_provider: EmbeddingProvider, nats: NatsClient) -> Result { let tools = ToolRegistry::with_defaults(); let sessions = SessionStore::new(nats).await?; Ok(Self { inner: Arc::new(ChatServiceInner { - providers, + embedding_provider, tools, sessions, }), @@ -49,13 +42,13 @@ impl ChatService { /// Creates a new ChatService with custom tools and session store. pub fn with_components( - providers: ProviderRegistry, + embedding_provider: EmbeddingProvider, tools: ToolRegistry, sessions: SessionStore, ) -> Self { Self { inner: Arc::new(ChatServiceInner { - providers, + embedding_provider, tools, sessions, }), @@ -75,14 +68,9 @@ impl ChatService { } /// Sends a chat message and returns a streaming response. - /// - /// The stream emits [`ChatEvent`](super::ChatEvent)s as the agent processes the request, - /// including thinking, tool calls, proposed edits, and text deltas. pub async fn chat(&self, session_id: Uuid, message: &str) -> Result { - // Touch session to reset TTL self.inner.sessions.touch(session_id).await?; - // Get session let session = self .inner .sessions @@ -90,7 +78,6 @@ impl ChatService { .await? .ok_or_else(|| crate::Error::session("session not found"))?; - // Create chat stream ChatStream::new(session, message.to_string(), self.clone()).await } @@ -99,12 +86,10 @@ impl ChatService { &self, session_id: Uuid, message: &str, - model: ModelRef, + model: CompletionModel, ) -> Result { - // Touch session to reset TTL self.inner.sessions.touch(session_id).await?; - // Get session let session = self .inner .sessions @@ -112,7 +97,6 @@ impl ChatService { .await? .ok_or_else(|| crate::Error::session("session not found"))?; - // Create chat stream with model override ChatStream::with_model(session, message.to_string(), Some(model), self.clone()).await } @@ -152,22 +136,14 @@ impl ChatService { } /// Generates embeddings for text. - /// - /// Used for indexing documents into the vector store. - pub async fn embed(&self, text: &str, model: Option<&ModelRef>) -> Result> { - let (_provider, _model_name) = self.inner.providers.resolve_embedding(model)?; - - // TODO: Implement using rig-core embedding - let _ = text; - Err(crate::Error::provider( - "rig", - "embedding not yet implemented", - )) + pub async fn embed(&self, text: &str) -> Result> { + let embedding = self.inner.embedding_provider.embed_text(text).await?; + Ok(embedding.vec) } - /// Returns a reference to the provider registry. - pub fn providers(&self) -> &ProviderRegistry { - &self.inner.providers + /// Returns a reference to the embedding provider. + pub fn embedding_provider(&self) -> &EmbeddingProvider { + &self.inner.embedding_provider } /// Returns a reference to the tool registry. diff --git a/crates/nvisy-rig/src/chat/stream.rs b/crates/nvisy-rig/src/chat/stream.rs index bff367b..a6758e5 100644 --- a/crates/nvisy-rig/src/chat/stream.rs +++ b/crates/nvisy-rig/src/chat/stream.rs @@ -8,20 +8,17 @@ use uuid::Uuid; use super::{ChatEvent, ChatResponse, ChatService, UsageStats}; use crate::Result; -use crate::provider::ModelRef; +use crate::provider::CompletionModel; use crate::session::Session; use crate::tool::edit::ProposedEdit; /// Streaming chat response. -/// -/// Implements `Stream>` for async iteration. 
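// Usage sketch (illustrative, not applied by this patch): the reworked ChatService surface.
// `embed` now delegates to the configured EmbeddingProvider, and per-request model overrides
// are typed `CompletionModel` values. The `ChatService` import path is an assumption.
use nvisy_rig::chat::ChatService;
use nvisy_rig::provider::{CompletionModel, OpenAiCompletionModel};

async fn ask(service: &ChatService, session_id: uuid::Uuid) -> nvisy_rig::Result<()> {
    let vector: Vec<f64> = service.embed("What does section 3 cover?").await?;
    tracing::debug!(
        dims = vector.len(),
        expected = service.embedding_provider().ndims(),
        "embedded query"
    );

    let _stream = service
        .chat_with_model(
            session_id,
            "Summarize section 3",
            CompletionModel::OpenAi(OpenAiCompletionModel::Gpt4oMini),
        )
        .await?;
    Ok(())
}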
pub struct ChatStream { session: Session, message: String, - model_override: Option, + model_override: Option, service: ChatService, - // State started: bool, finished: bool, accumulated_content: String, @@ -49,7 +46,7 @@ impl ChatStream { pub async fn with_model( session: Session, message: String, - model_override: Option, + model_override: Option, service: ChatService, ) -> Result { Ok(Self { @@ -75,7 +72,6 @@ impl ChatStream { self.session.document_id() } - /// Polls the underlying agent for the next event. fn poll_next_event(&mut self, _cx: &mut Context<'_>) -> Poll>> { if self.finished { return Poll::Ready(None); @@ -84,24 +80,14 @@ impl ChatStream { if !self.started { self.started = true; - // TODO: Start the actual agent pipeline: - // 1. Retrieve relevant context via RAG - // 2. Build prompt with tools, context, and history - // 3. Stream completion from provider - // 4. Handle tool calls and proposed edits - // 5. Apply auto-apply policies - - // For now, emit a placeholder response - // These references silence unused warnings until the pipeline is implemented let _ = (&self.message, &self.service, &self.accumulated_content); - // Emit done event with placeholder self.finished = true; let model = self .model_override .as_ref() - .map(|m| m.to_string()) + .map(|m| m.as_str().to_string()) .unwrap_or_else(|| "default".to_string()); let response = ChatResponse::new( diff --git a/crates/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs index 263cb51..75d6284 100644 --- a/crates/nvisy-rig/src/error.rs +++ b/crates/nvisy-rig/src/error.rs @@ -2,6 +2,8 @@ use std::fmt; +use rig::embeddings::EmbeddingError; + /// Result type alias for rig operations. pub type Result = std::result::Result; @@ -16,37 +18,17 @@ pub enum Error { #[error("session error: {0}")] Session(String), - /// Agent execution error. - #[error("agent error: {0}")] - Agent(String), - - /// Tool execution error. - #[error("tool error: {tool}: {message}")] - Tool { tool: String, message: String }, - /// RAG retrieval error. #[error("retrieval error: {0}")] Retrieval(String), /// Embedding error. #[error("embedding error: {0}")] - Embedding(String), - - /// Edit error. - #[error("edit error: {0}")] - Edit(String), + Embedding(#[from] EmbeddingError), /// Configuration error. #[error("configuration error: {0}")] Config(String), - - /// Serialization error. - #[error("serialization error: {0}")] - Serialization(#[from] serde_json::Error), - - /// I/O error. - #[error("io error: {0}")] - Io(#[from] std::io::Error), } impl Error { @@ -63,34 +45,11 @@ impl Error { Self::Session(message.to_string()) } - /// Creates an agent error. - pub fn agent(message: impl fmt::Display) -> Self { - Self::Agent(message.to_string()) - } - - /// Creates a tool error. - pub fn tool(tool: impl fmt::Display, message: impl fmt::Display) -> Self { - Self::Tool { - tool: tool.to_string(), - message: message.to_string(), - } - } - /// Creates a retrieval error. pub fn retrieval(message: impl fmt::Display) -> Self { Self::Retrieval(message.to_string()) } - /// Creates an embedding error. - pub fn embedding(message: impl fmt::Display) -> Self { - Self::Embedding(message.to_string()) - } - - /// Creates an edit error. - pub fn edit(message: impl fmt::Display) -> Self { - Self::Edit(message.to_string()) - } - /// Creates a configuration error. pub fn config(message: impl fmt::Display) -> Self { Self::Config(message.to_string()) @@ -98,6 +57,6 @@ impl Error { /// Returns true if this error is retryable. 
pub fn is_retryable(&self) -> bool { - matches!(self, Self::Provider { .. } | Self::Io(_)) + matches!(self, Self::Provider { .. }) } } diff --git a/crates/nvisy-rig/src/provider/completion/credentials.rs b/crates/nvisy-rig/src/provider/completion/credentials.rs new file mode 100644 index 0000000..795dea5 --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/credentials.rs @@ -0,0 +1,21 @@ +//! Completion provider credentials. + +use serde::{Deserialize, Serialize}; + +/// Credentials for completion providers. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "provider", rename_all = "snake_case")] +pub enum CompletionCredentials { + /// OpenAI credentials. + OpenAi { api_key: String }, + /// Anthropic credentials. + Anthropic { api_key: String }, + /// Cohere credentials. + Cohere { api_key: String }, + /// Google Gemini credentials. + Gemini { api_key: String }, + /// Perplexity credentials. + Perplexity { api_key: String }, + /// Ollama credentials (local, no API key required). + Ollama { base_url: String }, +} diff --git a/crates/nvisy-rig/src/provider/completion/mod.rs b/crates/nvisy-rig/src/provider/completion/mod.rs new file mode 100644 index 0000000..6422c6e --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/mod.rs @@ -0,0 +1,10 @@ +//! Completion models and providers. + +mod credentials; +mod model; + +pub use credentials::CompletionCredentials; +pub use model::{ + AnthropicModel, CohereCompletionModel, CompletionModel, GeminiCompletionModel, + OpenAiCompletionModel, PerplexityModel, +}; diff --git a/crates/nvisy-rig/src/provider/completion/model.rs b/crates/nvisy-rig/src/provider/completion/model.rs new file mode 100644 index 0000000..33d874b --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/model.rs @@ -0,0 +1,156 @@ +//! Type-safe completion model references. + +use serde::{Deserialize, Serialize}; + +/// Reference to a completion/chat model. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "provider", content = "model", rename_all = "snake_case")] +pub enum CompletionModel { + /// OpenAI completion models. + OpenAi(OpenAiCompletionModel), + /// Anthropic models. + Anthropic(AnthropicModel), + /// Cohere completion models. + Cohere(CohereCompletionModel), + /// Google Gemini completion models. + Gemini(GeminiCompletionModel), + /// Perplexity models. + Perplexity(PerplexityModel), + /// Ollama local models (model name as string). + Ollama(String), +} + +/// OpenAI completion models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum OpenAiCompletionModel { + /// GPT-4o (multimodal flagship) + Gpt4o, + /// GPT-4o mini (fast, affordable) + Gpt4oMini, + /// GPT-4 Turbo + Gpt4Turbo, + /// o1 (reasoning) + O1, + /// o1 mini (fast reasoning) + O1Mini, + /// o3 mini (latest reasoning) + O3Mini, +} + +impl OpenAiCompletionModel { + pub fn as_str(&self) -> &'static str { + match self { + Self::Gpt4o => "gpt-4o", + Self::Gpt4oMini => "gpt-4o-mini", + Self::Gpt4Turbo => "gpt-4-turbo", + Self::O1 => "o1", + Self::O1Mini => "o1-mini", + Self::O3Mini => "o3-mini", + } + } +} + +/// Anthropic models. 
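// Usage sketch (illustrative, not applied by this patch): the wire shape implied by the
// internally tagged credentials enum (`#[serde(tag = "provider", rename_all = "snake_case")]`).
// The exact JSON strings follow from serde's rename rules, not from this patch.
use nvisy_rig::provider::CompletionCredentials;

fn credentials_wire_format() {
    let hosted: CompletionCredentials =
        serde_json::from_str(r#"{ "provider": "open_ai", "api_key": "sk-..." }"#).unwrap();
    match hosted {
        CompletionCredentials::OpenAi { api_key } => assert!(api_key.starts_with("sk-")),
        _ => unreachable!("the `provider` tag selects the OpenAi variant"),
    }

    let local = CompletionCredentials::Ollama {
        base_url: "http://localhost:11434".to_string(),
    };
    assert_eq!(serde_json::to_value(&local).unwrap()["provider"], "ollama");
}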
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum AnthropicModel { + /// Claude Opus 4 (most capable) + ClaudeOpus4, + /// Claude Sonnet 4 (balanced) + ClaudeSonnet4, + /// Claude Haiku 3.5 (fast) + ClaudeHaiku35, +} + +impl AnthropicModel { + pub fn as_str(&self) -> &'static str { + match self { + Self::ClaudeOpus4 => "claude-opus-4-20250514", + Self::ClaudeSonnet4 => "claude-sonnet-4-20250514", + Self::ClaudeHaiku35 => "claude-3-5-haiku-20241022", + } + } +} + +/// Cohere completion models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum CohereCompletionModel { + /// Command R+ (most capable) + CommandRPlus, + /// Command R (balanced) + CommandR, + /// Command (legacy) + Command, +} + +impl CohereCompletionModel { + pub fn as_str(&self) -> &'static str { + match self { + Self::CommandRPlus => "command-r-plus", + Self::CommandR => "command-r", + Self::Command => "command", + } + } +} + +/// Google Gemini completion models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum GeminiCompletionModel { + /// Gemini 2.0 Flash (fast, multimodal) + Gemini20Flash, + /// Gemini 2.0 Flash Thinking (reasoning) + Gemini20FlashThinking, + /// Gemini 1.5 Pro (long context) + Gemini15Pro, + /// Gemini 1.5 Flash (fast) + Gemini15Flash, +} + +impl GeminiCompletionModel { + pub fn as_str(&self) -> &'static str { + match self { + Self::Gemini20Flash => "gemini-2.0-flash", + Self::Gemini20FlashThinking => "gemini-2.0-flash-thinking-exp", + Self::Gemini15Pro => "gemini-1.5-pro", + Self::Gemini15Flash => "gemini-1.5-flash", + } + } +} + +/// Perplexity models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum PerplexityModel { + /// Sonar (online, search-augmented) + Sonar, + /// Sonar Pro (online, more capable) + SonarPro, + /// Sonar Reasoning (online, reasoning) + SonarReasoning, +} + +impl PerplexityModel { + pub fn as_str(&self) -> &'static str { + match self { + Self::Sonar => "sonar", + Self::SonarPro => "sonar-pro", + Self::SonarReasoning => "sonar-reasoning", + } + } +} + +impl CompletionModel { + pub fn as_str(&self) -> &str { + match self { + Self::OpenAi(m) => m.as_str(), + Self::Anthropic(m) => m.as_str(), + Self::Cohere(m) => m.as_str(), + Self::Gemini(m) => m.as_str(), + Self::Perplexity(m) => m.as_str(), + Self::Ollama(m) => m.as_str(), + } + } +} diff --git a/crates/nvisy-rig/src/provider/config.rs b/crates/nvisy-rig/src/provider/config.rs deleted file mode 100644 index 1eb6e89..0000000 --- a/crates/nvisy-rig/src/provider/config.rs +++ /dev/null @@ -1,155 +0,0 @@ -//! Provider configuration types. - -use serde::{Deserialize, Serialize}; - -/// Supported AI providers. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum ProviderKind { - /// OpenAI (GPT-4, embeddings, etc.) - OpenAi, - /// Anthropic (Claude models) - Anthropic, - /// Cohere (Command, embeddings) - Cohere, - /// Google Gemini - Gemini, - /// Perplexity - Perplexity, -} - -impl ProviderKind { - /// Returns the provider name as a string. - pub fn as_str(&self) -> &'static str { - match self { - Self::OpenAi => "openai", - Self::Anthropic => "anthropic", - Self::Cohere => "cohere", - Self::Gemini => "gemini", - Self::Perplexity => "perplexity", - } - } - - /// Default completion model for this provider. 
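// Usage sketch (illustrative, not applied by this patch): the typed completion-model
// references that replace the old string-based "provider/model" ModelRef; `as_str()` yields
// the provider-facing model identifier.
use nvisy_rig::provider::{AnthropicModel, CompletionModel, GeminiCompletionModel};

fn pick_models() -> (CompletionModel, CompletionModel, CompletionModel) {
    let fast = CompletionModel::Gemini(GeminiCompletionModel::Gemini20Flash);
    assert_eq!(fast.as_str(), "gemini-2.0-flash");

    let careful = CompletionModel::Anthropic(AnthropicModel::ClaudeHaiku35);
    assert_eq!(careful.as_str(), "claude-3-5-haiku-20241022");

    // Local Ollama models keep a free-form name.
    let local = CompletionModel::Ollama("llama3.2".to_string());
    assert_eq!(local.as_str(), "llama3.2");

    (fast, careful, local)
}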
- pub fn default_completion_model(&self) -> &'static str { - match self { - Self::OpenAi => "gpt-4o", - Self::Anthropic => "claude-sonnet-4-20250514", - Self::Cohere => "command-r-plus", - Self::Gemini => "gemini-2.0-flash", - Self::Perplexity => "sonar", - } - } - - /// Default embedding model for this provider. - pub fn default_embedding_model(&self) -> &'static str { - match self { - Self::OpenAi => "text-embedding-3-small", - Self::Anthropic => "text-embedding-3-small", // Uses OpenAI - Self::Cohere => "embed-english-v3.0", - Self::Gemini => "text-embedding-004", - Self::Perplexity => "text-embedding-3-small", // Uses OpenAI - } - } -} - -impl std::fmt::Display for ProviderKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.as_str()) - } -} - -/// Configuration for a single provider. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProviderConfig { - /// Unique identifier for this provider instance. - pub id: String, - - /// The provider type. - pub kind: ProviderKind, - - /// API key for authentication. - pub api_key: String, - - /// Optional base URL override. - #[serde(default)] - pub base_url: Option, - - /// Model configuration. - #[serde(default)] - pub models: ModelConfig, -} - -impl ProviderConfig { - /// Creates a new provider configuration. - pub fn new(id: impl Into, kind: ProviderKind, api_key: impl Into) -> Self { - Self { - id: id.into(), - kind, - api_key: api_key.into(), - base_url: None, - models: ModelConfig::default_for(kind), - } - } - - /// Sets the base URL. - pub fn with_base_url(mut self, url: impl Into) -> Self { - self.base_url = Some(url.into()); - self - } - - /// Sets the model configuration. - pub fn with_models(mut self, models: ModelConfig) -> Self { - self.models = models; - self - } -} - -/// Model configuration for a provider. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ModelConfig { - /// Model for completions/chat. - pub completion: String, - - /// Model for embeddings. - pub embedding: String, - - /// Model for vision tasks. - #[serde(default)] - pub vision: Option, - - /// Maximum tokens for completions. - #[serde(default = "default_max_tokens")] - pub max_tokens: usize, - - /// Temperature for completions (0.0 - 2.0). - #[serde(default = "default_temperature")] - pub temperature: f32, -} - -fn default_max_tokens() -> usize { - 4096 -} - -fn default_temperature() -> f32 { - 0.7 -} - -impl ModelConfig { - /// Creates default model config for a provider. - pub fn default_for(kind: ProviderKind) -> Self { - Self { - completion: kind.default_completion_model().to_string(), - embedding: kind.default_embedding_model().to_string(), - vision: None, - max_tokens: default_max_tokens(), - temperature: default_temperature(), - } - } -} - -impl Default for ModelConfig { - fn default() -> Self { - Self::default_for(ProviderKind::OpenAi) - } -} diff --git a/crates/nvisy-rig/src/provider/embedding.rs b/crates/nvisy-rig/src/provider/embedding.rs deleted file mode 100644 index e59fa0d..0000000 --- a/crates/nvisy-rig/src/provider/embedding.rs +++ /dev/null @@ -1,92 +0,0 @@ -//! Embedding provider abstraction. -//! -//! Wraps different embedding model providers into a unified enum, -//! eliminating the need for generic parameters throughout the codebase. 
- -use nvisy_postgres::types::EMBEDDING_DIMENSIONS; -use rig::client::Nothing; -use rig::embeddings::{Embedding, EmbeddingError, EmbeddingModel}; -use rig::providers::ollama; - -/// Embedding provider that wraps different model implementations. -/// -/// This enum provides a concrete type for embedding operations, -/// removing the need for generic `M: EmbeddingModel` parameters. -/// -/// All providers use [`EMBEDDING_DIMENSIONS`] to ensure consistency with the -/// `document_chunks` table schema. -#[derive(Clone)] -pub enum EmbeddingProvider { - /// Ollama embedding model. - Ollama { - client: ollama::Client, - model: String, - }, -} - -impl EmbeddingProvider { - /// Creates a new Ollama embedding provider. - pub fn ollama(base_url: &str, model: &str) -> Self { - let client = ollama::Client::builder() - .api_key(Nothing) - .base_url(base_url) - .build() - .expect("Failed to create Ollama client"); - - Self::Ollama { - client, - model: model.to_string(), - } - } - - /// Returns the model name. - pub fn model_name(&self) -> &str { - match self { - Self::Ollama { model, .. } => model, - } - } - - /// Returns the number of dimensions. - /// - /// This always returns [`EMBEDDING_DIMENSIONS`] to ensure consistency with the database schema. - pub fn ndims(&self) -> usize { - EMBEDDING_DIMENSIONS - } - - /// Embed a single text document. - pub async fn embed_text(&self, text: &str) -> Result { - match self { - Self::Ollama { client, model } => { - let embedding_model = - ollama::EmbeddingModel::new(client.clone(), model, EMBEDDING_DIMENSIONS); - embedding_model.embed_text(text).await - } - } - } - - /// Embed multiple text documents. - pub async fn embed_texts( - &self, - texts: impl IntoIterator + Send, - ) -> Result, EmbeddingError> { - match self { - Self::Ollama { client, model } => { - let embedding_model = - ollama::EmbeddingModel::new(client.clone(), model, EMBEDDING_DIMENSIONS); - embedding_model.embed_texts(texts).await - } - } - } -} - -impl std::fmt::Debug for EmbeddingProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Ollama { model, .. } => f - .debug_struct("EmbeddingProvider::Ollama") - .field("model", model) - .field("ndims", &EMBEDDING_DIMENSIONS) - .finish(), - } - } -} diff --git a/crates/nvisy-rig/src/provider/embedding/credentials.rs b/crates/nvisy-rig/src/provider/embedding/credentials.rs new file mode 100644 index 0000000..dc1fd53 --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/credentials.rs @@ -0,0 +1,18 @@ +//! Embedding provider credentials. + +use serde::{Deserialize, Serialize}; + +/// Credentials for embedding providers. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "provider", rename_all = "snake_case")] +pub enum EmbeddingCredentials { + /// OpenAI credentials. + OpenAi { api_key: String }, + /// Cohere credentials. + Cohere { api_key: String }, + /// Google Gemini credentials. + Gemini { api_key: String }, + /// Ollama credentials. + #[cfg(feature = "ollama")] + Ollama { base_url: String }, +} diff --git a/crates/nvisy-rig/src/provider/embedding/mod.rs b/crates/nvisy-rig/src/provider/embedding/mod.rs new file mode 100644 index 0000000..760bfd7 --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/mod.rs @@ -0,0 +1,11 @@ +//! Embedding models and providers. 
+ +mod credentials; +mod model; +mod provider; + +pub use credentials::EmbeddingCredentials; +#[cfg(feature = "ollama")] +pub use model::OllamaEmbeddingModel; +pub use model::{CohereEmbeddingModel, EmbeddingModel, GeminiEmbeddingModel, OpenAiEmbeddingModel}; +pub use provider::EmbeddingProvider; diff --git a/crates/nvisy-rig/src/provider/embedding/model.rs b/crates/nvisy-rig/src/provider/embedding/model.rs new file mode 100644 index 0000000..db24296 --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/model.rs @@ -0,0 +1,154 @@ +//! Type-safe embedding model references. + +use serde::{Deserialize, Serialize}; + +/// Reference to an embedding model. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "provider", content = "model", rename_all = "snake_case")] +pub enum EmbeddingModel { + /// OpenAI embedding models. + OpenAi(OpenAiEmbeddingModel), + /// Cohere embedding models. + Cohere(CohereEmbeddingModel), + /// Google Gemini embedding models. + Gemini(GeminiEmbeddingModel), + /// Ollama local models. + #[cfg(feature = "ollama")] + Ollama(OllamaEmbeddingModel), +} + +/// OpenAI embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum OpenAiEmbeddingModel { + /// text-embedding-3-small (1536 dimensions) + TextEmbedding3Small, + /// text-embedding-3-large (3072 dimensions) + TextEmbedding3Large, + /// text-embedding-ada-002 (legacy, 1536 dimensions) + TextEmbeddingAda002, +} + +impl OpenAiEmbeddingModel { + pub fn as_str(&self) -> &'static str { + match self { + Self::TextEmbedding3Small => "text-embedding-3-small", + Self::TextEmbedding3Large => "text-embedding-3-large", + Self::TextEmbeddingAda002 => "text-embedding-ada-002", + } + } + + pub fn dimensions(&self) -> usize { + match self { + Self::TextEmbedding3Small => 1536, + Self::TextEmbedding3Large => 3072, + Self::TextEmbeddingAda002 => 1536, + } + } +} + +/// Cohere embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum CohereEmbeddingModel { + /// embed-english-v3.0 (1024 dimensions) + EmbedEnglishV3, + /// embed-multilingual-v3.0 (1024 dimensions) + EmbedMultilingualV3, + /// embed-english-light-v3.0 (384 dimensions) + EmbedEnglishLightV3, + /// embed-multilingual-light-v3.0 (384 dimensions) + EmbedMultilingualLightV3, +} + +impl CohereEmbeddingModel { + pub fn as_str(&self) -> &'static str { + match self { + Self::EmbedEnglishV3 => "embed-english-v3.0", + Self::EmbedMultilingualV3 => "embed-multilingual-v3.0", + Self::EmbedEnglishLightV3 => "embed-english-light-v3.0", + Self::EmbedMultilingualLightV3 => "embed-multilingual-light-v3.0", + } + } + + pub fn dimensions(&self) -> usize { + match self { + Self::EmbedEnglishV3 | Self::EmbedMultilingualV3 => 1024, + Self::EmbedEnglishLightV3 | Self::EmbedMultilingualLightV3 => 384, + } + } +} + +/// Google Gemini embedding models. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +pub enum GeminiEmbeddingModel { + /// text-embedding-004 (768 dimensions) + TextEmbedding004, +} + +impl GeminiEmbeddingModel { + pub fn as_str(&self) -> &'static str { + match self { + Self::TextEmbedding004 => "text-embedding-004", + } + } + + pub fn dimensions(&self) -> usize { + 768 + } +} + +/// Ollama embedding model configuration. 
+#[cfg(feature = "ollama")] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct OllamaEmbeddingModel { + /// Model name (e.g., "nomic-embed-text", "mxbai-embed-large"). + pub name: String, + /// Embedding dimensions. + pub dimensions: usize, +} + +#[cfg(feature = "ollama")] +impl OllamaEmbeddingModel { + pub fn new(name: impl Into, dimensions: usize) -> Self { + Self { + name: name.into(), + dimensions, + } + } + + pub fn nomic_embed_text() -> Self { + Self::new("nomic-embed-text", 768) + } + + pub fn mxbai_embed_large() -> Self { + Self::new("mxbai-embed-large", 1024) + } + + pub fn all_minilm() -> Self { + Self::new("all-minilm", 384) + } +} + +impl EmbeddingModel { + pub fn as_str(&self) -> &str { + match self { + Self::OpenAi(m) => m.as_str(), + Self::Cohere(m) => m.as_str(), + Self::Gemini(m) => m.as_str(), + #[cfg(feature = "ollama")] + Self::Ollama(m) => &m.name, + } + } + + pub fn dimensions(&self) -> usize { + match self { + Self::OpenAi(m) => m.dimensions(), + Self::Cohere(m) => m.dimensions(), + Self::Gemini(m) => m.dimensions(), + #[cfg(feature = "ollama")] + Self::Ollama(m) => m.dimensions, + } + } +} diff --git a/crates/nvisy-rig/src/provider/embedding/provider.rs b/crates/nvisy-rig/src/provider/embedding/provider.rs new file mode 100644 index 0000000..062d6d5 --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/provider.rs @@ -0,0 +1,194 @@ +//! Embedding provider abstraction. + +use super::credentials::EmbeddingCredentials; +use super::model::EmbeddingModel; +#[cfg(feature = "ollama")] +use super::model::OllamaEmbeddingModel; +use crate::{Error, Result}; +#[cfg(feature = "ollama")] +use rig::client::Nothing; +use rig::embeddings::{Embedding, EmbeddingModel as RigEmbeddingModel}; +use rig::prelude::EmbeddingsClient; +#[cfg(feature = "ollama")] +use rig::providers::ollama; +use rig::providers::{cohere, gemini, openai}; + +/// Embedding provider that wraps different rig embedding model implementations. +#[derive(Clone)] +pub enum EmbeddingProvider { + OpenAi { + model: openai::EmbeddingModel, + model_name: String, + }, + Cohere { + model: cohere::EmbeddingModel, + model_name: String, + }, + Gemini { + model: gemini::embedding::EmbeddingModel, + model_name: String, + }, + #[cfg(feature = "ollama")] + Ollama { + client: ollama::Client, + model_name: String, + ndims: usize, + }, +} + +impl EmbeddingProvider { + /// Creates a new embedding provider from credentials and model. 
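// Usage sketch (illustrative, not applied by this patch): embedding-model references carry
// their dimensionality, so callers can validate against the vector column before indexing.
// Relies on the `ollama` feature, which this patch turns on by default.
use nvisy_rig::provider::{EmbeddingModel, OllamaEmbeddingModel, OpenAiEmbeddingModel};

fn choose_embedding_model(local: bool) -> EmbeddingModel {
    let model = if local {
        EmbeddingModel::Ollama(OllamaEmbeddingModel::nomic_embed_text())
    } else {
        EmbeddingModel::OpenAi(OpenAiEmbeddingModel::TextEmbedding3Small)
    };
    debug_assert!(matches!(model.dimensions(), 768 | 1536));
    model
}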
+ pub fn new(credentials: &EmbeddingCredentials, model: &EmbeddingModel) -> Result { + match (credentials, model) { + (EmbeddingCredentials::OpenAi { api_key }, EmbeddingModel::OpenAi(m)) => { + let client = openai::Client::new(api_key) + .map_err(|e| Error::provider("openai", e.to_string()))?; + Ok(Self::OpenAi { + model: client.embedding_model_with_ndims(m.as_str(), m.dimensions()), + model_name: m.as_str().to_string(), + }) + } + (EmbeddingCredentials::Cohere { api_key }, EmbeddingModel::Cohere(m)) => { + let client = cohere::Client::new(api_key) + .map_err(|e| Error::provider("cohere", e.to_string()))?; + Ok(Self::Cohere { + model: client.embedding_model_with_ndims( + m.as_str(), + "search_document", + m.dimensions(), + ), + model_name: m.as_str().to_string(), + }) + } + (EmbeddingCredentials::Gemini { api_key }, EmbeddingModel::Gemini(m)) => { + let client = gemini::Client::new(api_key) + .map_err(|e| Error::provider("gemini", e.to_string()))?; + Ok(Self::Gemini { + model: client.embedding_model_with_ndims(m.as_str(), m.dimensions()), + model_name: m.as_str().to_string(), + }) + } + #[cfg(feature = "ollama")] + (EmbeddingCredentials::Ollama { base_url }, EmbeddingModel::Ollama(m)) => { + let client = ollama::Client::builder() + .api_key(Nothing) + .base_url(base_url) + .build() + .map_err(|e| Error::provider("ollama", e.to_string()))?; + Ok(Self::Ollama { + client, + model_name: m.name.clone(), + ndims: m.dimensions, + }) + } + #[allow(unreachable_patterns)] + _ => Err(Error::config("mismatched credentials and model provider")), + } + } + + /// Creates an Ollama embedding provider (convenience for local development). + #[cfg(feature = "ollama")] + pub fn ollama(base_url: &str, model: OllamaEmbeddingModel) -> Result { + let client = ollama::Client::builder() + .api_key(Nothing) + .base_url(base_url) + .build() + .map_err(|e| Error::provider("ollama", e.to_string()))?; + Ok(Self::Ollama { + client, + model_name: model.name, + ndims: model.dimensions, + }) + } + + /// Returns the model name. + pub fn model_name(&self) -> &str { + match self { + Self::OpenAi { model_name, .. } => model_name, + Self::Cohere { model_name, .. } => model_name, + Self::Gemini { model_name, .. } => model_name, + #[cfg(feature = "ollama")] + Self::Ollama { model_name, .. } => model_name, + } + } + + /// Returns the number of dimensions. + pub fn ndims(&self) -> usize { + match self { + Self::OpenAi { model, .. } => model.ndims(), + Self::Cohere { model, .. } => model.ndims(), + Self::Gemini { model, .. } => model.ndims(), + #[cfg(feature = "ollama")] + Self::Ollama { ndims, .. } => *ndims, + } + } + + /// Embed a single text document. + pub async fn embed_text(&self, text: &str) -> Result { + match self { + Self::OpenAi { model, .. } => Ok(model.embed_text(text).await?), + Self::Cohere { model, .. } => Ok(model.embed_text(text).await?), + Self::Gemini { model, .. } => Ok(model.embed_text(text).await?), + #[cfg(feature = "ollama")] + Self::Ollama { + client, + model_name, + ndims, + } => { + let model = ollama::EmbeddingModel::new(client.clone(), model_name, *ndims); + Ok(model.embed_text(text).await?) + } + } + } + + /// Embed multiple text documents. + pub async fn embed_texts( + &self, + texts: impl IntoIterator + Send, + ) -> Result> { + match self { + Self::OpenAi { model, .. } => Ok(model.embed_texts(texts).await?), + Self::Cohere { model, .. } => Ok(model.embed_texts(texts).await?), + Self::Gemini { model, .. 
} => Ok(model.embed_texts(texts).await?), + #[cfg(feature = "ollama")] + Self::Ollama { + client, + model_name, + ndims, + } => { + let model = ollama::EmbeddingModel::new(client.clone(), model_name, *ndims); + Ok(model.embed_texts(texts).await?) + } + } + } +} + +impl std::fmt::Debug for EmbeddingProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::OpenAi { model, model_name } => f + .debug_struct("EmbeddingProvider::OpenAi") + .field("model", model_name) + .field("ndims", &model.ndims()) + .finish(), + Self::Cohere { model, model_name } => f + .debug_struct("EmbeddingProvider::Cohere") + .field("model", model_name) + .field("ndims", &model.ndims()) + .finish(), + Self::Gemini { model, model_name } => f + .debug_struct("EmbeddingProvider::Gemini") + .field("model", model_name) + .field("ndims", &model.ndims()) + .finish(), + #[cfg(feature = "ollama")] + Self::Ollama { + model_name, ndims, .. + } => f + .debug_struct("EmbeddingProvider::Ollama") + .field("model", model_name) + .field("ndims", ndims) + .finish(), + } + } +} diff --git a/crates/nvisy-rig/src/provider/mod.rs b/crates/nvisy-rig/src/provider/mod.rs index 17abbde..30ec0f9 100644 --- a/crates/nvisy-rig/src/provider/mod.rs +++ b/crates/nvisy-rig/src/provider/mod.rs @@ -1,15 +1,17 @@ //! Multi-provider management for AI inference. -//! -//! This module provides: -//! - [`ProviderRegistry`] - Registry of configured providers -//! - [`ProviderConfig`] - Configuration for individual providers -//! - [`ModelRef`] - Reference to a specific model (provider/model) -//! - [`EmbeddingProvider`] - Unified embedding provider enum -mod config; +mod completion; mod embedding; -mod registry; +pub mod splitting; -pub use config::{ModelConfig, ProviderConfig, ProviderKind}; -pub use embedding::EmbeddingProvider; -pub use registry::{ModelRef, ProviderRegistry}; +pub use completion::{ + AnthropicModel, CohereCompletionModel, CompletionCredentials, CompletionModel, + GeminiCompletionModel, OpenAiCompletionModel, PerplexityModel, +}; +#[cfg(feature = "ollama")] +pub use embedding::OllamaEmbeddingModel; +pub use embedding::{ + CohereEmbeddingModel, EmbeddingCredentials, EmbeddingModel, EmbeddingProvider, + GeminiEmbeddingModel, OpenAiEmbeddingModel, +}; +pub use splitting::{Chunk, ChunkMetadata, OwnedChunk, TextSplitter, estimate_tokens}; diff --git a/crates/nvisy-rig/src/provider/registry.rs b/crates/nvisy-rig/src/provider/registry.rs deleted file mode 100644 index 9bb3bb3..0000000 --- a/crates/nvisy-rig/src/provider/registry.rs +++ /dev/null @@ -1,204 +0,0 @@ -//! Provider registry for managing multiple AI providers. - -use std::collections::HashMap; -use std::str::FromStr; -use std::sync::Arc; - -use serde::{Deserialize, Serialize}; - -use super::config::ProviderConfig; -use crate::{Error, Result}; - -/// Reference to a specific model in format "provider_id/model_name". -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub struct ModelRef { - /// Provider ID. - pub provider_id: String, - /// Model name. - pub model: String, -} - -impl ModelRef { - /// Creates a new model reference. 
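// Usage sketch (illustrative, not applied by this patch): constructing an EmbeddingProvider
// from a matching credentials/model pair (mismatched pairs return a configuration error) and
// embedding a query. The GEMINI_API_KEY environment variable is an assumption of this sketch.
use nvisy_rig::provider::{
    EmbeddingCredentials, EmbeddingModel, EmbeddingProvider, GeminiEmbeddingModel,
};

async fn embed_query(text: &str) -> nvisy_rig::Result<Vec<f64>> {
    let api_key = std::env::var("GEMINI_API_KEY").expect("GEMINI_API_KEY must be set");
    let provider = EmbeddingProvider::new(
        &EmbeddingCredentials::Gemini { api_key },
        &EmbeddingModel::Gemini(GeminiEmbeddingModel::TextEmbedding004),
    )?;

    let embedding = provider.embed_text(text).await?;
    Ok(embedding.vec)
}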
- pub fn new(provider_id: impl Into, model: impl Into) -> Self { - Self { - provider_id: provider_id.into(), - model: model.into(), - } - } -} - -impl FromStr for ModelRef { - type Err = Error; - - fn from_str(s: &str) -> Result { - let (provider_id, model) = s.split_once('/').ok_or_else(|| { - Error::config(format!( - "invalid model reference '{}': expected 'provider/model'", - s - )) - })?; - - Ok(Self::new(provider_id, model)) - } -} - -impl std::fmt::Display for ModelRef { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}/{}", self.provider_id, self.model) - } -} - -/// Default models for different tasks. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DefaultModels { - /// Default model for embeddings. - pub embedding: ModelRef, - /// Default model for completions/chat. - pub completion: ModelRef, - /// Default model for vision tasks. - pub vision: ModelRef, -} - -/// Registry of configured AI providers. -/// -/// Allows selecting providers per-request from a set of globally configured providers. -pub struct ProviderRegistry { - providers: HashMap>, - defaults: DefaultModels, -} - -impl ProviderRegistry { - /// Creates an empty provider registry with placeholder defaults. - /// - /// This is useful for testing or when providers will be configured later. - /// Note: Attempting to resolve models will fail until providers are added. - pub fn empty() -> Self { - let placeholder = ModelRef::new("none", "none"); - Self { - providers: HashMap::new(), - defaults: DefaultModels { - embedding: placeholder.clone(), - completion: placeholder.clone(), - vision: placeholder, - }, - } - } - - /// Creates a new provider registry. - pub fn new(providers: Vec, defaults: DefaultModels) -> Result { - let mut provider_map = HashMap::new(); - - for config in providers { - if provider_map.contains_key(&config.id) { - return Err(Error::config(format!( - "duplicate provider id: {}", - config.id - ))); - } - provider_map.insert(config.id.clone(), Arc::new(config)); - } - - // Validate defaults exist - if !provider_map.contains_key(&defaults.embedding.provider_id) { - return Err(Error::config(format!( - "default embedding provider not found: {}", - defaults.embedding.provider_id - ))); - } - if !provider_map.contains_key(&defaults.completion.provider_id) { - return Err(Error::config(format!( - "default completion provider not found: {}", - defaults.completion.provider_id - ))); - } - if !provider_map.contains_key(&defaults.vision.provider_id) { - return Err(Error::config(format!( - "default vision provider not found: {}", - defaults.vision.provider_id - ))); - } - - Ok(Self { - providers: provider_map, - defaults, - }) - } - - /// Gets a provider by ID. - pub fn get(&self, id: &str) -> Option<&ProviderConfig> { - self.providers.get(id).map(|p| p.as_ref()) - } - - /// Gets the provider for a model reference, falling back to defaults. - pub fn resolve_embedding( - &self, - model_ref: Option<&ModelRef>, - ) -> Result<(&ProviderConfig, String)> { - let model_ref = model_ref.unwrap_or(&self.defaults.embedding); - self.resolve(model_ref) - } - - /// Gets the provider for a completion model reference, falling back to defaults. - pub fn resolve_completion( - &self, - model_ref: Option<&ModelRef>, - ) -> Result<(&ProviderConfig, String)> { - let model_ref = model_ref.unwrap_or(&self.defaults.completion); - self.resolve(model_ref) - } - - /// Gets the provider for a vision model reference, falling back to defaults. 
- pub fn resolve_vision( - &self, - model_ref: Option<&ModelRef>, - ) -> Result<(&ProviderConfig, String)> { - let model_ref = model_ref.unwrap_or(&self.defaults.vision); - self.resolve(model_ref) - } - - /// Resolves a model reference to provider config and model name. - fn resolve(&self, model_ref: &ModelRef) -> Result<(&ProviderConfig, String)> { - let provider = self.providers.get(&model_ref.provider_id).ok_or_else(|| { - Error::config(format!("provider not found: {}", model_ref.provider_id)) - })?; - - Ok((provider.as_ref(), model_ref.model.clone())) - } - - /// Returns all registered provider IDs. - pub fn provider_ids(&self) -> impl Iterator { - self.providers.keys().map(|s| s.as_str()) - } - - /// Returns the default models. - pub fn defaults(&self) -> &DefaultModels { - &self.defaults - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_model_ref_parse() { - let model_ref: ModelRef = "openai/gpt-4o" - .parse() - .expect("valid model ref format should parse"); - assert_eq!(model_ref.provider_id, "openai"); - assert_eq!(model_ref.model, "gpt-4o"); - } - - #[test] - fn test_model_ref_display() { - let model_ref = ModelRef::new("anthropic", "claude-sonnet-4-20250514"); - assert_eq!(model_ref.to_string(), "anthropic/claude-sonnet-4-20250514"); - } - - #[test] - fn test_model_ref_invalid() { - let result: Result = "invalid".parse(); - assert!(result.is_err()); - } -} diff --git a/crates/nvisy-rig/src/provider/splitting/chunk.rs b/crates/nvisy-rig/src/provider/splitting/chunk.rs new file mode 100644 index 0000000..87471fe --- /dev/null +++ b/crates/nvisy-rig/src/provider/splitting/chunk.rs @@ -0,0 +1,43 @@ +//! Split chunk types. + +use super::ChunkMetadata; + +/// A chunk produced by the text splitter (borrows from source text). +#[derive(Debug)] +pub struct Chunk<'a> { + /// The chunk text content (borrowed from original). + pub text: &'a str, + /// Metadata about the chunk's position. + pub metadata: ChunkMetadata, +} + +impl<'a> Chunk<'a> { + /// Creates a new chunk. + pub fn new(text: &'a str, metadata: ChunkMetadata) -> Self { + Self { text, metadata } + } + + /// Converts to an owned chunk. + pub fn into_owned(self) -> OwnedChunk { + OwnedChunk { + text: self.text.to_string(), + metadata: self.metadata, + } + } +} + +/// An owned version of Chunk. +#[derive(Debug, Clone)] +pub struct OwnedChunk { + /// The chunk text content. + pub text: String, + /// Metadata about the chunk's position. + pub metadata: ChunkMetadata, +} + +impl OwnedChunk { + /// Creates a new owned chunk. + pub fn new(text: String, metadata: ChunkMetadata) -> Self { + Self { text, metadata } + } +} diff --git a/crates/nvisy-rig/src/provider/splitting/metadata.rs b/crates/nvisy-rig/src/provider/splitting/metadata.rs new file mode 100644 index 0000000..9cc9b81 --- /dev/null +++ b/crates/nvisy-rig/src/provider/splitting/metadata.rs @@ -0,0 +1,40 @@ +//! Split chunk metadata. + +use serde::{Deserialize, Serialize}; + +/// Metadata about a split chunk's location in the source text. +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct ChunkMetadata { + /// Chunk index within the source (0-based). + pub index: u32, + /// Start byte offset in the source text. + pub start_offset: u32, + /// End byte offset in the source text. + pub end_offset: u32, + /// Page number (1-indexed, if applicable). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub page: Option, +} + +impl ChunkMetadata { + /// Creates metadata with offset information. 
+ pub fn new(index: u32, start_offset: u32, end_offset: u32) -> Self { + Self { + index, + start_offset, + end_offset, + page: None, + } + } + + /// Sets the page number. + pub fn with_page(mut self, page: u32) -> Self { + self.page = Some(page); + self + } + + /// Returns the byte length of the chunk. + pub fn byte_len(&self) -> u32 { + self.end_offset - self.start_offset + } +} diff --git a/crates/nvisy-rig/src/provider/splitting/mod.rs b/crates/nvisy-rig/src/provider/splitting/mod.rs new file mode 100644 index 0000000..59602ac --- /dev/null +++ b/crates/nvisy-rig/src/provider/splitting/mod.rs @@ -0,0 +1,9 @@ +//! Text splitting for chunk creation. + +mod chunk; +mod metadata; +mod splitter; + +pub use chunk::{Chunk, OwnedChunk}; +pub use metadata::ChunkMetadata; +pub use splitter::{TextSplitter, estimate_tokens}; diff --git a/crates/nvisy-rig/src/provider/splitting/splitter.rs b/crates/nvisy-rig/src/provider/splitting/splitter.rs new file mode 100644 index 0000000..8959967 --- /dev/null +++ b/crates/nvisy-rig/src/provider/splitting/splitter.rs @@ -0,0 +1,179 @@ +//! Text splitting implementation. + +use text_splitter::{ChunkConfig, TextSplitter as TextSplitterImpl}; +use tracing::{debug, instrument}; + +use super::{Chunk, ChunkMetadata, OwnedChunk}; + +/// Text splitter for creating document chunks. +#[derive(Debug, Clone)] +pub struct TextSplitter { + max_characters: u32, + overlap: u32, + trim: bool, +} + +impl TextSplitter { + /// Creates a new text splitter. + pub fn new(max_characters: u32, overlap: u32, trim: bool) -> Self { + debug!(max_characters, overlap, trim, "created text splitter"); + Self { + max_characters, + overlap, + trim, + } + } + + /// Creates a splitter with default settings (512 chars, no overlap, trimmed). + pub fn with_defaults() -> Self { + Self::new(512, 0, true) + } + + /// Returns the maximum characters per chunk. + pub fn max_characters(&self) -> u32 { + self.max_characters + } + + /// Returns the overlap between chunks. + pub fn overlap(&self) -> u32 { + self.overlap + } + + /// Splits text into chunks with byte offset tracking. + #[instrument(skip(self, text), fields(text_len = text.len()))] + pub fn split<'a>(&self, text: &'a str) -> Vec> { + let chunk_config = ChunkConfig::new(self.max_characters as usize) + .with_overlap(self.overlap as usize) + .expect("overlap must be less than max_characters") + .with_trim(self.trim); + + let splitter = TextSplitterImpl::new(chunk_config); + + let chunks: Vec<_> = splitter + .chunk_indices(text) + .enumerate() + .map(|(index, (byte_offset, chunk_text))| { + let end_offset = byte_offset + chunk_text.len(); + Chunk::new( + chunk_text, + ChunkMetadata::new(index as u32, byte_offset as u32, end_offset as u32), + ) + }) + .collect(); + + debug!(chunk_count = chunks.len(), "split text into chunks"); + chunks + } + + /// Splits text and returns owned chunks. + #[instrument(skip(self, text), fields(text_len = text.len()))] + pub fn split_owned(&self, text: &str) -> Vec { + self.split(text) + .into_iter() + .map(|c| c.into_owned()) + .collect() + } + + /// Splits text with page awareness. + /// + /// Page breaks are indicated by form feed characters (`\x0c`). 
+ #[instrument(skip(self, text), fields(text_len = text.len()))] + pub fn split_with_pages<'a>(&self, text: &'a str) -> Vec> { + let page_breaks: Vec = text + .char_indices() + .filter(|(_, c)| *c == '\x0c') + .map(|(i, _)| i as u32) + .collect(); + + debug!(page_count = page_breaks.len() + 1, "detected pages"); + + self.split(text) + .into_iter() + .map(|chunk| { + let page = page_breaks + .iter() + .take_while(|&&pos| pos < chunk.metadata.start_offset) + .count() as u32 + + 1; + + Chunk { + text: chunk.text, + metadata: chunk.metadata.with_page(page), + } + }) + .collect() + } + + /// Splits text with page awareness and returns owned chunks. + #[instrument(skip(self, text), fields(text_len = text.len()))] + pub fn split_with_pages_owned(&self, text: &str) -> Vec { + self.split_with_pages(text) + .into_iter() + .map(|c| c.into_owned()) + .collect() + } +} + +impl Default for TextSplitter { + fn default() -> Self { + Self::with_defaults() + } +} + +/// Estimates the token count (~4 chars per token). +pub fn estimate_tokens(text: &str) -> u32 { + (text.len() / 4) as u32 +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_split_basic() { + let splitter = TextSplitter::new(50, 0, true); + let text = "Hello world. This is a test. Another sentence here."; + let chunks = splitter.split(text); + + assert!(!chunks.is_empty()); + for chunk in &chunks { + assert!(chunk.text.len() <= 50); + } + } + + #[test] + fn test_split_with_overlap() { + let splitter = TextSplitter::new(20, 5, true); + let text = "The quick brown fox jumps over the lazy dog."; + let chunks = splitter.split(text); + + assert!(chunks.len() > 1); + } + + #[test] + fn test_split_with_pages() { + let splitter = TextSplitter::new(100, 0, true); + let text = "Page one content.\x0cPage two content.\x0cPage three."; + let chunks = splitter.split_with_pages(text); + + assert!(!chunks.is_empty()); + assert_eq!(chunks[0].metadata.page, Some(1)); + } + + #[test] + fn test_metadata_offsets() { + let splitter = TextSplitter::new(500, 0, false); + let text = "Hello world"; + let chunks = splitter.split(text); + + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0].metadata.start_offset, 0); + assert_eq!(chunks[0].metadata.end_offset, text.len() as u32); + } + + #[test] + fn test_estimate_tokens() { + assert_eq!(estimate_tokens("hello"), 1); + assert_eq!(estimate_tokens("hello world"), 2); + } +} diff --git a/crates/nvisy-rig/src/rag/indexer/mod.rs b/crates/nvisy-rig/src/rag/indexer/mod.rs index b2927b0..f3d5c04 100644 --- a/crates/nvisy-rig/src/rag/indexer/mod.rs +++ b/crates/nvisy-rig/src/rag/indexer/mod.rs @@ -1,6 +1,4 @@ //! Document chunk indexing pipeline. -//! -//! Provides batch embedding and storage of document chunks using pgvector. mod indexed; @@ -8,20 +6,18 @@ use nvisy_postgres::model::NewFileChunk; use nvisy_postgres::query::FileChunkRepository; use nvisy_postgres::{PgClient, Vector}; use sha2::{Digest, Sha256}; +use tracing::{debug, instrument}; use uuid::Uuid; pub use self::indexed::IndexedChunk; -use super::splitter::{OwnedSplitChunk, Splitter, estimate_tokens}; -use crate::provider::EmbeddingProvider; +use crate::provider::{EmbeddingProvider, OwnedChunk, TextSplitter, estimate_tokens}; use crate::{Error, Result}; /// Indexer for batch-embedding and storing document chunks. -/// -/// Handles text splitting, embedding, and storage in PostgreSQL. 
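// Usage sketch (illustrative, not applied by this patch): splitting page-delimited text
// (form feeds mark page breaks) and reading the offset and page metadata on each chunk.
use nvisy_rig::provider::{TextSplitter, estimate_tokens};

fn preview_chunks(text: &str) {
    let splitter = TextSplitter::new(512, 64, true); // max characters, overlap, trim
    for chunk in splitter.split_with_pages(text) {
        println!(
            "page {:?}, bytes {}..{}, ~{} tokens",
            chunk.metadata.page,
            chunk.metadata.start_offset,
            chunk.metadata.end_offset,
            estimate_tokens(chunk.text),
        );
    }
}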
pub struct Indexer { provider: EmbeddingProvider, db: PgClient, - splitter: Splitter, + splitter: TextSplitter, file_id: Uuid, } @@ -30,7 +26,7 @@ impl Indexer { pub(crate) fn new( provider: EmbeddingProvider, db: PgClient, - splitter: Splitter, + splitter: TextSplitter, file_id: Uuid, ) -> Self { Self { @@ -47,55 +43,53 @@ impl Indexer { } /// Indexes text by splitting, embedding, and storing chunks. + #[instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn index(&self, text: &str) -> Result> { let chunks = self.splitter.split_owned(text); self.index_chunks(chunks).await } /// Indexes text with page awareness. - /// - /// Page breaks should be indicated by form feed characters (`\x0c`). + #[instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn index_with_pages(&self, text: &str) -> Result> { let chunks = self.splitter.split_with_pages_owned(text); self.index_chunks(chunks).await } /// Deletes all existing chunks for the file before indexing. + #[instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn reindex(&self, text: &str) -> Result> { let chunks = self.splitter.split_owned(text); self.reindex_chunks(chunks).await } /// Deletes all existing chunks for the file before indexing with page awareness. + #[instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn reindex_with_pages(&self, text: &str) -> Result> { let chunks = self.splitter.split_with_pages_owned(text); self.reindex_chunks(chunks).await } - async fn index_chunks(&self, chunks: Vec) -> Result> { + async fn index_chunks(&self, chunks: Vec) -> Result> { if chunks.is_empty() { + debug!("no chunks to index"); return Ok(vec![]); } - // Extract texts for embedding let texts: Vec = chunks.iter().map(|c| c.text.clone()).collect(); + let chunk_count = texts.len(); - // Batch embed all texts - let embeddings = self - .provider - .embed_texts(texts) - .await - .map_err(|e| Error::embedding(format!("failed to embed chunks: {e}")))?; + debug!(chunk_count, "embedding chunks"); + let embeddings = self.provider.embed_texts(texts).await?; - if embeddings.len() != chunks.len() { - return Err(Error::embedding(format!( + if embeddings.len() != chunk_count { + return Err(Error::config(format!( "embedding count mismatch: expected {}, got {}", - chunks.len(), + chunk_count, embeddings.len() ))); } - // Prepare new chunk records let model_name = self.provider.model_name(); let new_chunks: Vec = chunks @@ -107,10 +101,10 @@ impl Indexer { let content_sha256 = Sha256::digest(content_bytes).to_vec(); let content_size = content_bytes.len() as i32; - // Convert f64 embeddings to f32 for pgvector let embedding_vec: Vec = embedding.vec.iter().map(|&x| x as f32).collect(); let metadata = serde_json::json!({ + "index": chunk.metadata.index, "start_offset": chunk.metadata.start_offset, "end_offset": chunk.metadata.end_offset, "page": chunk.metadata.page, @@ -129,7 +123,6 @@ impl Indexer { }) .collect(); - // Store in database let mut conn = self .db .get_connection() @@ -141,11 +134,11 @@ impl Indexer { .await .map_err(|e| Error::retrieval(format!("failed to create chunks: {e}")))?; + debug!(created_count = created.len(), "stored chunks"); Ok(created.into_iter().map(IndexedChunk::from).collect()) } - async fn reindex_chunks(&self, chunks: Vec) -> Result> { - // Delete existing chunks first + async fn reindex_chunks(&self, chunks: Vec) -> Result> { let mut conn = self .db .get_connection() 
@@ -158,11 +151,10 @@ impl Indexer { .map_err(|e| Error::retrieval(format!("failed to delete chunks: {e}")))?; if deleted > 0 { - tracing::debug!(file_id = %self.file_id, deleted, "Deleted existing chunks"); + debug!(deleted, "deleted existing chunks"); } drop(conn); - self.index_chunks(chunks).await } } diff --git a/crates/nvisy-rig/src/rag/mod.rs b/crates/nvisy-rig/src/rag/mod.rs index 2a9ffd8..62024aa 100644 --- a/crates/nvisy-rig/src/rag/mod.rs +++ b/crates/nvisy-rig/src/rag/mod.rs @@ -1,32 +1,10 @@ //! RAG (Retrieval-Augmented Generation) module. //! //! Provides document indexing and semantic search over document chunks. -//! -//! # Security -//! -//! All searches must be scoped to specific files or documents via [`SearchScope`]. -//! -//! # Example -//! -//! ```ignore -//! use nvisy_rig::rag::{RagService, SearchScope}; -//! -//! let rag = RagService::new(embedding_provider, pg, &nats).await?; -//! -//! // Index a file -//! let indexed = rag.indexer(file_id).index(&content).await?; -//! -//! // Search within a document -//! let results = rag -//! .search(SearchScope::document(doc_id)) -//! .query("How does auth work?", 5) -//! .await?; -//! ``` mod config; mod indexer; mod searcher; -mod splitter; use std::sync::Arc; @@ -37,15 +15,11 @@ use uuid::Uuid; pub use self::config::RagConfig; pub use self::indexer::{IndexedChunk, Indexer}; -pub use self::searcher::{ChunkMetadata, RetrievedChunk, SearchScope, Searcher}; -use self::splitter::Splitter; -pub use self::splitter::estimate_tokens; +pub use self::searcher::{RetrievedChunk, SearchScope, Searcher}; use crate::Result; -use crate::provider::EmbeddingProvider; +use crate::provider::{EmbeddingProvider, TextSplitter}; /// High-level RAG service for document indexing and semantic search. -/// -/// The service is cheap to clone and can be shared across threads. #[derive(Clone)] pub struct RagService { inner: Arc, @@ -90,7 +64,7 @@ impl RagService { /// Creates an indexer for a specific file. pub fn indexer(&self, file_id: Uuid) -> Indexer { - let splitter = Splitter::new( + let splitter = TextSplitter::new( self.inner.config.max_chunk_characters, self.inner.config.chunk_overlap, self.inner.config.trim_chunks, diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs index 61f24a5..a90ef00 100644 --- a/crates/nvisy-rig/src/rag/searcher/mod.rs +++ b/crates/nvisy-rig/src/rag/searcher/mod.rs @@ -60,11 +60,7 @@ impl Searcher { /// Searches for relevant chunks without loading content. pub async fn query(&self, query: &str, limit: u32) -> Result> { - let embedding = self - .provider - .embed_text(query) - .await - .map_err(|e| Error::embedding(format!("failed to embed query: {e}")))?; + let embedding = self.provider.embed_text(query).await?; let query_vector: Vector = embedding .vec diff --git a/crates/nvisy-rig/src/rag/splitter/chunk.rs b/crates/nvisy-rig/src/rag/splitter/chunk.rs deleted file mode 100644 index b2b7cd4..0000000 --- a/crates/nvisy-rig/src/rag/splitter/chunk.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! Split chunk types. - -use super::SplitMetadata; - -/// A chunk produced by the text splitter (borrows from source text). -#[derive(Debug)] -pub struct SplitChunk<'a> { - /// The chunk text content (borrowed from original). - pub text: &'a str, - - /// Metadata about the chunk's position. - pub metadata: SplitMetadata, -} - -impl SplitChunk<'_> { - /// Converts to an owned chunk. 
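// Usage sketch (illustrative, not applied by this patch): re-indexing a file's extracted text
// through the RAG service; `reindex_with_pages` drops any previously stored chunks for the
// file before embedding the new ones. The `RagService` import path is an assumption.
use nvisy_rig::rag::RagService;

async fn refresh_index(
    rag: &RagService,
    file_id: uuid::Uuid,
    text: &str,
) -> nvisy_rig::Result<usize> {
    let indexed = rag.indexer(file_id).reindex_with_pages(text).await?;
    tracing::info!(%file_id, chunks = indexed.len(), "file re-indexed");
    Ok(indexed.len())
}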
- pub fn into_owned(self) -> OwnedSplitChunk { - OwnedSplitChunk { - text: self.text.to_string(), - metadata: self.metadata, - } - } -} - -/// An owned version of SplitChunk. -#[derive(Debug, Clone)] -pub struct OwnedSplitChunk { - /// The chunk text content. - pub text: String, - - /// Metadata about the chunk's position. - pub metadata: SplitMetadata, -} diff --git a/crates/nvisy-rig/src/rag/splitter/metadata.rs b/crates/nvisy-rig/src/rag/splitter/metadata.rs deleted file mode 100644 index 4201323..0000000 --- a/crates/nvisy-rig/src/rag/splitter/metadata.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! Split chunk metadata. - -use serde::{Deserialize, Serialize}; - -/// Metadata about a split chunk's location in the source text. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct SplitMetadata { - /// Page number (1-indexed, if applicable). - #[serde(skip_serializing_if = "Option::is_none")] - pub page: Option, - - /// Start byte offset in the source text. - pub start_offset: u32, - - /// End byte offset in the source text. - pub end_offset: u32, - - /// Chunk index within the source (0-based). - pub chunk_index: u32, -} - -impl SplitMetadata { - /// Creates metadata with offset information. - pub fn new(chunk_index: u32, start_offset: u32, end_offset: u32) -> Self { - Self { - page: None, - start_offset, - end_offset, - chunk_index, - } - } -} diff --git a/crates/nvisy-rig/src/rag/splitter/mod.rs b/crates/nvisy-rig/src/rag/splitter/mod.rs deleted file mode 100644 index e51ad66..0000000 --- a/crates/nvisy-rig/src/rag/splitter/mod.rs +++ /dev/null @@ -1,105 +0,0 @@ -//! Text splitting for chunk creation. - -mod chunk; -mod metadata; - -use text_splitter::{ChunkConfig, TextSplitter}; - -pub(crate) use self::chunk::{OwnedSplitChunk, SplitChunk}; -pub(crate) use self::metadata::SplitMetadata; - -/// Estimates the token count (~4 chars per token). -pub fn estimate_tokens(text: &str) -> u32 { - (text.len() / 4) as u32 -} - -/// Text splitter service for creating document chunks. -#[derive(Clone)] -pub struct Splitter { - max_characters: u32, - overlap: u32, - trim: bool, -} - -impl Splitter { - /// Creates a new text splitter. - pub fn new(max_characters: u32, overlap: u32, trim: bool) -> Self { - Self { - max_characters, - overlap, - trim, - } - } - - /// Splits text into chunks with byte offset tracking. - pub fn split<'a>(&self, text: &'a str) -> Vec> { - let chunk_config = ChunkConfig::new(self.max_characters as usize) - .with_overlap(self.overlap as usize) - .expect("overlap must be less than max_characters") - .with_trim(self.trim); - let splitter = TextSplitter::new(chunk_config); - - splitter - .chunk_indices(text) - .enumerate() - .map(|(chunk_index, (byte_offset, chunk_text))| { - let end_offset = byte_offset + chunk_text.len(); - - SplitChunk { - text: chunk_text, - metadata: SplitMetadata::new( - chunk_index as u32, - byte_offset as u32, - end_offset as u32, - ), - } - }) - .collect() - } - - /// Splits text and returns owned chunks. - pub fn split_owned(&self, text: &str) -> Vec { - self.split(text) - .into_iter() - .map(|c| c.into_owned()) - .collect() - } - - /// Splits text with page awareness. - /// - /// Page breaks should be indicated by form feed characters (`\x0c`). 
- pub fn split_with_pages<'a>(&self, text: &'a str) -> Vec> { - let page_breaks: Vec = text - .char_indices() - .filter(|(_, c)| *c == '\x0c') - .map(|(i, _)| i as u32) - .collect(); - - self.split(text) - .into_iter() - .map(|mut chunk| { - let page = page_breaks - .iter() - .take_while(|&&pos| pos < chunk.metadata.start_offset) - .count() as u32 - + 1; - chunk.metadata.page = Some(page); - chunk - }) - .collect() - } - - /// Splits text with page awareness and returns owned chunks. - pub fn split_with_pages_owned(&self, text: &str) -> Vec { - self.split_with_pages(text) - .into_iter() - .map(|c| c.into_owned()) - .collect() - } -} - -impl Default for Splitter { - fn default() -> Self { - Self::new(512, 0, true) - } -} diff --git a/crates/nvisy-rig/src/service/config.rs b/crates/nvisy-rig/src/service/config.rs index 399054d..1fc7407 100644 --- a/crates/nvisy-rig/src/service/config.rs +++ b/crates/nvisy-rig/src/service/config.rs @@ -4,13 +4,17 @@ use clap::Args; use serde::{Deserialize, Serialize}; -use crate::provider::EmbeddingProvider; +#[cfg(feature = "ollama")] +use crate::Result; +#[cfg(feature = "ollama")] +use crate::provider::{EmbeddingProvider, OllamaEmbeddingModel}; /// Configuration for AI services (chat and RAG). #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "config", derive(Args))] pub struct RigConfig { /// Ollama base URL for embeddings. + #[cfg(feature = "ollama")] #[cfg_attr( feature = "config", arg( @@ -22,6 +26,7 @@ pub struct RigConfig { pub ollama_base_url: String, /// Ollama embedding model name. + #[cfg(feature = "ollama")] #[cfg_attr( feature = "config", arg( @@ -31,20 +36,37 @@ pub struct RigConfig { ) )] pub ollama_embedding_model: String, + + /// Ollama embedding model dimensions. + #[cfg(feature = "ollama")] + #[cfg_attr( + feature = "config", + arg(long, env = "OLLAMA_EMBEDDING_DIMENSIONS", default_value = "768") + )] + pub ollama_embedding_dimensions: usize, } impl Default for RigConfig { fn default() -> Self { Self { + #[cfg(feature = "ollama")] ollama_base_url: "http://localhost:11434".to_string(), + #[cfg(feature = "ollama")] ollama_embedding_model: "nomic-embed-text".to_string(), + #[cfg(feature = "ollama")] + ollama_embedding_dimensions: 768, } } } +#[cfg(feature = "ollama")] impl RigConfig { - /// Creates an embedding provider from this configuration. - pub(crate) fn embedding_provider(&self) -> EmbeddingProvider { - EmbeddingProvider::ollama(&self.ollama_base_url, &self.ollama_embedding_model) + /// Creates an Ollama embedding provider from this configuration. + pub(crate) fn embedding_provider(&self) -> Result { + let model = OllamaEmbeddingModel::new( + &self.ollama_embedding_model, + self.ollama_embedding_dimensions, + ); + EmbeddingProvider::ollama(&self.ollama_base_url, model) } } diff --git a/crates/nvisy-rig/src/service/rig.rs b/crates/nvisy-rig/src/service/rig.rs index adf81e3..e251709 100644 --- a/crates/nvisy-rig/src/service/rig.rs +++ b/crates/nvisy-rig/src/service/rig.rs @@ -8,7 +8,6 @@ use nvisy_postgres::PgClient; use super::RigConfig; use crate::Result; use crate::chat::ChatService; -use crate::provider::ProviderRegistry; use crate::rag::{RagConfig, RagService}; /// Inner state for [`RigService`]. @@ -18,8 +17,6 @@ struct RigServiceInner { } /// Unified AI service providing chat and RAG capabilities. -/// -/// This type is cheap to clone and can be shared across threads. 
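A sketch of setting the new dimensions field without clap/env, assuming the `ollama` feature is enabled and that `RigConfig` is importable from the path shown (both assumptions):

```rust
use nvisy_rig::service::RigConfig; // path assumed

fn local_embedding_config() -> RigConfig {
    let mut config = RigConfig::default();
    // Must match the output size of the chosen model (768 for nomic-embed-text);
    // also configurable via the OLLAMA_EMBEDDING_DIMENSIONS env var.
    config.ollama_embedding_dimensions = 768;
    config
}
```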
#[derive(Clone)] pub struct RigService { inner: Arc, @@ -28,14 +25,12 @@ pub struct RigService { impl RigService { /// Creates a new RigService from configuration. pub async fn new(config: RigConfig, db: PgClient, nats: NatsClient) -> Result { - // Initialize RAG service - let embedding_provider = config.embedding_provider(); + let embedding_provider = config.embedding_provider()?; + let rag_config = RagConfig::default(); - let rag = RagService::new(rag_config, embedding_provider, db, nats.clone()).await?; + let rag = RagService::new(rag_config, embedding_provider.clone(), db, nats.clone()).await?; - // Initialize Chat service - let providers = ProviderRegistry::empty(); - let chat = ChatService::new(providers, nats).await?; + let chat = ChatService::new(embedding_provider, nats).await?; Ok(Self { inner: Arc::new(RigServiceInner { chat, rag }), diff --git a/crates/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml index ef1f90b..9eab6e2 100644 --- a/crates/nvisy-runtime/Cargo.toml +++ b/crates/nvisy-runtime/Cargo.toml @@ -22,6 +22,7 @@ rustdoc-args = ["--cfg", "docsrs"] # Internal crates nvisy-core = { workspace = true } nvisy-dal = { workspace = true } +nvisy-rig = { workspace = true } # Runtime crates nvisy-rt-core = { workspace = true } @@ -46,6 +47,7 @@ derive_builder = { workspace = true, features = [] } # Data types uuid = { workspace = true, features = ["v7", "serde"] } +jiff = { workspace = true, features = ["serde"] } # Graph data structures petgraph = { workspace = true, features = [] } diff --git a/crates/nvisy-runtime/src/engine/context.rs b/crates/nvisy-runtime/src/engine/context.rs index 1f66c93..5920d8a 100644 --- a/crates/nvisy-runtime/src/engine/context.rs +++ b/crates/nvisy-runtime/src/engine/context.rs @@ -1,37 +1,65 @@ //! Execution context for workflow runs. +use std::collections::HashMap; + +use derive_builder::Builder; use nvisy_dal::AnyDataValue; use crate::provider::CredentialsRegistry; /// Execution context for a workflow run. /// -/// Manages the current data items flowing through the pipeline and holds -/// credentials for provider access. Execution is pipe-based: each input item -/// flows through the entire pipeline before the next item is processed. +/// Manages the current data items flowing through the pipeline, holds +/// credentials for provider access, and provides named cache slots for +/// data sharing between workflow branches. /// /// A single input can produce multiple outputs (e.g., 1 document → 1000 embeddings), /// so the context holds a `Vec` of values at each stage. -#[derive(Debug)] +#[derive(Debug, Builder)] +#[builder( + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] pub struct ExecutionContext { /// Credentials registry for provider authentication. credentials: CredentialsRegistry, /// Current data items being processed (can expand: 1 input → N outputs). + #[builder(default)] current: Vec, + /// Named cache slots for data sharing between workflow branches. + #[builder(default)] + cache: HashMap>, /// Total input items processed in this execution. + #[builder(default)] items_processed: usize, } +impl ExecutionContextBuilder { + fn validate(&self) -> Result<(), String> { + if self.credentials.is_none() { + return Err("credentials is required".into()); + } + Ok(()) + } +} + impl ExecutionContext { /// Creates a new execution context with the given credentials. 
     pub fn new(credentials: CredentialsRegistry) -> Self {
         Self {
             credentials,
             current: Vec::new(),
+            cache: HashMap::new(),
             items_processed: 0,
         }
     }
 
+    /// Returns a builder for creating an execution context.
+    pub fn builder() -> ExecutionContextBuilder {
+        ExecutionContextBuilder::default()
+    }
+
     /// Returns a reference to the credentials registry.
     pub fn credentials(&self) -> &CredentialsRegistry {
         &self.credentials
@@ -81,4 +109,29 @@ impl ExecutionContext {
     pub fn clear(&mut self) {
         self.current.clear();
     }
+
+    /// Writes data to a named cache slot.
+    pub fn write_cache(&mut self, name: &str, data: Vec<AnyDataValue>) {
+        self.cache.entry(name.to_string()).or_default().extend(data);
+    }
+
+    /// Reads data from a named cache slot (returns empty vec if not found).
+    pub fn read_cache(&self, name: &str) -> Vec<AnyDataValue> {
+        self.cache.get(name).cloned().unwrap_or_default()
+    }
+
+    /// Clears a named cache slot.
+    pub fn clear_cache(&mut self, name: &str) {
+        self.cache.remove(name);
+    }
+
+    /// Clears all cache slots.
+    pub fn clear_all_caches(&mut self) {
+        self.cache.clear();
+    }
+
+    /// Returns the names of all cache slots.
+    pub fn cache_names(&self) -> Vec<&str> {
+        self.cache.keys().map(|s| s.as_str()).collect()
+    }
 }
diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs
index 4fef41f..609cc20 100644
--- a/crates/nvisy-runtime/src/engine/executor.rs
+++ b/crates/nvisy-runtime/src/engine/executor.rs
@@ -8,7 +8,7 @@ use tokio::sync::Semaphore;
 use super::EngineConfig;
 use super::context::ExecutionContext;
 use crate::error::{WorkflowError, WorkflowResult};
-use crate::graph::{NodeData, NodeId, WorkflowGraph};
+use crate::graph::{InputSource, NodeData, NodeId, OutputDestination, WorkflowGraph};
 use crate::provider::{CredentialsRegistry, InputProvider, OutputProvider};
 
 /// Tracing target for engine operations.
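For orientation, a minimal sketch of the builder and cache-slot API added to `ExecutionContext` above (module paths and the helper name are assumptions, not part of this patch):

```rust
use nvisy_dal::AnyDataValue;
use nvisy_runtime::engine::context::ExecutionContext; // path assumed
use nvisy_runtime::provider::CredentialsRegistry;     // path assumed

/// Hypothetical helper: stash one branch's results so a later branch can replay them.
fn stash_branch_output(
    credentials: CredentialsRegistry,
    produced: Vec<AnyDataValue>,
) -> ExecutionContext {
    let mut ctx = ExecutionContext::builder()
        .with_credentials(credentials)
        .build()
        .expect("builder validation only requires credentials");

    ctx.write_cache("parsed_docs", produced);
    debug_assert!(ctx.cache_names().contains(&"parsed_docs"));
    ctx
}
```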
@@ -105,8 +105,8 @@ impl Engine { order: &[NodeId], ctx: &ExecutionContext, ) -> WorkflowResult { - let mut input_providers = Vec::new(); - let mut output_providers = Vec::new(); + let mut inputs = Vec::new(); + let mut outputs = Vec::new(); for node_id in order { let Some(node) = workflow.get_node(*node_id) else { @@ -115,18 +115,30 @@ impl Engine { match node { NodeData::Input(input_node) => { - let credentials_id = input_node.provider.credentials_id(); - let credentials = ctx.credentials().get(credentials_id)?.clone(); - let config = input_node.provider.clone().into_config(credentials)?; - let provider = config.into_provider()?; - input_providers.push((*node_id, provider)); + let input = match &input_node.source { + InputSource::Provider(params) => { + let credentials_id = params.credentials_id(); + let credentials = ctx.credentials().get(credentials_id)?.clone(); + let config = params.clone().into_config(credentials)?; + let provider = config.into_provider()?; + PipelineInput::Provider(provider) + } + InputSource::Cache(slot) => PipelineInput::Cache(slot.slot.clone()), + }; + inputs.push((*node_id, input)); } NodeData::Output(output_node) => { - let credentials_id = output_node.provider.credentials_id(); - let credentials = ctx.credentials().get(credentials_id)?.clone(); - let config = output_node.provider.clone().into_config(credentials)?; - let provider = config.into_provider().await?; - output_providers.push((*node_id, provider)); + let output = match &output_node.destination { + OutputDestination::Provider(params) => { + let credentials_id = params.credentials_id(); + let credentials = ctx.credentials().get(credentials_id)?.clone(); + let config = params.clone().into_config(credentials)?; + let provider = config.into_provider().await?; + PipelineOutput::Provider(provider) + } + OutputDestination::Cache(slot) => PipelineOutput::Cache(slot.slot.clone()), + }; + outputs.push((*node_id, output)); } NodeData::Transformer(_) => { // Transformers don't need pre-built providers @@ -134,10 +146,7 @@ impl Engine { } } - Ok(Pipeline { - input_providers, - output_providers, - }) + Ok(Pipeline { inputs, outputs }) } /// Executes the pipeline by streaming items through. @@ -153,16 +162,21 @@ impl Engine { pipeline: &Pipeline, ctx: &mut ExecutionContext, ) -> WorkflowResult<()> { - // For each input provider, stream items through the pipeline - for (input_node_id, input_provider) in &pipeline.input_providers { + // For each input, stream items through the pipeline + for (input_node_id, input) in &pipeline.inputs { tracing::debug!( target: TRACING_TARGET, node_id = %input_node_id, - "Reading from input provider" + "Reading from input" ); - let dal_ctx = Context::default(); - let items = input_provider.read(&dal_ctx).await?; + let items = match input { + PipelineInput::Provider(provider) => { + let dal_ctx = Context::default(); + provider.read(&dal_ctx).await? 
+ } + PipelineInput::Cache(name) => ctx.read_cache(name), + }; // Process each input item through the pipeline for item in items { @@ -180,19 +194,26 @@ impl Engine { } } - // Write all resulting items to output providers + // Write all resulting items to outputs let output_data = ctx.take_current(); if !output_data.is_empty() { - for (output_node_id, output_provider) in &pipeline.output_providers { + for (output_node_id, output) in &pipeline.outputs { tracing::trace!( target: TRACING_TARGET, node_id = %output_node_id, item_count = output_data.len(), - "Writing to output provider" + "Writing to output" ); - let dal_ctx = Context::default(); - output_provider.write(&dal_ctx, output_data.clone()).await?; + match output { + PipelineOutput::Provider(provider) => { + let dal_ctx = Context::default(); + provider.write(&dal_ctx, output_data.clone()).await?; + } + PipelineOutput::Cache(name) => { + ctx.write_cache(name, output_data.clone()); + } + } } } @@ -208,7 +229,7 @@ impl Engine { fn execute_transformer( &self, node_id: NodeId, - _transformer_node: &crate::graph::TransformerNode, + _transformer_config: &crate::graph::TransformerConfig, ctx: &mut ExecutionContext, ) -> WorkflowResult<()> { // TODO: Apply transformation based on transformer_node.config @@ -230,10 +251,26 @@ impl Engine { } } -/// Pre-built pipeline with providers ready for execution. +/// Pre-built pipeline with inputs and outputs ready for execution. struct Pipeline { - input_providers: Vec<(NodeId, InputProvider)>, - output_providers: Vec<(NodeId, OutputProvider)>, + inputs: Vec<(NodeId, PipelineInput)>, + outputs: Vec<(NodeId, PipelineOutput)>, +} + +/// Input source in the pipeline. +enum PipelineInput { + /// Read from a storage provider. + Provider(InputProvider), + /// Read from a named cache slot. + Cache(String), +} + +/// Output destination in the pipeline. +enum PipelineOutput { + /// Write to a storage provider. + Provider(OutputProvider), + /// Write to a named cache slot. + Cache(String), } impl std::fmt::Debug for Engine { diff --git a/crates/nvisy-runtime/src/graph/data.rs b/crates/nvisy-runtime/src/graph/data.rs deleted file mode 100644 index ea538a2..0000000 --- a/crates/nvisy-runtime/src/graph/data.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! Core node data enum. - -use derive_more::From; -use serde::{Deserialize, Serialize}; - -use super::input::InputNode; -use super::output::OutputNode; -use super::transformer::TransformerNode; - -/// Data associated with a workflow node. -/// -/// Nodes are categorized by their role in data flow: -/// - **Input**: Reads/produces data (entry points) -/// - **Transformer**: Processes/transforms data (intermediate) -/// - **Output**: Writes/consumes data (exit points) -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, From)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum NodeData { - /// Data input node, reads or produces data. - Input(InputNode), - /// Data transformer node, processes or transforms data. - Transformer(TransformerNode), - /// Data output node, writes or consumes data. - Output(OutputNode), -} - -impl NodeData { - /// Returns the node's display name if set. - pub fn name(&self) -> Option<&str> { - match self { - NodeData::Input(n) => n.name.as_deref(), - NodeData::Transformer(n) => n.name.as_deref(), - NodeData::Output(n) => n.name.as_deref(), - } - } - - /// Returns the node's description if set. 
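The executor now resolves cache-backed inputs and outputs alongside providers; a sketch of wiring two branches through one slot, using the node constructors introduced further down in this patch (the slot name and helper are illustrative):

```rust
use nvisy_runtime::graph::{CacheSlot, InputNode, NodeData, OutputNode};

/// Hypothetical pair of nodes that bridges two branches through one in-memory slot.
fn cache_bridge(slot: &str) -> (NodeData, NodeData) {
    // The upstream branch ends by writing into the slot...
    let writer: NodeData = OutputNode::from_cache(CacheSlot::new(slot)).into();
    // ...and the downstream branch starts by reading it back instead of hitting storage.
    let reader: NodeData = InputNode::from_cache(CacheSlot::new(slot)).into();
    (writer, reader)
}
```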
- pub fn description(&self) -> Option<&str> { - match self { - NodeData::Input(n) => n.description.as_deref(), - NodeData::Transformer(n) => n.description.as_deref(), - NodeData::Output(n) => n.description.as_deref(), - } - } - - /// Returns whether this is an input node. - pub const fn is_input(&self) -> bool { - matches!(self, NodeData::Input(_)) - } - - /// Returns whether this is a transformer node. - pub const fn is_transformer(&self) -> bool { - matches!(self, NodeData::Transformer(_)) - } - - /// Returns whether this is an output node. - pub const fn is_output(&self) -> bool { - matches!(self, NodeData::Output(_)) - } -} diff --git a/crates/nvisy-runtime/src/graph/edge.rs b/crates/nvisy-runtime/src/graph/edge.rs index c67f2a9..44d16f6 100644 --- a/crates/nvisy-runtime/src/graph/edge.rs +++ b/crates/nvisy-runtime/src/graph/edge.rs @@ -57,3 +57,14 @@ impl Edge { self } } + +/// Edge data stored in the graph. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] +pub struct EdgeData { + /// Optional port/slot name on the source node. + #[serde(skip_serializing_if = "Option::is_none")] + pub from_port: Option, + /// Optional port/slot name on the target node. + #[serde(skip_serializing_if = "Option::is_none")] + pub to_port: Option, +} diff --git a/crates/nvisy-runtime/src/graph/id.rs b/crates/nvisy-runtime/src/graph/id.rs deleted file mode 100644 index 9f6e9c3..0000000 --- a/crates/nvisy-runtime/src/graph/id.rs +++ /dev/null @@ -1,61 +0,0 @@ -//! Node identifier type. - -use std::str::FromStr; - -use derive_more::{Debug, Display, From, Into}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Unique identifier for a node in a workflow graph. -#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[derive(Debug, Display, From, Into)] -#[debug("{_0}")] -#[display("{_0}")] -#[serde(transparent)] -pub struct NodeId(Uuid); - -impl NodeId { - /// Creates a new random node ID. - #[inline] - pub fn new() -> Self { - Self(Uuid::now_v7()) - } - - /// Creates a node ID from an existing UUID. - #[inline] - pub const fn from_uuid(uuid: Uuid) -> Self { - Self(uuid) - } - - /// Returns the underlying UUID. - #[inline] - pub const fn as_uuid(&self) -> Uuid { - self.0 - } - - /// Returns the UUID as bytes. - #[inline] - pub const fn as_bytes(&self) -> &[u8; 16] { - self.0.as_bytes() - } -} - -impl Default for NodeId { - fn default() -> Self { - Self::new() - } -} - -impl FromStr for NodeId { - type Err = uuid::Error; - - fn from_str(s: &str) -> Result { - Ok(Self(Uuid::from_str(s)?)) - } -} - -impl AsRef for NodeId { - fn as_ref(&self) -> &Uuid { - &self.0 - } -} diff --git a/crates/nvisy-runtime/src/graph/input/mod.rs b/crates/nvisy-runtime/src/graph/input/mod.rs index 289a12b..df6e49c 100644 --- a/crates/nvisy-runtime/src/graph/input/mod.rs +++ b/crates/nvisy-runtime/src/graph/input/mod.rs @@ -1,41 +1,82 @@ -//! Input node types for reading data from storage backends. +//! Input node types for reading data from storage backends or cache. +use derive_more::From; use nvisy_dal::DataTypeId; use serde::{Deserialize, Serialize}; +use super::route::CacheSlot; use crate::provider::InputProviderParams; +/// Source of input data. +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[serde(tag = "source", rename_all = "snake_case")] +pub enum InputSource { + /// Read from a storage provider. + Provider(InputProviderParams), + /// Read from a cache slot. + Cache(CacheSlot), +} + /// A data input node that reads or produces data. 
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct InputNode { - /// Display name of the input. - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - /// Description of what this input does. - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, - /// Provider parameters (credentials referenced by ID). - pub provider: InputProviderParams, + /// Input source (provider or cache). + #[serde(flatten)] + pub source: InputSource, } impl InputNode { - /// Creates a new input node. - pub fn new(provider: InputProviderParams) -> Self { + /// Creates a new input node from a provider. + pub fn from_provider(provider: InputProviderParams) -> Self { Self { - name: None, - description: None, - provider, + source: InputSource::Provider(provider), + } + } + + /// Creates a new input node from a cache slot. + pub fn from_cache(slot: CacheSlot) -> Self { + Self { + source: InputSource::Cache(slot), + } + } + + /// Returns the output data type based on the source kind. + /// + /// For cache slots, the type is unknown at compile time. + pub fn output_type(&self) -> Option { + match &self.source { + InputSource::Provider(p) => Some(p.output_type()), + InputSource::Cache(_) => None, } } - /// Returns the output data type based on the provider kind. - pub const fn output_type(&self) -> DataTypeId { - self.provider.output_type() + /// Returns whether this input reads from a provider. + pub const fn is_provider(&self) -> bool { + matches!(self.source, InputSource::Provider(_)) + } + + /// Returns whether this input reads from a cache slot. + pub const fn is_cache(&self) -> bool { + matches!(self.source, InputSource::Cache(_)) + } + + /// Returns the cache slot name if this is a cache input. + pub fn cache_slot(&self) -> Option<&str> { + match &self.source { + InputSource::Cache(slot) => Some(&slot.slot), + _ => None, + } } } impl From for InputNode { fn from(provider: InputProviderParams) -> Self { - Self::new(provider) + Self::from_provider(provider) + } +} + +impl From for InputNode { + fn from(slot: CacheSlot) -> Self { + Self::from_cache(slot) } } diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index b316fbc..c77629e 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -7,19 +7,21 @@ //! - [`EdgeData`]: Data stored on edges in the underlying petgraph //! - [`NodeId`]: Unique identifier for nodes //! - [`NodeData`]: Data associated with each node (Input, Transformer, Output) +//! - [`CacheSlot`]: Named cache slot for in-memory data passing +//! 
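The `source`-tagged, flattened layout above should give input nodes a compact wire form; a sketch (serde_json assumed available; provider inputs carry whatever fields `InputProviderParams` defines):

```rust
use nvisy_runtime::graph::{CacheSlot, InputNode};

// Expected shape (a sketch): {"source":"cache","slot":"ocr_text"}; provider inputs
// should instead serialize as "source":"provider" plus the params' own fields.
fn cache_input_json() -> serde_json::Result<String> {
    serde_json::to_string(&InputNode::from_cache(CacheSlot::new("ocr_text")))
}
```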
- [`SwitchNode`]: Conditional routing based on data properties -mod data; mod edge; -mod id; pub mod input; +mod node; pub mod output; -pub mod transformer; +pub mod route; +pub mod transform; mod workflow; -pub use data::NodeData; -pub use edge::Edge; -pub use id::NodeId; -pub use input::InputNode; -pub use output::OutputNode; -pub use transformer::{TransformerConfig, TransformerNode}; -pub use workflow::{EdgeData, WorkflowGraph, WorkflowMetadata}; +pub use edge::{Edge, EdgeData}; +pub use input::{InputNode, InputSource}; +pub use node::{Node, NodeCommon, NodeData, NodeId}; +pub use output::{OutputDestination, OutputNode}; +pub use route::{CacheSlot, SwitchBranch, SwitchCondition, SwitchNode}; +pub use transform::TransformerConfig; +pub use workflow::{WorkflowGraph, WorkflowMetadata}; diff --git a/crates/nvisy-runtime/src/graph/node.rs b/crates/nvisy-runtime/src/graph/node.rs new file mode 100644 index 0000000..945e279 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/node.rs @@ -0,0 +1,139 @@ +//! Generic node wrapper, node identifier, and node data types. + +use std::str::FromStr; + +use derive_more::{Debug, Display, From, Into}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::input::InputNode; +use super::output::OutputNode; +use super::transform::TransformerConfig; + +/// Unique identifier for a node in a workflow graph. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Debug, Display, From, Into)] +#[debug("{_0}")] +#[display("{_0}")] +#[serde(transparent)] +pub struct NodeId(Uuid); + +impl NodeId { + /// Creates a new random node ID. + #[inline] + pub fn new() -> Self { + Self(Uuid::now_v7()) + } + + /// Creates a node ID from an existing UUID. + #[inline] + pub const fn from_uuid(uuid: Uuid) -> Self { + Self(uuid) + } + + /// Returns the underlying UUID. + #[inline] + pub const fn as_uuid(&self) -> Uuid { + self.0 + } + + /// Returns the UUID as bytes. + #[inline] + pub const fn as_bytes(&self) -> &[u8; 16] { + self.0.as_bytes() + } +} + +impl Default for NodeId { + fn default() -> Self { + Self::new() + } +} + +impl FromStr for NodeId { + type Err = uuid::Error; + + fn from_str(s: &str) -> Result { + Ok(Self(Uuid::from_str(s)?)) + } +} + +impl AsRef for NodeId { + fn as_ref(&self) -> &Uuid { + &self.0 + } +} + +/// A generic node wrapper that adds optional name and description to any inner type. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct NodeCommon { + /// Display name of the node. + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + /// Description of what this node does. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Inner node configuration. + #[serde(flatten)] + pub inner: T, +} + +impl NodeCommon { + /// Creates a new node with the given inner value. + pub fn new(inner: T) -> Self { + Self { + name: None, + description: None, + inner, + } + } + + /// Sets the display name. + pub fn with_name(mut self, name: impl Into) -> Self { + self.name = Some(name.into()); + self + } + + /// Sets the description. + pub fn with_description(mut self, description: impl Into) -> Self { + self.description = Some(description.into()); + self + } +} + +/// A workflow node with id, name, description, and node data. +pub type Node = NodeCommon; + +/// Data associated with a workflow node. 
+/// +/// Nodes are categorized by their role in data flow: +/// - **Input**: Reads/produces data (entry points) +/// - **Transformer**: Processes/transforms data (intermediate) +/// - **Output**: Writes/consumes data (exit points) +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, From)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum NodeData { + /// Data input node, reads or produces data. + Input(InputNode), + /// Data transformer node, processes or transforms data. + Transformer(TransformerConfig), + /// Data output node, writes or consumes data. + Output(OutputNode), +} + +impl NodeData { + /// Returns whether this is an input node. + pub const fn is_input(&self) -> bool { + matches!(self, NodeData::Input(_)) + } + + /// Returns whether this is a transformer node. + pub const fn is_transformer(&self) -> bool { + matches!(self, NodeData::Transformer(_)) + } + + /// Returns whether this is an output node. + pub const fn is_output(&self) -> bool { + matches!(self, NodeData::Output(_)) + } +} diff --git a/crates/nvisy-runtime/src/graph/output/mod.rs b/crates/nvisy-runtime/src/graph/output/mod.rs index 8890ea3..0fa5f4a 100644 --- a/crates/nvisy-runtime/src/graph/output/mod.rs +++ b/crates/nvisy-runtime/src/graph/output/mod.rs @@ -1,41 +1,82 @@ -//! Output node types for writing data to storage backends and vector databases. +//! Output node types for writing data to storage backends, vector databases, or cache. +use derive_more::From; use nvisy_dal::DataTypeId; use serde::{Deserialize, Serialize}; +use super::route::CacheSlot; use crate::provider::OutputProviderParams; +/// Destination for output data. +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[serde(tag = "destination", rename_all = "snake_case")] +pub enum OutputDestination { + /// Write to a storage provider or vector database. + Provider(OutputProviderParams), + /// Write to a cache slot. + Cache(CacheSlot), +} + /// A data output node that writes or consumes data. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct OutputNode { - /// Display name of the output. - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - /// Description of what this output does. - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, - /// Provider parameters (credentials referenced by ID). - pub provider: OutputProviderParams, + /// Output destination (provider or cache). + #[serde(flatten)] + pub destination: OutputDestination, } impl OutputNode { - /// Creates a new output node. - pub fn new(provider: OutputProviderParams) -> Self { + /// Creates a new output node from a provider. + pub fn from_provider(provider: OutputProviderParams) -> Self { Self { - name: None, - description: None, - provider, + destination: OutputDestination::Provider(provider), + } + } + + /// Creates a new output node from a cache slot. + pub fn from_cache(slot: CacheSlot) -> Self { + Self { + destination: OutputDestination::Cache(slot), + } + } + + /// Returns the expected input data type based on the destination kind. + /// + /// For cache slots, the type is unknown at compile time. + pub fn input_type(&self) -> Option { + match &self.destination { + OutputDestination::Provider(p) => Some(p.output_type()), + OutputDestination::Cache(_) => None, } } - /// Returns the expected input data type based on the provider kind. - pub const fn input_type(&self) -> DataTypeId { - self.provider.output_type() + /// Returns whether this output writes to a provider. 
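A sketch of wrapping node data with the new `NodeCommon` helper (assuming the `Node` alias resolves to `NodeCommon<NodeData>`; names and the slot are illustrative):

```rust
use nvisy_runtime::graph::{CacheSlot, InputNode, Node, NodeCommon, NodeData};

/// Hypothetical labelled node built from the cache-input constructor.
fn labelled_cache_input() -> Node {
    NodeCommon::new(NodeData::Input(InputNode::from_cache(CacheSlot::new("ocr_text"))))
        .with_name("OCR text (cached)")
        .with_description("Replays text produced by the OCR branch")
}
```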
+ pub const fn is_provider(&self) -> bool { + matches!(self.destination, OutputDestination::Provider(_)) + } + + /// Returns whether this output writes to a cache slot. + pub const fn is_cache(&self) -> bool { + matches!(self.destination, OutputDestination::Cache(_)) + } + + /// Returns the cache slot name if this is a cache output. + pub fn cache_slot(&self) -> Option<&str> { + match &self.destination { + OutputDestination::Cache(slot) => Some(&slot.slot), + _ => None, + } } } impl From for OutputNode { fn from(provider: OutputProviderParams) -> Self { - Self::new(provider) + Self::from_provider(provider) + } +} + +impl From for OutputNode { + fn from(slot: CacheSlot) -> Self { + Self::from_cache(slot) } } diff --git a/crates/nvisy-runtime/src/graph/route/cache.rs b/crates/nvisy-runtime/src/graph/route/cache.rs new file mode 100644 index 0000000..4b28dfc --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/cache.rs @@ -0,0 +1,33 @@ +//! Cache slot node type for in-memory data passing. + +use serde::{Deserialize, Serialize}; + +/// A cache slot node that can store and retrieve data within a workflow. +/// +/// Cache slots act as named temporary storage that can be used as both +/// input (read from cache) and output (write to cache) within the same workflow. +/// This enables data sharing between different branches of a workflow graph. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct CacheSlot { + /// Slot identifier (used as the key for storage/retrieval). + pub slot: String, + /// Priority for ordering when multiple slots are available. + #[serde(skip_serializing_if = "Option::is_none")] + pub priority: Option, +} + +impl CacheSlot { + /// Creates a new cache slot with the given slot name. + pub fn new(slot: impl Into) -> Self { + Self { + slot: slot.into(), + priority: None, + } + } + + /// Sets the priority. + pub fn with_priority(mut self, priority: u32) -> Self { + self.priority = Some(priority); + self + } +} diff --git a/crates/nvisy-runtime/src/graph/route/mod.rs b/crates/nvisy-runtime/src/graph/route/mod.rs new file mode 100644 index 0000000..6659fb7 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/mod.rs @@ -0,0 +1,11 @@ +//! Routing nodes for conditional data flow. +//! +//! This module provides nodes for controlling data flow in workflows: +//! - [`CacheSlot`]: Named temporary storage for data sharing between branches +//! - [`SwitchNode`]: Conditional routing based on data properties + +mod cache; +mod switch; + +pub use cache::CacheSlot; +pub use switch::{ContentTypeCategory, DateField, SwitchBranch, SwitchCondition, SwitchNode}; diff --git a/crates/nvisy-runtime/src/graph/route/switch.rs b/crates/nvisy-runtime/src/graph/route/switch.rs new file mode 100644 index 0000000..fa80510 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/switch.rs @@ -0,0 +1,143 @@ +//! Switch node for conditional routing. + +use serde::{Deserialize, Serialize}; + +/// A switch node that routes data to different branches based on conditions. +/// +/// Switch nodes evaluate conditions against incoming data and route it +/// to the appropriate output branch. Each branch has a condition and a +/// target cache slot or output. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SwitchNode { + /// Branches to evaluate in order. + pub branches: Vec, + /// Default branch if no conditions match. + #[serde(skip_serializing_if = "Option::is_none")] + pub default: Option, +} + +/// A single branch in a switch node. 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SwitchBranch { + /// Condition to evaluate. + pub condition: SwitchCondition, + /// Target cache slot name to route matching data. + pub target: String, +} + +/// Condition for switch branch evaluation. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum SwitchCondition { + /// Match by content type category. + ContentType { + /// Content type category to match. + category: ContentTypeCategory, + }, + /// Match when file size exceeds threshold. + FileSizeAbove { + /// Size threshold in bytes. + threshold_bytes: u64, + }, + /// Match when page count exceeds threshold. + PageCountAbove { + /// Page count threshold. + threshold_pages: u32, + }, + /// Match when duration exceeds threshold (for audio/video). + DurationAbove { + /// Duration threshold in seconds. + threshold_seconds: u64, + }, + /// Match by detected content language. + Language { + /// Language code to match (e.g., "en", "es", "fr"). + language_code: String, + /// Minimum confidence threshold (0.0 to 1.0). + #[serde(default = "default_confidence")] + min_confidence: f32, + }, + /// Match when file date is newer than threshold. + DateNewerThan { + /// Which date field to use. + #[serde(default)] + date_field: DateField, + /// Threshold as ISO 8601 datetime or relative duration (e.g., "7d", "30d", "1y"). + threshold: String, + }, + /// Match by filename regex pattern. + FileNameMatches { + /// Regex pattern to match against filename. + pattern: String, + }, + /// Match by file extension. + FileExtension { + /// Extension to match (without dot, e.g., "pdf", "docx"). + extension: String, + }, +} + +/// Content type categories for routing. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ContentTypeCategory { + /// Text files (plain text, markdown, etc.). + Text, + /// Image files (JPEG, PNG, GIF, etc.). + Image, + /// Audio files (MP3, WAV, FLAC, etc.). + Audio, + /// Video files (MP4, WebM, etc.). + Video, + /// Document files (PDF, DOCX, etc.). + Document, + /// Archive files (ZIP, TAR, etc.). + Archive, + /// Spreadsheet files (XLSX, CSV, etc.). + Spreadsheet, + /// Presentation files (PPTX, etc.). + Presentation, + /// Code/source files. + Code, +} + +/// Date field to use for routing. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DateField { + /// File creation date. + #[default] + Created, + /// File modification date. + Modified, +} + +fn default_confidence() -> f32 { + 0.8 +} + +impl SwitchNode { + /// Creates a new switch node with the given branches. + pub fn new(branches: Vec) -> Self { + Self { + branches, + default: None, + } + } + + /// Sets the default target for unmatched data. + pub fn with_default(mut self, target: impl Into) -> Self { + self.default = Some(target.into()); + self + } +} + +impl SwitchBranch { + /// Creates a new branch with the given condition and target. + pub fn new(condition: SwitchCondition, target: impl Into) -> Self { + Self { + condition, + target: target.into(), + } + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/chunk.rs b/crates/nvisy-runtime/src/graph/transform/chunk.rs new file mode 100644 index 0000000..88d5d74 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/chunk.rs @@ -0,0 +1,63 @@ +//! Chunk transformer configuration. 
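The switch and branch types above compose like this (a sketch; the branch targets are cache-slot names and the thresholds are illustrative):

```rust
use nvisy_runtime::graph::route::{
    ContentTypeCategory, SwitchBranch, SwitchCondition, SwitchNode,
};

/// Hypothetical routing: documents and large files go to "heavy", everything else to "light".
fn build_switch() -> SwitchNode {
    SwitchNode::new(vec![
        SwitchBranch::new(
            SwitchCondition::ContentType { category: ContentTypeCategory::Document },
            "heavy",
        ),
        SwitchBranch::new(
            SwitchCondition::FileSizeAbove { threshold_bytes: 50 * 1024 * 1024 },
            "heavy",
        ),
    ])
    .with_default("light")
}
```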
+ +use serde::{Deserialize, Serialize}; + +/// Configuration for chunking content. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct ChunkConfig { + /// Chunking strategy. + #[serde(flatten)] + pub strategy: ChunkStrategy, +} + +/// Chunking strategy. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "strategy", rename_all = "snake_case")] +pub enum ChunkStrategy { + /// Chunk by character count. + Character { + /// Maximum chunk size in characters. + max_characters: u32, + /// Overlap between chunks in characters. + #[serde(default)] + overlap: u32, + }, + /// Chunk by page boundaries. + Page { + /// Maximum pages per chunk. + #[serde(default = "default_max_pages")] + max_pages: u32, + /// Overlap between chunks in pages. + #[serde(default)] + overlap: u32, + }, + /// Chunk by document sections/headings. + Section { + /// Maximum sections per chunk. + #[serde(default = "default_max_sections")] + max_sections: u32, + /// Overlap between chunks in sections. + #[serde(default)] + overlap: u32, + }, + /// Chunk by semantic similarity. + Similarity { + /// Maximum chunk size in characters. + max_characters: u32, + /// Similarity score threshold (0.0 to 1.0). + #[serde(default = "default_score")] + score: f32, + }, +} + +fn default_max_pages() -> u32 { + 1 +} + +fn default_max_sections() -> u32 { + 1 +} + +fn default_score() -> f32 { + 0.5 +} diff --git a/crates/nvisy-runtime/src/graph/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs new file mode 100644 index 0000000..cbb841a --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/embedding.rs @@ -0,0 +1,12 @@ +//! Embedding transformer configuration. + +use nvisy_rig::provider::EmbeddingModel; +use serde::{Deserialize, Serialize}; + +/// Configuration for generating embeddings. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct EmbeddingConfig { + /// Model to use for embedding generation. + #[serde(flatten)] + pub model: EmbeddingModel, +} diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs new file mode 100644 index 0000000..5e8272a --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -0,0 +1,14 @@ +//! Enrich transformer configuration. + +use nvisy_rig::provider::CompletionModel; +use serde::{Deserialize, Serialize}; + +/// Configuration for enriching data. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct EnrichConfig { + /// Model to use for enrichment. + #[serde(flatten)] + pub model: CompletionModel, + /// Prompt template for enrichment. + pub prompt: String, +} diff --git a/crates/nvisy-runtime/src/graph/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs new file mode 100644 index 0000000..fd40fa9 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/mod.rs @@ -0,0 +1,27 @@ +//! Transformer node types for processing and transforming data. + +mod chunk; +mod embedding; +mod enrich; +mod partition; + +pub use chunk::{ChunkConfig, ChunkStrategy}; +pub use embedding::EmbeddingConfig; +pub use enrich::EnrichConfig; +pub use partition::PartitionConfig; + +use serde::{Deserialize, Serialize}; + +/// Transformer node configuration. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum TransformerConfig { + /// Partition data into multiple outputs. + Partition(PartitionConfig), + /// Chunk content into smaller pieces. 
+ Chunk(ChunkConfig), + /// Enrich data with additional information. + Enrich(EnrichConfig), + /// Generate vector embeddings. + Embedding(EmbeddingConfig), +} diff --git a/crates/nvisy-runtime/src/graph/transform/partition.rs b/crates/nvisy-runtime/src/graph/transform/partition.rs new file mode 100644 index 0000000..619c457 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/partition.rs @@ -0,0 +1,10 @@ +//! Partition transformer configuration. + +use serde::{Deserialize, Serialize}; + +/// Configuration for partitioning data. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PartitionConfig { + /// Field to partition by. + pub field: String, +} diff --git a/crates/nvisy-runtime/src/graph/transformer/chunking.rs b/crates/nvisy-runtime/src/graph/transformer/chunking.rs deleted file mode 100644 index d43f2de..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/chunking.rs +++ /dev/null @@ -1,288 +0,0 @@ -//! Chunking strategy configurations for text splitting. - -use serde::{Deserialize, Serialize}; - -/// Chunking strategy configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "strategy", rename_all = "snake_case")] -pub enum ChunkingStrategy { - /// Split by character count. - Character(CharacterChunkingConfig), - /// Split by sentences. - Sentence(SentenceChunkingConfig), - /// Split by paragraphs. - Paragraph(ParagraphChunkingConfig), - /// Split by page boundaries (for PDFs). - Page(PageChunkingConfig), - /// Split by document structure/titles. - Title(TitleChunkingConfig), - /// Recursive splitting with fallback strategies. - Recursive(RecursiveChunkingConfig), - /// Semantic/similarity-based chunking. - Semantic(SemanticChunkingConfig), - /// Contextual chunking with LLM-assisted boundaries. - Contextual(ContextualChunkingConfig), -} - -/// Character-based chunking configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct CharacterChunkingConfig { - /// Maximum chunk size in characters. - pub max_size: usize, - /// Overlap between chunks in characters. - #[serde(default)] - pub overlap: usize, - /// Separator to split on (defaults to whitespace). - #[serde(skip_serializing_if = "Option::is_none")] - pub separator: Option, - /// Whether to trim whitespace from chunks. - #[serde(default = "default_true")] - pub trim: bool, -} - -impl CharacterChunkingConfig { - /// Creates a new character chunking config. - pub fn new(max_size: usize) -> Self { - Self { - max_size, - overlap: 0, - separator: None, - trim: true, - } - } - - /// Sets the overlap. - pub fn with_overlap(mut self, overlap: usize) -> Self { - self.overlap = overlap; - self - } - - /// Sets the separator. - pub fn with_separator(mut self, separator: impl Into) -> Self { - self.separator = Some(separator.into()); - self - } -} - -/// Sentence-based chunking configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SentenceChunkingConfig { - /// Maximum number of sentences per chunk. - pub max_sentences: usize, - /// Overlap in sentences. - #[serde(default)] - pub overlap_sentences: usize, - /// Maximum chunk size in characters (soft limit). - #[serde(skip_serializing_if = "Option::is_none")] - pub max_size: Option, -} - -impl SentenceChunkingConfig { - /// Creates a new sentence chunking config. - pub fn new(max_sentences: usize) -> Self { - Self { - max_sentences, - overlap_sentences: 0, - max_size: None, - } - } - - /// Sets the overlap. 
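The new `TransformerConfig` above is internally tagged by `kind`, with the chunk strategy flattened in; a sketch of the resulting wire format (field values illustrative):

```rust
use nvisy_runtime::graph::TransformerConfig;

// Expected shape for a character-chunking transformer (a sketch derived from the
// serde attributes above):
//   {"kind":"chunk","strategy":"character","max_characters":512,"overlap":64}
fn parse_chunk_transformer() -> serde_json::Result<TransformerConfig> {
    serde_json::from_str(
        r#"{"kind":"chunk","strategy":"character","max_characters":512,"overlap":64}"#,
    )
}
```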
- pub fn with_overlap(mut self, overlap: usize) -> Self { - self.overlap_sentences = overlap; - self - } -} - -/// Paragraph-based chunking configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct ParagraphChunkingConfig { - /// Maximum number of paragraphs per chunk. - pub max_paragraphs: usize, - /// Maximum chunk size in characters (soft limit). - #[serde(skip_serializing_if = "Option::is_none")] - pub max_size: Option, - /// Minimum paragraph length to consider (filters short lines). - #[serde(default)] - pub min_paragraph_length: usize, -} - -impl ParagraphChunkingConfig { - /// Creates a new paragraph chunking config. - pub fn new(max_paragraphs: usize) -> Self { - Self { - max_paragraphs, - max_size: None, - min_paragraph_length: 0, - } - } -} - -/// Page-based chunking configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PageChunkingConfig { - /// Maximum number of pages per chunk. - #[serde(default = "default_one")] - pub max_pages: usize, - /// Whether to preserve page boundaries exactly. - #[serde(default = "default_true")] - pub preserve_boundaries: bool, -} - -impl Default for PageChunkingConfig { - fn default() -> Self { - Self { - max_pages: 1, - preserve_boundaries: true, - } - } -} - -/// Title/heading-based chunking configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct TitleChunkingConfig { - /// Heading levels to split on (1 = h1, 2 = h2, etc.). - #[serde(default = "default_heading_levels")] - pub heading_levels: Vec, - /// Whether to include the heading in each chunk. - #[serde(default = "default_true")] - pub include_heading: bool, - /// Maximum chunk size in characters (soft limit). - #[serde(skip_serializing_if = "Option::is_none")] - pub max_size: Option, -} - -impl Default for TitleChunkingConfig { - fn default() -> Self { - Self { - heading_levels: default_heading_levels(), - include_heading: true, - max_size: None, - } - } -} - -/// Recursive chunking configuration with fallback strategies. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct RecursiveChunkingConfig { - /// Maximum chunk size in characters. - pub max_size: usize, - /// Overlap between chunks. - #[serde(default)] - pub overlap: usize, - /// Separators to try in order (from most to least preferred). - #[serde(default = "default_recursive_separators")] - pub separators: Vec, -} - -impl RecursiveChunkingConfig { - /// Creates a new recursive chunking config. - pub fn new(max_size: usize) -> Self { - Self { - max_size, - overlap: 0, - separators: default_recursive_separators(), - } - } - - /// Sets the overlap. - pub fn with_overlap(mut self, overlap: usize) -> Self { - self.overlap = overlap; - self - } - - /// Sets custom separators. - pub fn with_separators(mut self, separators: Vec) -> Self { - self.separators = separators; - self - } -} - -/// Semantic/similarity-based chunking configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SemanticChunkingConfig { - /// Similarity threshold for splitting (0.0-1.0). - /// Lower values = more aggressive splitting. - #[serde(default = "default_similarity_threshold")] - pub similarity_threshold: f32, - /// Minimum chunk size in characters. - #[serde(default = "default_min_chunk_size")] - pub min_size: usize, - /// Maximum chunk size in characters. - #[serde(default = "default_max_chunk_size")] - pub max_size: usize, - /// Embedding model to use for similarity. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub embedding_model: Option, -} - -impl Default for SemanticChunkingConfig { - fn default() -> Self { - Self { - similarity_threshold: default_similarity_threshold(), - min_size: default_min_chunk_size(), - max_size: default_max_chunk_size(), - embedding_model: None, - } - } -} - -/// Contextual chunking using LLM to determine boundaries. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct ContextualChunkingConfig { - /// LLM model to use for boundary detection. - pub model: String, - /// Maximum chunk size in characters. - #[serde(default = "default_max_chunk_size")] - pub max_size: usize, - /// Custom prompt for boundary detection. - #[serde(skip_serializing_if = "Option::is_none")] - pub custom_prompt: Option, -} - -impl ContextualChunkingConfig { - /// Creates a new contextual chunking config. - pub fn new(model: impl Into) -> Self { - Self { - model: model.into(), - max_size: default_max_chunk_size(), - custom_prompt: None, - } - } -} - -// Default value functions - -fn default_true() -> bool { - true -} - -fn default_one() -> usize { - 1 -} - -fn default_heading_levels() -> Vec { - vec![1, 2, 3] -} - -fn default_recursive_separators() -> Vec { - vec![ - "\n\n".to_string(), // Paragraphs - "\n".to_string(), // Lines - ". ".to_string(), // Sentences - ", ".to_string(), // Clauses - " ".to_string(), // Words - ] -} - -fn default_similarity_threshold() -> f32 { - 0.5 -} - -fn default_min_chunk_size() -> usize { - 100 -} - -fn default_max_chunk_size() -> usize { - 1000 -} diff --git a/crates/nvisy-runtime/src/graph/transformer/config.rs b/crates/nvisy-runtime/src/graph/transformer/config.rs deleted file mode 100644 index 3ee45eb..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/config.rs +++ /dev/null @@ -1,93 +0,0 @@ -//! Transformer node configuration types. - -use serde::{Deserialize, Serialize}; - -use super::document::{ - LanguageDetectionConfig, SentimentAnalysisConfig, SummarizationConfig, - TopicClassificationConfig, TranslationConfig, -}; -use super::embedding::GenerateEmbeddingsConfig; -use super::extraction::{ - CitationParsingConfig, EntityRelationExtractionConfig, ExtractTextConfig, - ImageDescriptionConfig, MetadataExtractionConfig, NamedEntityRecognitionConfig, - TableDescriptionConfig, TableToHtmlConfig, -}; -use super::processing::{ - ChunkContentConfig, ConvertFormatConfig, FilterConfig, LlmTransformConfig, MergeConfig, - ValidateConfig, -}; -use super::quality::{DataNormalizationConfig, DeduplicationConfig, TextCleaningConfig}; -use super::routing::{ - ContentTypeRouterConfig, DurationRouterConfig, FileDateRouterConfig, FileNameRouterConfig, - FileSizeRouterConfig, LanguageRouterConfig, PageCountRouterConfig, -}; - -/// Transformer node configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub enum TransformerConfig { - /// Route by detected content/mime type. - ContentTypeRouter(ContentTypeRouterConfig), - /// Route by file size threshold. - FileSizeRouter(FileSizeRouterConfig), - /// Route by document page count threshold. - PageCountRouter(PageCountRouterConfig), - /// Route by audio/video duration threshold. - DurationRouter(DurationRouterConfig), - /// Route by detected language. - LanguageRouter(LanguageRouterConfig), - /// Route by file date (created/modified). - FileDateRouter(FileDateRouterConfig), - /// Route by filename regex patterns. 
- FileNameRouter(FileNameRouterConfig), - - /// Detect language of text content. - LanguageDetection(LanguageDetectionConfig), - /// Translate text to target language. - Translation(TranslationConfig), - /// Analyze sentiment of text content. - SentimentAnalysis(SentimentAnalysisConfig), - /// Classify content into topics. - TopicClassification(TopicClassificationConfig), - /// Generate summary of content. - Summarization(SummarizationConfig), - - /// Extract text from documents (PDF, images via OCR). - ExtractText(ExtractTextConfig), - /// Extract metadata from documents. - MetadataExtraction(MetadataExtractionConfig), - /// Extract named entities (people, organizations, locations, dates). - NamedEntityRecognition(NamedEntityRecognitionConfig), - /// Extract relationships between entities. - EntityRelationExtraction(EntityRelationExtractionConfig), - /// Generate descriptions for images. - ImageDescription(ImageDescriptionConfig), - /// Generate descriptions for tables. - TableDescription(TableDescriptionConfig), - /// Convert tables to HTML. - TableToHtml(TableToHtmlConfig), - /// Parse and normalize citations and references. - CitationParsing(CitationParsingConfig), - - /// Normalize data formats (dates, times, units). - DataNormalization(DataNormalizationConfig), - /// Detect and remove duplicate content. - Deduplication(DeduplicationConfig), - /// Clean and correct text (spelling, grammar, formatting, noise removal). - TextCleaning(TextCleaningConfig), - - /// Split content into chunks. - ChunkContent(ChunkContentConfig), - /// Generate vector embeddings. - GenerateEmbeddings(GenerateEmbeddingsConfig), - /// Transform using an LLM. - LlmTransform(LlmTransformConfig), - /// Convert file format. - ConvertFormat(ConvertFormatConfig), - /// Validate content against schema. - Validate(ValidateConfig), - /// Filter data based on conditions. - Filter(FilterConfig), - /// Merge multiple inputs. - Merge(MergeConfig), -} diff --git a/crates/nvisy-runtime/src/graph/transformer/document.rs b/crates/nvisy-runtime/src/graph/transformer/document.rs deleted file mode 100644 index 762a4ad..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/document.rs +++ /dev/null @@ -1,104 +0,0 @@ -//! Document understanding transformer configurations. - -use serde::{Deserialize, Serialize}; - -/// Configuration for language detection. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct LanguageDetectionConfig { - /// Minimum confidence threshold (0.0 to 1.0). - #[serde(skip_serializing_if = "Option::is_none")] - pub min_confidence: Option, -} - -/// Configuration for translation. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct TranslationConfig { - /// Target language code (e.g., "en", "es", "fr"). - pub target_language: String, - /// Source language code (auto-detect if not specified). - #[serde(skip_serializing_if = "Option::is_none")] - pub source_language: Option, - /// Model to use for translation. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -/// Configuration for sentiment analysis. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct SentimentAnalysisConfig { - /// Granularity of analysis. - #[serde(default)] - pub granularity: SentimentGranularity, - /// Model to use for analysis. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -/// Granularity for sentiment analysis. 
-#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SentimentGranularity { - /// Analyze entire document. - #[default] - Document, - /// Analyze each paragraph. - Paragraph, - /// Analyze each sentence. - Sentence, -} - -/// Configuration for topic classification. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct TopicClassificationConfig { - /// Predefined topics to classify into (empty for auto-discovery). - #[serde(default)] - pub topics: Vec, - /// Maximum number of topics to assign. - #[serde(skip_serializing_if = "Option::is_none")] - pub max_topics: Option, - /// Model to use for classification. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -/// Configuration for summarization. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct SummarizationConfig { - /// Target summary length. - #[serde(default)] - pub length: SummaryLength, - /// Summary style. - #[serde(default)] - pub style: SummaryStyle, - /// Model to use for summarization. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -/// Target length for summaries. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SummaryLength { - /// Brief summary (1-2 sentences). - Brief, - /// Standard summary. - #[default] - Standard, - /// Detailed summary. - Detailed, - /// Custom max tokens. - Custom(usize), -} - -/// Style for summaries. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum SummaryStyle { - /// Extractive summary (key sentences). - Extractive, - /// Abstractive summary (rewritten). - #[default] - Abstractive, - /// Bullet points. - BulletPoints, -} diff --git a/crates/nvisy-runtime/src/graph/transformer/embedding.rs b/crates/nvisy-runtime/src/graph/transformer/embedding.rs deleted file mode 100644 index 51b3c6e..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/embedding.rs +++ /dev/null @@ -1,270 +0,0 @@ -//! Embedding generation configurations. - -use serde::{Deserialize, Serialize}; - -use super::chunking::ChunkingStrategy; - -/// Configuration for embedding generation. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct GenerateEmbeddingsConfig { - /// Embedding provider and model. - pub provider: EmbeddingProvider, - /// Chunking strategy (if content should be chunked before embedding). - #[serde(skip_serializing_if = "Option::is_none")] - pub chunking: Option, - /// Batch size for embedding requests. - #[serde(skip_serializing_if = "Option::is_none")] - pub batch_size: Option, -} - -impl GenerateEmbeddingsConfig { - /// Creates a new embedding config with the given provider. - pub fn new(provider: EmbeddingProvider) -> Self { - Self { - provider, - chunking: None, - batch_size: None, - } - } - - /// Sets the chunking strategy. - pub fn with_chunking(mut self, chunking: ChunkingStrategy) -> Self { - self.chunking = Some(chunking); - self - } - - /// Sets the batch size. - pub fn with_batch_size(mut self, batch_size: usize) -> Self { - self.batch_size = Some(batch_size); - self - } -} - -/// Embedding provider configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum EmbeddingProvider { - /// OpenAI embeddings. - OpenAi(OpenAiEmbeddingConfig), - /// Ollama local embeddings. 
- Ollama(OllamaEmbeddingConfig), - /// Cohere embeddings. - Cohere(CohereEmbeddingConfig), - /// Google Gemini embeddings. - Gemini(GeminiEmbeddingConfig), -} - -/// OpenAI embedding configuration. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct OpenAiEmbeddingConfig { - /// Model to use. - #[serde(default)] - pub model: OpenAiEmbeddingModel, - /// Embedding dimensions (for models that support it). - #[serde(skip_serializing_if = "Option::is_none")] - pub dimensions: Option, -} - -impl OpenAiEmbeddingConfig { - /// Creates a new OpenAI embedding config with the given model. - pub fn new(model: OpenAiEmbeddingModel) -> Self { - Self { - model, - dimensions: None, - } - } - - /// Sets custom dimensions. - pub fn with_dimensions(mut self, dimensions: usize) -> Self { - self.dimensions = Some(dimensions); - self - } -} - -/// OpenAI embedding models. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum OpenAiEmbeddingModel { - /// text-embedding-3-small (1536 dimensions, cheapest). - #[default] - TextEmbedding3Small, - /// text-embedding-3-large (3072 dimensions, best quality). - TextEmbedding3Large, - /// text-embedding-ada-002 (1536 dimensions, legacy). - TextEmbeddingAda002, -} - -impl OpenAiEmbeddingModel { - /// Returns the model identifier string. - pub fn as_str(&self) -> &'static str { - match self { - Self::TextEmbedding3Small => "text-embedding-3-small", - Self::TextEmbedding3Large => "text-embedding-3-large", - Self::TextEmbeddingAda002 => "text-embedding-ada-002", - } - } - - /// Returns the default dimensions for this model. - pub fn default_dimensions(&self) -> usize { - match self { - Self::TextEmbedding3Small => 1536, - Self::TextEmbedding3Large => 3072, - Self::TextEmbeddingAda002 => 1536, - } - } -} - -/// Ollama embedding configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OllamaEmbeddingConfig { - /// Model name. - #[serde(default = "default_ollama_model")] - pub model: String, - /// Ollama server base URL. - #[serde(skip_serializing_if = "Option::is_none")] - pub base_url: Option, -} - -impl Default for OllamaEmbeddingConfig { - fn default() -> Self { - Self { - model: default_ollama_model(), - base_url: None, - } - } -} - -impl OllamaEmbeddingConfig { - /// Creates a new Ollama embedding config with the given model. - pub fn new(model: impl Into) -> Self { - Self { - model: model.into(), - base_url: None, - } - } - - /// Sets the base URL. - pub fn with_base_url(mut self, base_url: impl Into) -> Self { - self.base_url = Some(base_url.into()); - self - } -} - -/// Cohere embedding configuration. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct CohereEmbeddingConfig { - /// Model to use. - #[serde(default)] - pub model: CohereEmbeddingModel, - /// Input type for embeddings. - #[serde(default)] - pub input_type: CohereInputType, -} - -impl CohereEmbeddingConfig { - /// Creates a new Cohere embedding config with the given model. - pub fn new(model: CohereEmbeddingModel) -> Self { - Self { - model, - input_type: CohereInputType::default(), - } - } - - /// Sets the input type. - pub fn with_input_type(mut self, input_type: CohereInputType) -> Self { - self.input_type = input_type; - self - } -} - -/// Cohere embedding models. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum CohereEmbeddingModel { - /// embed-english-v3.0 (1024 dimensions). 
- #[default] - EmbedEnglishV3, - /// embed-multilingual-v3.0 (1024 dimensions). - EmbedMultilingualV3, - /// embed-english-light-v3.0 (384 dimensions). - EmbedEnglishLightV3, - /// embed-multilingual-light-v3.0 (384 dimensions). - EmbedMultilingualLightV3, -} - -impl CohereEmbeddingModel { - /// Returns the model identifier string. - pub fn as_str(&self) -> &'static str { - match self { - Self::EmbedEnglishV3 => "embed-english-v3.0", - Self::EmbedMultilingualV3 => "embed-multilingual-v3.0", - Self::EmbedEnglishLightV3 => "embed-english-light-v3.0", - Self::EmbedMultilingualLightV3 => "embed-multilingual-light-v3.0", - } - } - - /// Returns the default dimensions for this model. - pub fn default_dimensions(&self) -> usize { - match self { - Self::EmbedEnglishV3 | Self::EmbedMultilingualV3 => 1024, - Self::EmbedEnglishLightV3 | Self::EmbedMultilingualLightV3 => 384, - } - } -} - -/// Cohere input types. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum CohereInputType { - /// For search queries. - SearchQuery, - /// For documents to be searched. - #[default] - SearchDocument, - /// For classification tasks. - Classification, - /// For clustering tasks. - Clustering, -} - -/// Google Gemini embedding configuration. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct GeminiEmbeddingConfig { - /// Model to use. - #[serde(default)] - pub model: GeminiEmbeddingModel, -} - -impl GeminiEmbeddingConfig { - /// Creates a new Gemini embedding config with the given model. - pub fn new(model: GeminiEmbeddingModel) -> Self { - Self { model } - } -} - -/// Gemini embedding models. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -pub enum GeminiEmbeddingModel { - /// text-embedding-004 (768 dimensions). - #[default] - TextEmbedding004, -} - -impl GeminiEmbeddingModel { - /// Returns the model identifier string. - pub fn as_str(&self) -> &'static str { - match self { - Self::TextEmbedding004 => "text-embedding-004", - } - } - - /// Returns the default dimensions for this model. - pub fn default_dimensions(&self) -> usize { - 768 - } -} - -fn default_ollama_model() -> String { - "nomic-embed-text".to_string() -} diff --git a/crates/nvisy-runtime/src/graph/transformer/extraction.rs b/crates/nvisy-runtime/src/graph/transformer/extraction.rs deleted file mode 100644 index 180682f..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/extraction.rs +++ /dev/null @@ -1,136 +0,0 @@ -//! Content extraction transformer configurations. - -use serde::{Deserialize, Serialize}; - -/// Configuration for text extraction. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct ExtractTextConfig { - /// Enable OCR for images. - #[serde(default)] - pub ocr_enabled: bool, - /// OCR language codes. - #[serde(skip_serializing_if = "Option::is_none")] - pub ocr_languages: Option>, -} - -/// Configuration for metadata extraction. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct MetadataExtractionConfig { - /// Specific fields to extract (empty for all available). - #[serde(default)] - pub fields: Vec, -} - -/// Configuration for named entity recognition. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct NamedEntityRecognitionConfig { - /// Entity types to extract (empty for all). - #[serde(default)] - pub entity_types: Vec, - /// Model to use for NER. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -/// Types of named entities. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum EntityType { - Person, - Organization, - Location, - Date, - Time, - Money, - Percent, - Product, - Event, - WorkOfArt, - Law, - Language, -} - -/// Configuration for entity relation extraction. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct EntityRelationExtractionConfig { - /// Relation types to extract (empty for all). - #[serde(default)] - pub relation_types: Vec, - /// Model to use for extraction. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, - /// Include confidence scores. - #[serde(default)] - pub include_confidence: bool, -} - -/// Configuration for image description. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct ImageDescriptionConfig { - /// Detail level of description. - #[serde(default)] - pub detail_level: DetailLevel, - /// Model to use for description. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -/// Detail level for descriptions. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum DetailLevel { - /// Brief, concise description. - Brief, - /// Standard level of detail. - #[default] - Standard, - /// Comprehensive, detailed description. - Detailed, -} - -/// Configuration for table description. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct TableDescriptionConfig { - /// Include column statistics. - #[serde(default)] - pub include_statistics: bool, - /// Model to use for description. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -/// Configuration for table to HTML conversion. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct TableToHtmlConfig { - /// Include CSS styling. - #[serde(default)] - pub include_styles: bool, - /// Preserve cell formatting. - #[serde(default = "default_true")] - pub preserve_formatting: bool, -} - -/// Configuration for citation parsing. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct CitationParsingConfig { - /// Output format for normalized citations. - #[serde(default)] - pub output_format: CitationFormat, -} - -/// Citation output formats. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum CitationFormat { - /// BibTeX format. - #[default] - Bibtex, - /// CSL-JSON format. - CslJson, - /// RIS format. - Ris, -} - -fn default_true() -> bool { - true -} diff --git a/crates/nvisy-runtime/src/graph/transformer/mod.rs b/crates/nvisy-runtime/src/graph/transformer/mod.rs deleted file mode 100644 index 9c1d2d3..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/mod.rs +++ /dev/null @@ -1,88 +0,0 @@ -//! Transformer node types for processing and transforming data. 
- -mod chunking; -mod config; -mod document; -mod embedding; -mod extraction; -mod processing; -mod quality; -mod routing; - -pub use chunking::{ - CharacterChunkingConfig, ChunkingStrategy, ContextualChunkingConfig, PageChunkingConfig, - ParagraphChunkingConfig, RecursiveChunkingConfig, SemanticChunkingConfig, - SentenceChunkingConfig, TitleChunkingConfig, -}; -pub use config::TransformerConfig; -pub use document::{ - LanguageDetectionConfig, SentimentAnalysisConfig, SentimentGranularity, SummarizationConfig, - SummaryLength, SummaryStyle, TopicClassificationConfig, TranslationConfig, -}; -pub use embedding::{ - CohereEmbeddingConfig, CohereEmbeddingModel, CohereInputType, EmbeddingProvider, - GeminiEmbeddingConfig, GeminiEmbeddingModel, GenerateEmbeddingsConfig, OllamaEmbeddingConfig, - OpenAiEmbeddingConfig, OpenAiEmbeddingModel, -}; -pub use extraction::{ - CitationFormat, CitationParsingConfig, DetailLevel, EntityRelationExtractionConfig, EntityType, - ExtractTextConfig, ImageDescriptionConfig, MetadataExtractionConfig, - NamedEntityRecognitionConfig, TableDescriptionConfig, TableToHtmlConfig, -}; -pub use processing::{ - ChunkContentConfig, ChunkContentConfigBuilder, ConvertFormatConfig, FilterConfig, - LlmTransformConfig, LlmTransformConfigBuilder, MergeConfig, MergeStrategy, ValidateConfig, -}; -pub use quality::{ - DataNormalizationConfig, DateTimeNormalization, DeduplicationConfig, DeduplicationStrategy, - NormalizationType, TextCleaningConfig, TextCleaningOperation, UnitMapping, UnitNormalization, - UnitSystem, -}; -pub use routing::{ - ContentTypePort, ContentTypeRouterConfig, DateField, DurationRouterConfig, - FileDateRouterConfig, FileNamePattern, FileNameRouterConfig, FileSizeRouterConfig, - LanguageRouterConfig, MimeMapping, PageCountRouterConfig, -}; -use serde::{Deserialize, Serialize}; - -/// A data transformer node that processes or transforms data. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct TransformerNode { - /// Display name of the transformer. - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - /// Description of what this transformer does. - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, - /// Transformer configuration. - pub config: TransformerConfig, -} - -impl TransformerNode { - /// Creates a new transformer node. - pub fn new(config: TransformerConfig) -> Self { - Self { - name: None, - description: None, - config, - } - } - - /// Sets the display name. - pub fn with_name(mut self, name: impl Into) -> Self { - self.name = Some(name.into()); - self - } - - /// Sets the description. - pub fn with_description(mut self, description: impl Into) -> Self { - self.description = Some(description.into()); - self - } -} - -impl From for TransformerNode { - fn from(config: TransformerConfig) -> Self { - Self::new(config) - } -} diff --git a/crates/nvisy-runtime/src/graph/transformer/processing.rs b/crates/nvisy-runtime/src/graph/transformer/processing.rs deleted file mode 100644 index 4ad0190..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/processing.rs +++ /dev/null @@ -1,131 +0,0 @@ -//! Data processing transformer configurations. - -use derive_builder::Builder; -use serde::{Deserialize, Serialize}; - -/// Configuration for content chunking (simple character-based). 
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)] -#[builder( - name = "ChunkContentConfigBuilder", - pattern = "owned", - setter(into, strip_option, prefix = "with"), - build_fn(validate = "Self::validate") -)] -pub struct ChunkContentConfig { - /// Maximum chunk size in characters. - pub max_chunk_size: usize, - /// Overlap between chunks in characters. - #[serde(default)] - #[builder(default)] - pub overlap: usize, -} - -impl ChunkContentConfigBuilder { - fn validate(&self) -> Result<(), String> { - if self.max_chunk_size.is_some_and(|s| s == 0) { - return Err("max_chunk_size must be greater than 0".into()); - } - if let (Some(max), Some(overlap)) = (&self.max_chunk_size, &self.overlap) - && overlap >= max - { - return Err("overlap must be less than max_chunk_size".into()); - } - Ok(()) - } -} - -/// Configuration for LLM transformation. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)] -#[builder( - name = "LlmTransformConfigBuilder", - pattern = "owned", - setter(into, strip_option, prefix = "with"), - build_fn(validate = "Self::validate") -)] -pub struct LlmTransformConfig { - /// Model identifier. - pub model: String, - /// System prompt. - #[serde(skip_serializing_if = "Option::is_none")] - #[builder(default)] - pub system_prompt: Option, - /// User prompt template. - pub prompt_template: String, - /// Temperature for generation. - #[serde(skip_serializing_if = "Option::is_none")] - #[builder(default)] - pub temperature: Option, - /// Maximum tokens to generate. - #[serde(skip_serializing_if = "Option::is_none")] - #[builder(default)] - pub max_tokens: Option, -} - -impl LlmTransformConfigBuilder { - fn validate(&self) -> Result<(), String> { - if self.model.as_ref().is_some_and(|m| m.is_empty()) { - return Err("model cannot be empty".into()); - } - if self.prompt_template.as_ref().is_some_and(|p| p.is_empty()) { - return Err("prompt_template cannot be empty".into()); - } - if let Some(Some(temp)) = &self.temperature - && (*temp < 0.0 || *temp > 2.0) - { - return Err("temperature must be between 0.0 and 2.0".into()); - } - Ok(()) - } -} - -/// Configuration for format conversion. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct ConvertFormatConfig { - /// Target format. - pub target_format: String, - /// Format-specific options. - #[serde(default)] - pub options: serde_json::Value, -} - -/// Configuration for validation. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct ValidateConfig { - /// JSON schema for validation. - pub schema: serde_json::Value, - /// Whether to fail on validation error. - #[serde(default = "default_true")] - pub fail_on_error: bool, -} - -/// Configuration for filtering. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct FilterConfig { - /// Filter expression. - pub expression: String, -} - -/// Configuration for merging. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct MergeConfig { - /// Merge strategy. - #[serde(default)] - pub strategy: MergeStrategy, -} - -/// Merge strategy. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum MergeStrategy { - /// Concatenate all inputs. - #[default] - Concatenate, - /// Interleave inputs. - Interleave, - /// Take first non-empty input. 
- First, -} - -fn default_true() -> bool { - true -} diff --git a/crates/nvisy-runtime/src/graph/transformer/quality.rs b/crates/nvisy-runtime/src/graph/transformer/quality.rs deleted file mode 100644 index 7aeef7c..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/quality.rs +++ /dev/null @@ -1,147 +0,0 @@ -//! Data quality and normalization transformer configurations. - -use serde::{Deserialize, Serialize}; - -/// Configuration for data normalization (dates, times, units). -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct DataNormalizationConfig { - /// Types of normalization to apply. - #[serde(default)] - pub normalizations: Vec, -} - -/// Types of data normalization. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum NormalizationType { - /// Normalize date and time formats. - DateTime(DateTimeNormalization), - /// Convert measurement units. - Unit(UnitNormalization), -} - -/// Date and time normalization settings. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct DateTimeNormalization { - /// Target format (ISO 8601 by default). - #[serde(default = "default_datetime_format")] - pub target_format: String, - /// Target timezone (UTC by default). - #[serde(skip_serializing_if = "Option::is_none")] - pub target_timezone: Option, -} - -impl Default for DateTimeNormalization { - fn default() -> Self { - Self { - target_format: default_datetime_format(), - target_timezone: None, - } - } -} - -/// Unit normalization settings. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct UnitNormalization { - /// Target unit system. - #[serde(default)] - pub target_system: UnitSystem, - /// Specific unit mappings (e.g., "miles" -> "kilometers"). - #[serde(default)] - pub conversions: Vec, -} - -/// Unit systems for conversion. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum UnitSystem { - /// International System of Units. - #[default] - Si, - /// Imperial/US customary units. - Imperial, -} - -/// Mapping for unit conversion. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct UnitMapping { - /// Source unit. - pub from: String, - /// Target unit. - pub to: String, -} - -/// Configuration for deduplication. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct DeduplicationConfig { - /// Similarity threshold for considering duplicates (0.0 to 1.0). - #[serde(default = "default_similarity_threshold")] - pub similarity_threshold: f32, - /// Deduplication strategy. - #[serde(default)] - pub strategy: DeduplicationStrategy, -} - -impl Default for DeduplicationConfig { - fn default() -> Self { - Self { - similarity_threshold: default_similarity_threshold(), - strategy: DeduplicationStrategy::default(), - } - } -} - -/// Deduplication strategies. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum DeduplicationStrategy { - /// Keep first occurrence. - #[default] - KeepFirst, - /// Keep last occurrence. - KeepLast, - /// Keep longest version. - KeepLongest, - /// Merge duplicates. - Merge, -} - -/// Configuration for text cleaning and correction. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct TextCleaningConfig { - /// Language code for language-specific rules. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub language: Option, - /// Cleaning operations to apply. - #[serde(default)] - pub operations: Vec, - /// Model to use for LLM-based cleaning. - #[serde(skip_serializing_if = "Option::is_none")] - pub model: Option, -} - -/// Text cleaning operations. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum TextCleaningOperation { - /// Fix spelling errors. - FixSpelling, - /// Fix grammar errors. - FixGrammar, - /// Normalize whitespace (remove extra spaces, normalize line breaks). - NormalizeWhitespace, - /// Normalize unicode (NFC normalization). - NormalizeUnicode, - /// Remove HTML tags. - StripHtml, - /// Fix common OCR errors. - FixOcrErrors, -} - -fn default_datetime_format() -> String { - "%Y-%m-%dT%H:%M:%S%.3fZ".to_string() -} - -fn default_similarity_threshold() -> f32 { - 0.9 -} diff --git a/crates/nvisy-runtime/src/graph/transformer/routing.rs b/crates/nvisy-runtime/src/graph/transformer/routing.rs deleted file mode 100644 index 9bd95a9..0000000 --- a/crates/nvisy-runtime/src/graph/transformer/routing.rs +++ /dev/null @@ -1,134 +0,0 @@ -//! Routing transformer configurations. - -use serde::{Deserialize, Serialize}; - -/// Configuration for content type routing. -/// -/// Routes content based on detected mime type (magic bytes + extension fallback). -/// Output ports: `text`, `image`, `audio`, `video`, `document`, `default`. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct ContentTypeRouterConfig { - /// Custom mime type to port mappings (overrides defaults). - #[serde(default)] - pub mappings: Vec, -} - -/// Custom mime type to port mapping. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MimeMapping { - /// Mime type pattern (e.g., "application/pdf", "image/*"). - pub mime: String, - /// Target port. - pub port: ContentTypePort, -} - -/// Output ports for content type routing. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ContentTypePort { - Text, - Image, - Audio, - Video, - Document, - Default, -} - -/// Configuration for file size routing. -/// -/// Routes based on file size threshold. -/// Output ports: `true` (above threshold), `false` (below threshold), `default`. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct FileSizeRouterConfig { - /// Size threshold in bytes. - pub threshold_bytes: u64, -} - -/// Configuration for page count routing. -/// -/// Routes documents based on page count threshold. -/// Output ports: `true` (above threshold), `false` (below threshold), `default`. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PageCountRouterConfig { - /// Page count threshold. - pub threshold_pages: u32, -} - -/// Configuration for duration routing. -/// -/// Routes audio/video based on duration threshold. -/// Output ports: `true` (above threshold), `false` (below threshold), `default`. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct DurationRouterConfig { - /// Duration threshold in seconds. - pub threshold_seconds: u64, -} - -/// Configuration for language routing. -/// -/// Routes based on detected content language. -/// Output ports: configured language codes + `multiple` + `default`. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct LanguageRouterConfig { - /// Language codes to route (e.g., "en", "es", "fr"). 
- #[serde(default)] - pub languages: Vec, - /// Minimum confidence threshold (0.0 to 1.0) to consider a language detected. - #[serde(default = "default_confidence")] - pub min_confidence: f32, - /// Minimum percentage of content (0.0 to 1.0) for a language to be considered present. - #[serde(default = "default_min_percentage")] - pub min_percentage: f32, -} - -/// Configuration for file date routing. -/// -/// Routes based on file date threshold. -/// Output ports: `true` (newer than threshold), `false` (older than threshold), `default`. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct FileDateRouterConfig { - /// Which date field to use. - #[serde(default)] - pub date_field: DateField, - /// Threshold as ISO 8601 datetime or relative duration (e.g., "7d", "30d", "1y"). - pub threshold: String, -} - -/// Date field to use for routing. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum DateField { - /// File creation date. - #[default] - Created, - /// File modification date. - Modified, -} - -/// Configuration for filename routing. -/// -/// Routes based on regex pattern matching on filename. -/// Output ports: user-defined ports from pattern mappings + `default`. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct FileNameRouterConfig { - /// Regex pattern to port mappings (evaluated in order, first match wins). - #[serde(default)] - pub patterns: Vec, -} - -/// Filename pattern to port mapping. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct FileNamePattern { - /// Regex pattern to match against filename. - pub regex: String, - /// Target port name. - pub port: String, -} - -fn default_confidence() -> f32 { - 0.8 -} - -fn default_min_percentage() -> f32 { - 0.1 -} diff --git a/crates/nvisy-runtime/src/graph/workflow.rs b/crates/nvisy-runtime/src/graph/workflow.rs index 4d6ea2a..ca4af4f 100644 --- a/crates/nvisy-runtime/src/graph/workflow.rs +++ b/crates/nvisy-runtime/src/graph/workflow.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; +use jiff::Timestamp; use petgraph::Direction; use petgraph::algo::{is_cyclic_directed, toposort}; use petgraph::graph::{DiGraph, NodeIndex}; @@ -9,6 +10,7 @@ use petgraph::visit::EdgeRef; use semver::Version; use serde::{Deserialize, Serialize}; +use super::edge::EdgeData; use super::{Edge, NodeData, NodeId}; use crate::error::{WorkflowError, WorkflowResult}; @@ -27,15 +29,12 @@ pub struct WorkflowMetadata { /// Tags for organization. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub tags: Vec, - /// Author identifier. + /// Creation timestamp. #[serde(skip_serializing_if = "Option::is_none")] - pub author: Option, - /// Creation timestamp (ISO 8601). + pub created_at: Option, + /// Last update timestamp. #[serde(skip_serializing_if = "Option::is_none")] - pub created_at: Option, - /// Last update timestamp (ISO 8601). - #[serde(skip_serializing_if = "Option::is_none")] - pub updated_at: Option, + pub updated_at: Option, } impl WorkflowMetadata { @@ -62,12 +61,6 @@ impl WorkflowMetadata { self } - /// Sets the author. - pub fn with_author(mut self, author: impl Into) -> Self { - self.author = Some(author.into()); - self - } - /// Adds tags. pub fn with_tags(mut self, tags: impl IntoIterator>) -> Self { self.tags = tags.into_iter().map(Into::into).collect(); @@ -90,17 +83,6 @@ pub struct WorkflowGraph { pub metadata: WorkflowMetadata, } -/// Edge data stored in the graph. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] -pub struct EdgeData { - /// Optional port/slot name on the source node. - #[serde(skip_serializing_if = "Option::is_none")] - pub from_port: Option, - /// Optional port/slot name on the target node. - #[serde(skip_serializing_if = "Option::is_none")] - pub to_port: Option, -} - impl WorkflowGraph { /// Creates a new empty workflow graph. pub fn new() -> Self { diff --git a/crates/nvisy-runtime/src/provider/runtime/config.rs b/crates/nvisy-runtime/src/provider/runtime/config.rs index b21312b..0b215a6 100644 --- a/crates/nvisy-runtime/src/provider/runtime/config.rs +++ b/crates/nvisy-runtime/src/provider/runtime/config.rs @@ -2,8 +2,8 @@ use serde::{Deserialize, Serialize}; -/// Default maximum file size: 100 MB. -const DEFAULT_MAX_FILE_SIZE: u64 = 100 * 1024 * 1024; +/// Default maximum file size: 12 MB. +const DEFAULT_MAX_FILE_SIZE: u64 = 12 * 1024 * 1024; /// Configuration for the runtime service with sensible defaults. #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index 7a3bb4b..b6c0546 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -34,6 +34,7 @@ config = [ # Internal crates nvisy-nats = { workspace = true, features = ["schema"] } nvisy-postgres = { workspace = true, features = ["schema"] } +nvisy-runtime = { workspace = true, features = [] } nvisy-webhook = { workspace = true, features = ["schema"] } # Async runtime diff --git a/crates/nvisy-server/src/handler/pipelines.rs b/crates/nvisy-server/src/handler/pipelines.rs index 578d3c7..028c5ba 100644 --- a/crates/nvisy-server/src/handler/pipelines.rs +++ b/crates/nvisy-server/src/handler/pipelines.rs @@ -213,13 +213,6 @@ async fn update_pipeline( ) .await?; - // Check if pipeline is editable - if !existing.is_editable() { - return Err(ErrorKind::BadRequest - .with_message("Pipeline cannot be edited in its current state") - .with_resource("pipeline")); - } - let update_data = request.into_model(); let pipeline = conn .update_pipeline(path_params.pipeline_id, update_data) @@ -234,7 +227,7 @@ async fn update_pipeline( fn update_pipeline_docs(op: TransformOperation) -> TransformOperation { op.summary("Update pipeline") - .description("Updates an existing pipeline. Only provided fields are updated. Pipeline must be in an editable state.") + .description("Updates an existing pipeline. Only provided fields are updated.") .response::<200, Json>() .response::<400, Json>() .response::<401, Json>() diff --git a/crates/nvisy-server/src/handler/request/pipelines.rs b/crates/nvisy-server/src/handler/request/pipelines.rs index acb5a19..f3966d3 100644 --- a/crates/nvisy-server/src/handler/request/pipelines.rs +++ b/crates/nvisy-server/src/handler/request/pipelines.rs @@ -6,6 +6,7 @@ use nvisy_postgres::model::{NewPipeline, UpdatePipeline as UpdatePipelineModel}; use nvisy_postgres::types::PipelineStatus; +use nvisy_runtime::graph::WorkflowGraph; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -13,8 +14,8 @@ use validator::Validate; /// Request payload for creating a new pipeline. /// -/// Creates a new pipeline with the specified configuration. The creator is -/// automatically set as the owner of the pipeline. +/// Creates a new pipeline with the specified name and optional description. +/// The definition can be added later via update. 
#[must_use] #[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] #[serde(rename_all = "camelCase")] @@ -25,10 +26,6 @@ pub struct CreatePipeline { /// Optional description of the pipeline (max 500 characters). #[validate(length(max = 500))] pub description: Option, - /// Pipeline definition containing steps and configuration. - pub definition: Option, - /// Extended metadata for the pipeline. - pub metadata: Option, } impl CreatePipeline { @@ -45,8 +42,6 @@ impl CreatePipeline { account_id, name: self.name, description: self.description, - definition: self.definition, - metadata: self.metadata, ..Default::default() } } @@ -55,6 +50,7 @@ impl CreatePipeline { /// Request payload to update an existing pipeline. /// /// All fields are optional; only provided fields will be updated. +/// The definition field accepts a strictly typed WorkflowGraph. #[must_use] #[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] #[serde(rename_all = "camelCase")] @@ -67,10 +63,9 @@ pub struct UpdatePipeline { pub description: Option, /// New status for the pipeline. pub status: Option, - /// New definition for the pipeline. - pub definition: Option, - /// New metadata for the pipeline. - pub metadata: Option, + /// New definition for the pipeline (strictly typed workflow graph). + #[schemars(with = "Option")] + pub definition: Option, } impl UpdatePipeline { @@ -80,8 +75,9 @@ impl UpdatePipeline { name: self.name, description: self.description.map(Some), status: self.status, - definition: self.definition, - metadata: self.metadata, + definition: self.definition.map(|d| { + serde_json::to_value(d).expect("WorkflowGraph serialization should not fail") + }), ..Default::default() } } diff --git a/crates/nvisy-server/src/handler/response/pipelines.rs b/crates/nvisy-server/src/handler/response/pipelines.rs index 7a1fa6a..316a1e4 100644 --- a/crates/nvisy-server/src/handler/response/pipelines.rs +++ b/crates/nvisy-server/src/handler/response/pipelines.rs @@ -3,6 +3,7 @@ use jiff::Timestamp; use nvisy_postgres::model; use nvisy_postgres::types::PipelineStatus; +use nvisy_runtime::graph::WorkflowGraph; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -26,16 +27,9 @@ pub struct Pipeline { pub description: Option, /// Pipeline lifecycle status. pub status: PipelineStatus, - /// Pipeline definition (steps, configuration). - pub definition: serde_json::Value, - /// Extended metadata. - pub metadata: serde_json::Value, - /// Number of steps in the pipeline. - pub step_count: usize, - /// Whether the pipeline can be executed. - pub is_runnable: bool, - /// Whether the pipeline can be edited. - pub is_editable: bool, + /// Pipeline definition (workflow graph). + #[schemars(with = "serde_json::Value")] + pub definition: WorkflowGraph, /// Timestamp when the pipeline was created. pub created_at: Timestamp, /// Timestamp when the pipeline was last updated. @@ -45,18 +39,16 @@ pub struct Pipeline { impl Pipeline { /// Creates a new instance of [`Pipeline`] from the database model. 
pub fn from_model(pipeline: model::Pipeline) -> Self { + let definition: WorkflowGraph = + serde_json::from_value(pipeline.definition).unwrap_or_default(); Self { pipeline_id: pipeline.id, workspace_id: pipeline.workspace_id, account_id: pipeline.account_id, - name: pipeline.name.clone(), - description: pipeline.description.clone(), + name: pipeline.name, + description: pipeline.description, status: pipeline.status, - step_count: pipeline.step_count(), - is_runnable: pipeline.is_runnable(), - is_editable: pipeline.is_editable(), - definition: pipeline.definition.clone(), - metadata: pipeline.metadata.clone(), + definition, created_at: pipeline.created_at.into(), updated_at: pipeline.updated_at.into(), } @@ -79,10 +71,6 @@ pub struct PipelineSummary { pub description: Option, /// Pipeline lifecycle status. pub status: PipelineStatus, - /// Number of steps in the pipeline. - pub step_count: usize, - /// Whether the pipeline can be executed. - pub is_runnable: bool, /// Timestamp when the pipeline was created. pub created_at: Timestamp, /// Timestamp when the pipeline was last updated. @@ -92,15 +80,11 @@ pub struct PipelineSummary { impl PipelineSummary { /// Creates a new instance of [`PipelineSummary`] from the database model. pub fn from_model(pipeline: model::Pipeline) -> Self { - let step_count = pipeline.step_count(); - let is_runnable = pipeline.is_runnable(); Self { pipeline_id: pipeline.id, name: pipeline.name, description: pipeline.description, status: pipeline.status, - step_count, - is_runnable, created_at: pipeline.created_at.into(), updated_at: pipeline.updated_at.into(), } diff --git a/crates/nvisy-server/src/middleware/constants.rs b/crates/nvisy-server/src/middleware/constants.rs index f8c22a0..a386b93 100644 --- a/crates/nvisy-server/src/middleware/constants.rs +++ b/crates/nvisy-server/src/middleware/constants.rs @@ -6,8 +6,8 @@ /// and prevent denial-of-service attacks via large payloads. pub const DEFAULT_MAX_BODY_SIZE: usize = 4 * 1024 * 1024; -/// Maximum file size for uploads: 100MB. +/// Maximum file size for uploads: 12MB. /// /// Used in file upload handlers to enforce file size limits /// before accepting file data into memory. 
-pub const DEFAULT_MAX_FILE_BODY_SIZE: usize = 100 * 1024 * 1024; +pub const DEFAULT_MAX_FILE_BODY_SIZE: usize = 12 * 1024 * 1024; diff --git a/migrations/2026-01-19-045012_pipelines/up.sql b/migrations/2026-01-19-045012_pipelines/up.sql index b0329f8..edb8fb8 100644 --- a/migrations/2026-01-19-045012_pipelines/up.sql +++ b/migrations/2026-01-19-045012_pipelines/up.sql @@ -4,7 +4,7 @@ -- Pipeline status enum CREATE TYPE PIPELINE_STATUS AS ENUM ( 'draft', -- Pipeline is being configured - 'active', -- Pipeline is ready to run + 'enabled', -- Pipeline is ready to run 'disabled' -- Pipeline is disabled ); From 0d3506374c59e060f2a330e393bf9b1bfde8c074 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 22 Jan 2026 00:13:06 +0100 Subject: [PATCH 14/28] feat(rig): add agent module, refactor providers with Arc for cheap cloning - Add agent module with 5 specialized agents: - VisionAgent: VLM tasks (image description, OCR, object detection) - TableAgent: table processing and format conversion - TextAnalysisAgent: NER, keywords, classification, sentiment - TextGenerationAgent: summarization, titles, contextual chunking - StructuredOutputAgent: JSON conversion with schema validation - Add Agents struct for convenient access to all agents - Refactor CompletionProvider and EmbeddingProvider to use Arc for cheap cloning - Rename inner enums to CompletionService and EmbeddingService - Split completion provider.rs into provider.rs, response.rs, rig_impl.rs - Split embedding provider.rs into provider.rs, rig_impl.rs - Add strum derives (AsRefStr, Display, EnumString) to model enums - Add PromptError variant to Error enum - Refactor transform configs: - ChunkConfig: add contextual_chunking, rename overlap - PartitionConfig: add PartitionStrategy enum (Auto, Fast, Slow, Vlm) - EmbeddingConfig: add normalize field - Add EnrichConfig, ExtractConfig, DeriveConfig for LLM tasks --- Cargo.lock | 1 + crates/nvisy-rig/Cargo.toml | 1 + crates/nvisy-rig/src/agent/mod.rs | 59 +++ .../nvisy-rig/src/agent/structured_output.rs | 71 ++++ crates/nvisy-rig/src/agent/table.rs | 108 +++++ crates/nvisy-rig/src/agent/text_analysis.rs | 107 +++++ crates/nvisy-rig/src/agent/text_generation.rs | 84 ++++ crates/nvisy-rig/src/agent/vision.rs | 87 ++++ crates/nvisy-rig/src/chat/agent/context.rs | 11 - crates/nvisy-rig/src/error.rs | 9 + crates/nvisy-rig/src/lib.rs | 1 + .../nvisy-rig/src/provider/completion/mod.rs | 7 + .../src/provider/completion/model.rs | 97 ++--- .../src/provider/completion/provider.rs | 269 +++++++++++++ .../src/provider/completion/response.rs | 38 ++ .../src/provider/completion/rig_impl.rs | 153 +++++++ .../nvisy-rig/src/provider/embedding/mod.rs | 3 +- .../nvisy-rig/src/provider/embedding/model.rs | 53 +-- .../src/provider/embedding/provider.rs | 140 +++---- .../src/provider/embedding/rig_impl.rs | 48 +++ crates/nvisy-rig/src/provider/mod.rs | 4 +- .../src/provider/splitting/metadata.rs | 6 +- .../nvisy-rig/src/provider/splitting/mod.rs | 2 +- .../src/provider/splitting/splitter.rs | 73 ++-- crates/nvisy-rig/src/rag/config.rs | 10 +- crates/nvisy-rig/src/rag/indexer/mod.rs | 21 +- crates/nvisy-rig/src/rag/mod.rs | 8 +- crates/nvisy-rig/src/rag/vector_store.rs | 378 ++++++++++++++++++ .../src/graph/transform/chunk.rs | 45 +-- .../src/graph/transform/derive.rs | 29 ++ .../src/graph/transform/embedding.rs | 4 + .../src/graph/transform/enrich.rs | 52 ++- .../src/graph/transform/extract.rs | 90 +++++ .../nvisy-runtime/src/graph/transform/mod.rs | 21 +- .../src/graph/transform/partition.rs | 31 +- 
crates/nvisy-runtime/src/provider/mod.rs | 5 +- 36 files changed, 1854 insertions(+), 272 deletions(-) create mode 100644 crates/nvisy-rig/src/agent/mod.rs create mode 100644 crates/nvisy-rig/src/agent/structured_output.rs create mode 100644 crates/nvisy-rig/src/agent/table.rs create mode 100644 crates/nvisy-rig/src/agent/text_analysis.rs create mode 100644 crates/nvisy-rig/src/agent/text_generation.rs create mode 100644 crates/nvisy-rig/src/agent/vision.rs create mode 100644 crates/nvisy-rig/src/provider/completion/provider.rs create mode 100644 crates/nvisy-rig/src/provider/completion/response.rs create mode 100644 crates/nvisy-rig/src/provider/completion/rig_impl.rs create mode 100644 crates/nvisy-rig/src/provider/embedding/rig_impl.rs create mode 100644 crates/nvisy-rig/src/rag/vector_store.rs create mode 100644 crates/nvisy-runtime/src/graph/transform/derive.rs create mode 100644 crates/nvisy-runtime/src/graph/transform/extract.rs diff --git a/Cargo.lock b/Cargo.lock index 6a50ed9..d157c38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3335,6 +3335,7 @@ dependencies = [ "serde", "serde_json", "sha2", + "strum 0.27.2", "text-splitter", "thiserror 2.0.18", "tokio", diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index e532293..de71b8f 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -48,6 +48,7 @@ thiserror = { workspace = true } # Derive macros derive_builder = { workspace = true } derive_more = { workspace = true } +strum = { workspace = true } # Observability tracing = { workspace = true } diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs new file mode 100644 index 0000000..eecfc59 --- /dev/null +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -0,0 +1,59 @@ +//! Agent module for LLM-powered document processing tasks. +//! +//! This module provides specialized agents for different types of tasks: +//! +//! - [`VisionAgent`] - VLM tasks (image description, OCR, object detection) +//! - [`TableAgent`] - Table processing (descriptions, format conversion) +//! - [`TextAnalysisAgent`] - Text analysis (NER, keywords, classification, sentiment) +//! - [`TextGenerationAgent`] - Text generation (summarization, titles) +//! - [`StructuredOutputAgent`] - JSON conversion (structured extraction) +//! +//! Use [`Agents`] to create all agents from a single provider. + +mod structured_output; +mod table; +mod text_analysis; +mod text_generation; +mod vision; + +pub use structured_output::StructuredOutputAgent; +pub use table::TableAgent; +pub use text_analysis::TextAnalysisAgent; +pub use text_generation::TextGenerationAgent; +pub use vision::VisionAgent; + +use crate::provider::CompletionProvider; + +/// Collection of all specialized agents. +/// +/// Provides convenient access to all agents created from a single completion provider. +/// +/// # Example +/// +/// ```ignore +/// let provider = CompletionProvider::new(...); +/// let agents = Agents::new(provider); +/// +/// let summary = agents.text_generation().summarize("...").await?; +/// let entities = agents.text_analysis().extract_entities("...").await?; +/// ``` +pub struct Agents { + pub structured_output_agent: StructuredOutputAgent, + pub table_agent: TableAgent, + pub text_analysis_agent: TextAnalysisAgent, + pub text_generation_agent: TextGenerationAgent, + pub vision_agent: VisionAgent, +} + +impl Agents { + /// Creates all agents from a completion provider. 
+ pub fn new(provider: CompletionProvider) -> Self { + Self { + structured_output_agent: StructuredOutputAgent::new(provider.clone()), + table_agent: TableAgent::new(provider.clone()), + text_analysis_agent: TextAnalysisAgent::new(provider.clone()), + text_generation_agent: TextGenerationAgent::new(provider.clone()), + vision_agent: VisionAgent::new(provider), + } + } +} diff --git a/crates/nvisy-rig/src/agent/structured_output.rs b/crates/nvisy-rig/src/agent/structured_output.rs new file mode 100644 index 0000000..97766e8 --- /dev/null +++ b/crates/nvisy-rig/src/agent/structured_output.rs @@ -0,0 +1,71 @@ +//! Structured output agent for JSON conversion tasks. + +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::Prompt; + +use crate::Result; +use crate::provider::CompletionProvider; + +const NAME: &str = "StructuredOutputAgent"; +const DESCRIPTION: &str = + "Agent for converting unstructured text to structured JSON with optional schema validation"; + +const PREAMBLE: &str = "\ +You are a data extraction assistant specialized in converting unstructured text to structured JSON. +Your task is to identify and extract relevant information and format it as valid JSON. +When a schema is provided, strictly adhere to it. Use null for fields that cannot be determined. +Always output valid JSON, no explanations or markdown formatting."; + +const PROMPT_TO_JSON: &str = "\ +Convert the following text to a well-structured JSON object. +Identify the key information and organize it logically. +Only output valid JSON, no explanation."; + +const PROMPT_TO_STRUCTURED_JSON: &str = "\ +Extract information from the following text and format it as JSON matching this schema: + +Schema: +{} + +Only output valid JSON that conforms to the schema, no explanation. +If a field cannot be determined from the text, use null."; + +/// Agent for structured output tasks. +/// +/// Handles tasks that convert text to structured JSON: +/// - Free-form JSON conversion +/// - Schema-based structured extraction +pub struct StructuredOutputAgent { + agent: Agent, +} + +impl StructuredOutputAgent { + /// Creates a new structured output agent with the given completion provider. + pub fn new(provider: CompletionProvider) -> Self { + let agent = AgentBuilder::new(provider) + .name(NAME) + .description(DESCRIPTION) + .preamble(PREAMBLE) + .build(); + Self { agent } + } + + /// Converts text to JSON format. + /// + /// Attempts to extract structured information from free-form text + /// and represent it as JSON. + pub async fn to_json(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_TO_JSON, text); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Converts text to JSON matching a specific schema. + /// + /// Extracts information from text and structures it according to + /// the provided JSON schema. + pub async fn to_structured_json(&self, text: &str, schema: &str) -> Result { + let base_prompt = PROMPT_TO_STRUCTURED_JSON.replace("{}", schema); + let prompt = format!("{}\n\nText:\n{}", base_prompt, text); + Ok(self.agent.prompt(&prompt).await?) + } +} diff --git a/crates/nvisy-rig/src/agent/table.rs b/crates/nvisy-rig/src/agent/table.rs new file mode 100644 index 0000000..68e67be --- /dev/null +++ b/crates/nvisy-rig/src/agent/table.rs @@ -0,0 +1,108 @@ +//! Table agent for table processing tasks. 
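
The `Agents` collection introduced above bundles the five specialized agents behind a single constructor. Below is a minimal usage sketch, not part of the patch itself: it assumes a `CompletionProvider` has already been built elsewhere (its constructors are not shown here), that the crate-root `Result<T>` alias is generic over the success type, and `enrich_document` is an illustrative name.

```rust
use nvisy_rig::Result;
use nvisy_rig::agent::Agents;
use nvisy_rig::provider::CompletionProvider;

/// Runs one enrichment pass over a plain-text document using the
/// specialized agents added in this patch.
async fn enrich_document(provider: CompletionProvider, text: &str) -> Result<()> {
    let agents = Agents::new(provider);

    // Summary and title come from the text-generation agent.
    let summary = agents.text_generation_agent.summarize(text).await?;
    let title = agents.text_generation_agent.generate_title(text).await?;

    // Named entities from the analysis agent, free-form JSON from the
    // structured-output agent, both over the same input text.
    let entities = agents.text_analysis_agent.extract_entities(text).await?;
    let structured = agents.structured_output_agent.to_json(text).await?;

    println!("{title}\n{summary}\n{entities}\n{structured}");
    Ok(())
}
```
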
+ +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::Prompt; + +use crate::Result; +use crate::provider::CompletionProvider; + +const NAME: &str = "TableAgent"; +const DESCRIPTION: &str = "Agent for table processing including description and format conversion (HTML, Markdown, CSV, JSON)"; + +const PREAMBLE: &str = "\ +You are a table processing assistant specialized in understanding and transforming tabular data. +Your task is to analyze tables and either describe their contents or convert them to different formats. +Preserve data accuracy and structure during conversions. +When outputting structured data, use valid JSON format."; + +const PROMPT_DESCRIBE: &str = "\ +Describe this table concisely. Include: +- What data the table contains +- Number of rows and columns +- Key insights or patterns"; + +const PROMPT_DESCRIBE_COLUMNS: &str = "\ +For each column in this table, provide: +- Column name +- Data type (text, number, date, etc.) +- Brief description of what the column contains + +Format as a JSON array with objects containing 'name', 'type', and 'description' fields."; + +const PROMPT_TO_HTML: &str = "\ +Convert this table to clean, semantic HTML. +Use , , , ,
, and tags appropriately. +Do not include any CSS or styling. Only output the HTML, no explanation."; + +const PROMPT_TO_MARKDOWN: &str = "\ +Convert this table to Markdown format. +Use proper Markdown table syntax with | separators and header dividers. +Only output the Markdown table, no explanation."; + +const PROMPT_TO_CSV: &str = "\ +Convert this table to CSV format. +Use commas as delimiters and quote fields containing commas or newlines. +Only output the CSV, no explanation."; + +const PROMPT_TO_JSON: &str = "\ +Convert this table to a JSON array of objects. +Each row should be an object with column names as keys. +Only output valid JSON, no explanation."; + +/// Agent for table processing tasks. +/// +/// Handles tasks that involve understanding and transforming tables: +/// - Table description +/// - Column descriptions +/// - Format conversion (HTML, Markdown, CSV, JSON) +pub struct TableAgent { + agent: Agent, +} + +impl TableAgent { + /// Creates a new table agent with the given completion provider. + pub fn new(provider: CompletionProvider) -> Self { + let agent = AgentBuilder::new(provider) + .name(NAME) + .description(DESCRIPTION) + .preamble(PREAMBLE) + .build(); + Self { agent } + } + + /// Generates a description of a table. + pub async fn describe(&self, table_content: &str) -> Result { + let prompt = format!("{}\n\nTable:\n{}", PROMPT_DESCRIBE, table_content); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Generates descriptions for each column in a table. + pub async fn describe_columns(&self, table_content: &str) -> Result { + let prompt = format!("{}\n\nTable:\n{}", PROMPT_DESCRIBE_COLUMNS, table_content); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Converts a table to HTML format. + pub async fn to_html(&self, table_content: &str) -> Result { + let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_HTML, table_content); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Converts a table to Markdown format. + pub async fn to_markdown(&self, table_content: &str) -> Result { + let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_MARKDOWN, table_content); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Converts a table to CSV format. + pub async fn to_csv(&self, table_content: &str) -> Result { + let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_CSV, table_content); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Converts a table to JSON format. + pub async fn to_json(&self, table_content: &str) -> Result { + let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_JSON, table_content); + Ok(self.agent.prompt(&prompt).await?) + } +} diff --git a/crates/nvisy-rig/src/agent/text_analysis.rs b/crates/nvisy-rig/src/agent/text_analysis.rs new file mode 100644 index 0000000..a3ab7eb --- /dev/null +++ b/crates/nvisy-rig/src/agent/text_analysis.rs @@ -0,0 +1,107 @@ +//! Text analysis agent for extracting structured information. + +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::Prompt; + +use crate::Result; +use crate::provider::CompletionProvider; + +const NAME: &str = "TextAnalysisAgent"; +const DESCRIPTION: &str = "Agent for text analysis including entity extraction, keyword extraction, classification, and sentiment analysis"; + +const PREAMBLE: &str = "\ +You are a text analysis assistant specialized in extracting structured information from text. +Your task is to identify entities, relationships, sentiment, and other structured data from unstructured text. +Be precise and comprehensive in your extractions. 
+Always output valid JSON format matching the requested structure."; + +const PROMPT_EXTRACT_ENTITIES: &str = "\ +Extract all named entities from the following text. +Identify: people, organizations, locations, dates, monetary values, and other notable entities. + +Format as a JSON array with objects containing 'text', 'type', and 'start_index' fields."; + +const PROMPT_EXTRACT_KEYWORDS: &str = "\ +Extract the most important keywords and key phrases from the following text. +Return 5-15 keywords ordered by relevance. + +Format as a JSON array of strings."; + +const PROMPT_CLASSIFY: &str = "\ +Classify the following text into one or more of these categories: {} + +Format as a JSON object with 'labels' (array of matching categories) \ +and 'confidence' (object mapping each label to a confidence score 0-1)."; + +const PROMPT_ANALYZE_SENTIMENT: &str = "\ +Analyze the sentiment of the following text. + +Format as a JSON object with: +- 'sentiment': one of 'positive', 'negative', 'neutral', or 'mixed' +- 'confidence': confidence score 0-1 +- 'explanation': brief explanation of the sentiment"; + +const PROMPT_EXTRACT_RELATIONSHIPS: &str = "\ +Extract relationships between entities in the following text. +Identify how people, organizations, and other entities are connected. + +Format as a JSON array with objects containing: +- 'subject': the first entity +- 'predicate': the relationship type +- 'object': the second entity"; + +/// Agent for text analysis tasks. +/// +/// Handles tasks that extract structured information from text: +/// - Named entity recognition (NER) +/// - Keyword extraction +/// - Classification +/// - Sentiment analysis +/// - Relationship extraction +pub struct TextAnalysisAgent { + agent: Agent, +} + +impl TextAnalysisAgent { + /// Creates a new text analysis agent with the given completion provider. + pub fn new(provider: CompletionProvider) -> Self { + let agent = AgentBuilder::new(provider) + .name(NAME) + .description(DESCRIPTION) + .preamble(PREAMBLE) + .build(); + Self { agent } + } + + /// Extracts named entities from text. + pub async fn extract_entities(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_ENTITIES, text); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Extracts keywords from text. + pub async fn extract_keywords(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_KEYWORDS, text); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Classifies text into provided categories. + pub async fn classify(&self, text: &str, labels: &[String]) -> Result { + let labels_str = labels.join(", "); + let base_prompt = PROMPT_CLASSIFY.replace("{}", &labels_str); + let prompt = format!("{}\n\nText:\n{}", base_prompt, text); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Analyzes sentiment of text. + pub async fn analyze_sentiment(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_ANALYZE_SENTIMENT, text); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Extracts relationships between entities in text. + pub async fn extract_relationships(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_RELATIONSHIPS, text); + Ok(self.agent.prompt(&prompt).await?) + } +} diff --git a/crates/nvisy-rig/src/agent/text_generation.rs b/crates/nvisy-rig/src/agent/text_generation.rs new file mode 100644 index 0000000..675e8c7 --- /dev/null +++ b/crates/nvisy-rig/src/agent/text_generation.rs @@ -0,0 +1,84 @@ +//! 
Text generation agent for creating new text content. + +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::Prompt; + +use crate::Result; +use crate::provider::CompletionProvider; + +const NAME: &str = "TextGenerationAgent"; +const DESCRIPTION: &str = + "Agent for text generation including summarization, title generation, and contextual chunking"; + +const PREAMBLE: &str = "\ +You are a text generation assistant specialized in creating concise, high-quality content. +Your task is to generate summaries, titles, and contextual information based on input text. +Maintain accuracy while being concise. Preserve the key information and main points."; + +const PROMPT_SUMMARIZE: &str = "\ +Summarize the following text concisely while preserving the key information and main points. +The summary should be about 20-30% of the original length."; + +const PROMPT_GENERATE_TITLE: &str = "\ +Generate a concise, descriptive title for the following text. +The title should capture the main topic and be no more than 10 words. + +Only output the title, no explanation or quotes."; + +const PROMPT_GENERATE_CHUNK_CONTEXT: &str = "\ +Given the following document summary and a specific chunk from that document, \ +generate a brief context statement (1-2 sentences) that situates this chunk \ +within the broader document. This context will be prepended to the chunk \ +to improve retrieval quality. + +Only output the context statement, no explanation."; + +/// Agent for text generation tasks. +/// +/// Handles tasks that generate new text content: +/// - Summarization +/// - Title generation +/// - Contextual chunking (adding context to chunks) +pub struct TextGenerationAgent { + agent: Agent, +} + +impl TextGenerationAgent { + /// Creates a new text generation agent with the given completion provider. + pub fn new(provider: CompletionProvider) -> Self { + let agent = AgentBuilder::new(provider) + .name(NAME) + .description(DESCRIPTION) + .preamble(PREAMBLE) + .build(); + Self { agent } + } + + /// Generates a summary of the text. + pub async fn summarize(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_SUMMARIZE, text); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Generates a title for the text. + pub async fn generate_title(&self, text: &str) -> Result { + let prompt = format!("{}\n\nText:\n{}", PROMPT_GENERATE_TITLE, text); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Generates contextual information for a chunk. + /// + /// This is used for contextual chunking, where each chunk is enriched + /// with context about how it fits into the larger document. + pub async fn generate_chunk_context( + &self, + chunk: &str, + document_summary: &str, + ) -> Result { + let prompt = format!( + "{}\n\nDocument Summary:\n{}\n\nChunk:\n{}", + PROMPT_GENERATE_CHUNK_CONTEXT, document_summary, chunk + ); + Ok(self.agent.prompt(&prompt).await?) + } +} diff --git a/crates/nvisy-rig/src/agent/vision.rs b/crates/nvisy-rig/src/agent/vision.rs new file mode 100644 index 0000000..a5c17d4 --- /dev/null +++ b/crates/nvisy-rig/src/agent/vision.rs @@ -0,0 +1,87 @@ +//! Vision agent for VLM-powered tasks. 
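
The commit message also mentions contextual chunking. A rough sketch of how that step could lean on `TextGenerationAgent`: the document is summarized once, and each chunk gets a generated context statement prepended before indexing. This assumes chunks are produced upstream by the splitting provider, that `Result<T>` is the crate's generic result alias, and `contextualize_chunks` is an illustrative name.

```rust
use nvisy_rig::Result;
use nvisy_rig::agent::TextGenerationAgent;

/// Prepends an LLM-generated context statement to each chunk so retrieval
/// sees the chunk situated within the whole document.
async fn contextualize_chunks(
    agent: &TextGenerationAgent,
    document: &str,
    chunks: Vec<String>,
) -> Result<Vec<String>> {
    // One summary per document, reused for every chunk.
    let summary = agent.summarize(document).await?;

    let mut out = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        // generate_chunk_context takes the chunk first, then the summary.
        let context = agent.generate_chunk_context(&chunk, &summary).await?;
        out.push(format!("{context}\n\n{chunk}"));
    }
    Ok(out)
}
```
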
+ +use rig::agent::{Agent, AgentBuilder}; +use rig::completion::Prompt; + +use crate::Result; +use crate::provider::CompletionProvider; + +const NAME: &str = "VisionAgent"; +const DESCRIPTION: &str = + "Agent for vision-language model tasks including image description, OCR, and object detection"; + +const PREAMBLE: &str = "\ +You are a vision analysis assistant specialized in understanding and describing visual content. +Your task is to analyze images and provide accurate, detailed information based on what you observe. +Always be precise and factual in your descriptions. If you cannot determine something with certainty, say so. +When outputting structured data, use valid JSON format."; + +const PROMPT_DESCRIBE: &str = "Describe this image concisely in 1-2 sentences."; + +const PROMPT_DESCRIBE_DETAILED: &str = "\ +Provide a detailed description of this image, including: +- Main subjects and objects +- Text visible in the image +- Colors and visual style +- Layout and composition"; + +const PROMPT_EXTRACT_TEXT: &str = "\ +Extract all text visible in this image. +Preserve the original formatting and structure as much as possible. +If no text is visible, respond with 'No text detected.'"; + +const PROMPT_DETECT_OBJECTS: &str = "\ +List all objects and entities visible in this image. +For each object, provide: +- Object type/name +- Brief description +- Approximate location (e.g., top-left, center, bottom-right) + +Format as a JSON array."; + +/// Agent for vision-language model tasks. +/// +/// Handles tasks that require understanding visual content: +/// - Image description (brief and detailed) +/// - Generative OCR (text extraction from images) +/// - Object detection +/// - VLM-based document partitioning +pub struct VisionAgent { + agent: Agent, +} + +impl VisionAgent { + /// Creates a new vision agent with the given completion provider. + pub fn new(provider: CompletionProvider) -> Self { + let agent = AgentBuilder::new(provider) + .name(NAME) + .description(DESCRIPTION) + .preamble(PREAMBLE) + .build(); + Self { agent } + } + + /// Generates a brief description of an image. + pub async fn describe(&self, image_base64: &str) -> Result { + let prompt = format!("{}\n\n[Image: {}]", PROMPT_DESCRIBE, image_base64); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Generates a detailed description of an image. + pub async fn describe_detailed(&self, image_base64: &str) -> Result { + let prompt = format!("{}\n\n[Image: {}]", PROMPT_DESCRIBE_DETAILED, image_base64); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Extracts text from an image using generative OCR. + pub async fn extract_text(&self, image_base64: &str) -> Result { + let prompt = format!("{}\n\n[Image: {}]", PROMPT_EXTRACT_TEXT, image_base64); + Ok(self.agent.prompt(&prompt).await?) + } + + /// Detects and lists objects in an image. + pub async fn detect_objects(&self, image_base64: &str) -> Result { + let prompt = format!("{}\n\n[Image: {}]", PROMPT_DETECT_OBJECTS, image_base64); + Ok(self.agent.prompt(&prompt).await?) + } +} diff --git a/crates/nvisy-rig/src/chat/agent/context.rs b/crates/nvisy-rig/src/chat/agent/context.rs index 9d9256d..c6a4483 100644 --- a/crates/nvisy-rig/src/chat/agent/context.rs +++ b/crates/nvisy-rig/src/chat/agent/context.rs @@ -1,6 +1,5 @@ //! Agent context for a single request. 
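Since the vision agent above passes images as base64 strings embedded in the prompt, a call site can stay equally small (sketch; `image_base64` is assumed to already hold an encoded image, and the crate-root `Result` re-export is assumed as before):

```rust
use nvisy_rig::agent::VisionAgent;
use nvisy_rig::provider::CompletionProvider;

// Sketch: `provider` should point at a vision-capable model.
async fn ocr_image(provider: CompletionProvider, image_base64: &str) -> nvisy_rig::Result<String> {
    let agent = VisionAgent::new(provider);
    // Generative OCR: the prompt asks the model to transcribe all visible text.
    agent.extract_text(image_base64).await
}
```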
-use crate::provider::estimate_tokens; use crate::rag::RetrievedChunk; use crate::session::Session; @@ -46,15 +45,6 @@ impl AgentContext { pub fn has_context(&self) -> bool { !self.retrieved_chunks.is_empty() } - - /// Returns the total token count of retrieved chunks (estimated). - pub fn context_tokens(&self) -> u32 { - self.retrieved_chunks - .iter() - .filter_map(|c| c.content.as_deref()) - .map(estimate_tokens) - .sum() - } } #[cfg(test)] @@ -74,6 +64,5 @@ mod tests { let context = AgentContext::new(session, "Hello".to_string(), Vec::new()); assert!(!context.has_context()); - assert_eq!(context.context_tokens(), 0); } } diff --git a/crates/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs index 75d6284..c67563d 100644 --- a/crates/nvisy-rig/src/error.rs +++ b/crates/nvisy-rig/src/error.rs @@ -2,6 +2,7 @@ use std::fmt; +use rig::completion::{CompletionError, PromptError}; use rig::embeddings::EmbeddingError; /// Result type alias for rig operations. @@ -26,6 +27,14 @@ pub enum Error { #[error("embedding error: {0}")] Embedding(#[from] EmbeddingError), + /// Completion error. + #[error("completion error: {0}")] + Completion(#[from] CompletionError), + + /// Prompt error. + #[error("prompt error: {0}")] + Prompt(#[from] PromptError), + /// Configuration error. #[error("configuration error: {0}")] Config(String), diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index ace76bf..53c9ee5 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -2,6 +2,7 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] +pub mod agent; pub mod chat; mod error; pub mod provider; diff --git a/crates/nvisy-rig/src/provider/completion/mod.rs b/crates/nvisy-rig/src/provider/completion/mod.rs index 6422c6e..bb505a4 100644 --- a/crates/nvisy-rig/src/provider/completion/mod.rs +++ b/crates/nvisy-rig/src/provider/completion/mod.rs @@ -2,9 +2,16 @@ mod credentials; mod model; +mod provider; +mod response; +mod rig_impl; pub use credentials::CompletionCredentials; pub use model::{ AnthropicModel, CohereCompletionModel, CompletionModel, GeminiCompletionModel, OpenAiCompletionModel, PerplexityModel, }; +pub use provider::CompletionProvider; +// Response types are part of the public API for CompletionModel trait consumers +#[allow(unused_imports)] +pub use response::{ProviderResponse, ProviderStreamingResponse}; diff --git a/crates/nvisy-rig/src/provider/completion/model.rs b/crates/nvisy-rig/src/provider/completion/model.rs index 33d874b..b2f59bb 100644 --- a/crates/nvisy-rig/src/provider/completion/model.rs +++ b/crates/nvisy-rig/src/provider/completion/model.rs @@ -1,6 +1,7 @@ //! Type-safe completion model references. use serde::{Deserialize, Serialize}; +use strum::{AsRefStr, Display, EnumString}; /// Reference to a completion/chat model. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -17,139 +18,117 @@ pub enum CompletionModel { /// Perplexity models. Perplexity(PerplexityModel), /// Ollama local models (model name as string). + #[cfg(feature = "ollama")] Ollama(String), } /// OpenAI completion models. 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] #[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] pub enum OpenAiCompletionModel { /// GPT-4o (multimodal flagship) + #[strum(serialize = "gpt-4o")] Gpt4o, /// GPT-4o mini (fast, affordable) + #[strum(serialize = "gpt-4o-mini")] Gpt4oMini, /// GPT-4 Turbo + #[strum(serialize = "gpt-4-turbo")] Gpt4Turbo, /// o1 (reasoning) + #[strum(serialize = "o1")] O1, /// o1 mini (fast reasoning) + #[strum(serialize = "o1-mini")] O1Mini, /// o3 mini (latest reasoning) + #[strum(serialize = "o3-mini")] O3Mini, } -impl OpenAiCompletionModel { - pub fn as_str(&self) -> &'static str { - match self { - Self::Gpt4o => "gpt-4o", - Self::Gpt4oMini => "gpt-4o-mini", - Self::Gpt4Turbo => "gpt-4-turbo", - Self::O1 => "o1", - Self::O1Mini => "o1-mini", - Self::O3Mini => "o3-mini", - } - } -} - /// Anthropic models. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] #[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] pub enum AnthropicModel { /// Claude Opus 4 (most capable) + #[strum(serialize = "claude-opus-4-20250514")] ClaudeOpus4, /// Claude Sonnet 4 (balanced) + #[strum(serialize = "claude-sonnet-4-20250514")] ClaudeSonnet4, /// Claude Haiku 3.5 (fast) + #[strum(serialize = "claude-3-5-haiku-20241022")] ClaudeHaiku35, } -impl AnthropicModel { - pub fn as_str(&self) -> &'static str { - match self { - Self::ClaudeOpus4 => "claude-opus-4-20250514", - Self::ClaudeSonnet4 => "claude-sonnet-4-20250514", - Self::ClaudeHaiku35 => "claude-3-5-haiku-20241022", - } - } -} - /// Cohere completion models. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] #[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] pub enum CohereCompletionModel { /// Command R+ (most capable) + #[strum(serialize = "command-r-plus")] CommandRPlus, /// Command R (balanced) + #[strum(serialize = "command-r")] CommandR, /// Command (legacy) + #[strum(serialize = "command")] Command, } -impl CohereCompletionModel { - pub fn as_str(&self) -> &'static str { - match self { - Self::CommandRPlus => "command-r-plus", - Self::CommandR => "command-r", - Self::Command => "command", - } - } -} - /// Google Gemini completion models. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] #[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] pub enum GeminiCompletionModel { /// Gemini 2.0 Flash (fast, multimodal) + #[strum(serialize = "gemini-2.0-flash")] Gemini20Flash, /// Gemini 2.0 Flash Thinking (reasoning) + #[strum(serialize = "gemini-2.0-flash-thinking-exp")] Gemini20FlashThinking, /// Gemini 1.5 Pro (long context) + #[strum(serialize = "gemini-1.5-pro")] Gemini15Pro, /// Gemini 1.5 Flash (fast) + #[strum(serialize = "gemini-1.5-flash")] Gemini15Flash, } -impl GeminiCompletionModel { - pub fn as_str(&self) -> &'static str { - match self { - Self::Gemini20Flash => "gemini-2.0-flash", - Self::Gemini20FlashThinking => "gemini-2.0-flash-thinking-exp", - Self::Gemini15Pro => "gemini-1.5-pro", - Self::Gemini15Flash => "gemini-1.5-flash", - } - } -} - /// Perplexity models. 
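With the hand-written `as_str` helpers replaced by strum derives, the wire-format model ids now round-trip through `AsRefStr`/`Display`/`EnumString`; the Perplexity enum below follows the same pattern. A small sketch of what the derives provide:

```rust
use std::str::FromStr;

use nvisy_rig::provider::{AnthropicModel, OpenAiCompletionModel};

fn strum_round_trip() {
    // AsRefStr / Display render the provider's model id...
    assert_eq!(OpenAiCompletionModel::Gpt4oMini.as_ref(), "gpt-4o-mini");
    assert_eq!(
        AnthropicModel::ClaudeSonnet4.to_string(),
        "claude-sonnet-4-20250514"
    );
    // ...and EnumString parses it back.
    assert_eq!(
        OpenAiCompletionModel::from_str("o3-mini"),
        Ok(OpenAiCompletionModel::O3Mini)
    );
}
```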
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] #[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] pub enum PerplexityModel { /// Sonar (online, search-augmented) + #[strum(serialize = "sonar")] Sonar, /// Sonar Pro (online, more capable) + #[strum(serialize = "sonar-pro")] SonarPro, /// Sonar Reasoning (online, reasoning) + #[strum(serialize = "sonar-reasoning")] SonarReasoning, } -impl PerplexityModel { - pub fn as_str(&self) -> &'static str { - match self { - Self::Sonar => "sonar", - Self::SonarPro => "sonar-pro", - Self::SonarReasoning => "sonar-reasoning", - } - } -} - impl CompletionModel { + /// Returns the model identifier string. pub fn as_str(&self) -> &str { match self { - Self::OpenAi(m) => m.as_str(), - Self::Anthropic(m) => m.as_str(), - Self::Cohere(m) => m.as_str(), - Self::Gemini(m) => m.as_str(), - Self::Perplexity(m) => m.as_str(), + Self::OpenAi(m) => m.as_ref(), + Self::Anthropic(m) => m.as_ref(), + Self::Cohere(m) => m.as_ref(), + Self::Gemini(m) => m.as_ref(), + Self::Perplexity(m) => m.as_ref(), + #[cfg(feature = "ollama")] Self::Ollama(m) => m.as_str(), } } diff --git a/crates/nvisy-rig/src/provider/completion/provider.rs b/crates/nvisy-rig/src/provider/completion/provider.rs new file mode 100644 index 0000000..f3ad81b --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/provider.rs @@ -0,0 +1,269 @@ +//! Completion provider abstraction. + +use std::sync::Arc; + +#[cfg(feature = "ollama")] +use rig::client::Nothing; +use rig::completion::{AssistantContent, CompletionError, CompletionModel as RigCompletionModel}; +use rig::message::Message; +use rig::one_or_many::OneOrMany; +use rig::prelude::CompletionClient; +#[cfg(feature = "ollama")] +use rig::providers::ollama; +use rig::providers::{anthropic, cohere, gemini, openai, perplexity}; + +use super::credentials::CompletionCredentials; +use super::model::{AnthropicModel, CompletionModel}; +use crate::{Error, Result}; + +/// Completion provider that wraps different rig completion model implementations. +/// +/// This is a cheaply cloneable wrapper around an `Arc`. +#[derive(Clone)] +pub struct CompletionProvider(Arc); + +pub(crate) enum CompletionService { + OpenAi { + model: openai::CompletionModel, + model_name: String, + }, + Anthropic { + model: anthropic::completion::CompletionModel, + model_name: String, + }, + Cohere { + model: cohere::CompletionModel, + model_name: String, + }, + Gemini { + model: gemini::completion::CompletionModel, + model_name: String, + }, + Perplexity { + model: perplexity::CompletionModel, + model_name: String, + }, + #[cfg(feature = "ollama")] + Ollama { + client: ollama::Client, + model_name: String, + }, +} + +impl CompletionProvider { + /// Returns a reference to the inner provider. + pub(crate) fn inner(&self) -> &CompletionService { + &self.0 + } + + /// Creates a new completion provider from credentials and model. + pub fn new(credentials: &CompletionCredentials, model: &CompletionModel) -> Result { + let inner = match (credentials, model) { + (CompletionCredentials::OpenAi { api_key }, CompletionModel::OpenAi(m)) => { + let client = openai::Client::new(api_key) + .map_err(|e| Error::provider("openai", e.to_string()))? 
+ .completions_api(); + CompletionService::OpenAi { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + (CompletionCredentials::Anthropic { api_key }, CompletionModel::Anthropic(m)) => { + let client = anthropic::Client::new(api_key) + .map_err(|e| Error::provider("anthropic", e.to_string()))?; + CompletionService::Anthropic { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + (CompletionCredentials::Cohere { api_key }, CompletionModel::Cohere(m)) => { + let client = cohere::Client::new(api_key) + .map_err(|e| Error::provider("cohere", e.to_string()))?; + CompletionService::Cohere { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + (CompletionCredentials::Gemini { api_key }, CompletionModel::Gemini(m)) => { + let client = gemini::Client::new(api_key) + .map_err(|e| Error::provider("gemini", e.to_string()))?; + CompletionService::Gemini { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + (CompletionCredentials::Perplexity { api_key }, CompletionModel::Perplexity(m)) => { + let client = perplexity::Client::new(api_key) + .map_err(|e| Error::provider("perplexity", e.to_string()))?; + CompletionService::Perplexity { + model: client.completion_model(m.as_ref()), + model_name: m.as_ref().to_string(), + } + } + #[cfg(feature = "ollama")] + (CompletionCredentials::Ollama { base_url }, CompletionModel::Ollama(model_name)) => { + let client = ollama::Client::builder() + .api_key(Nothing) + .base_url(base_url) + .build() + .map_err(|e| Error::provider("ollama", e.to_string()))?; + CompletionService::Ollama { + client, + model_name: model_name.clone(), + } + } + #[allow(unreachable_patterns)] + _ => return Err(Error::config("mismatched credentials and model provider")), + }; + Ok(Self(Arc::new(inner))) + } + + /// Creates an Ollama completion provider (convenience for local development). + #[cfg(feature = "ollama")] + pub fn ollama(base_url: &str, model_name: &str) -> Result { + let client = ollama::Client::builder() + .api_key(Nothing) + .base_url(base_url) + .build() + .map_err(|e| Error::provider("ollama", e.to_string()))?; + Ok(Self(Arc::new(CompletionService::Ollama { + client, + model_name: model_name.to_string(), + }))) + } + + /// Creates an Anthropic completion provider with a specific model. + pub fn anthropic(api_key: &str, model: AnthropicModel) -> Result { + let client = anthropic::Client::new(api_key) + .map_err(|e| Error::provider("anthropic", e.to_string()))?; + Ok(Self(Arc::new(CompletionService::Anthropic { + model: client.completion_model(model.as_ref()), + model_name: model.as_ref().to_string(), + }))) + } + + /// Returns the model name. + pub fn model_name(&self) -> &str { + match self.0.as_ref() { + CompletionService::OpenAi { model_name, .. } => model_name, + CompletionService::Anthropic { model_name, .. } => model_name, + CompletionService::Cohere { model_name, .. } => model_name, + CompletionService::Gemini { model_name, .. } => model_name, + CompletionService::Perplexity { model_name, .. } => model_name, + #[cfg(feature = "ollama")] + CompletionService::Ollama { model_name, .. } => model_name, + } + } + + /// Returns the provider name. + pub fn provider_name(&self) -> &'static str { + match self.0.as_ref() { + CompletionService::OpenAi { .. } => "openai", + CompletionService::Anthropic { .. } => "anthropic", + CompletionService::Cohere { .. } => "cohere", + CompletionService::Gemini { .. 
} => "gemini", + CompletionService::Perplexity { .. } => "perplexity", + #[cfg(feature = "ollama")] + CompletionService::Ollama { .. } => "ollama", + } + } + + /// Sends a completion request with the given prompt and chat history. + pub async fn complete(&self, prompt: &str, chat_history: Vec) -> Result { + let model_name = self.model_name().to_string(); + let map_err = |e: CompletionError| Error::provider(&model_name, e.to_string()); + + match self.0.as_ref() { + CompletionService::OpenAi { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + CompletionService::Anthropic { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + CompletionService::Cohere { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + CompletionService::Gemini { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + CompletionService::Perplexity { model, .. } => model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err), + #[cfg(feature = "ollama")] + CompletionService::Ollama { client, model_name } => { + let model = client.completion_model(model_name); + model + .completion_request(prompt) + .messages(chat_history) + .send() + .await + .map(|r| extract_text_content(&r.choice)) + .map_err(map_err) + } + } + } +} + +/// Extracts text content from assistant content choices. +fn extract_text_content(choice: &OneOrMany) -> String { + choice + .iter() + .filter_map(|content| match content { + AssistantContent::Text(text) => Some(text.text()), + _ => None, + }) + .collect::>() + .join("") +} + +impl std::fmt::Debug for CompletionProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.0.as_ref() { + CompletionService::OpenAi { model_name, .. } => f + .debug_struct("CompletionProvider::OpenAi") + .field("model", model_name) + .finish(), + CompletionService::Anthropic { model_name, .. } => f + .debug_struct("CompletionProvider::Anthropic") + .field("model", model_name) + .finish(), + CompletionService::Cohere { model_name, .. } => f + .debug_struct("CompletionProvider::Cohere") + .field("model", model_name) + .finish(), + CompletionService::Gemini { model_name, .. } => f + .debug_struct("CompletionProvider::Gemini") + .field("model", model_name) + .finish(), + CompletionService::Perplexity { model_name, .. } => f + .debug_struct("CompletionProvider::Perplexity") + .field("model", model_name) + .finish(), + #[cfg(feature = "ollama")] + CompletionService::Ollama { model_name, .. } => f + .debug_struct("CompletionProvider::Ollama") + .field("model", model_name) + .finish(), + } + } +} diff --git a/crates/nvisy-rig/src/provider/completion/response.rs b/crates/nvisy-rig/src/provider/completion/response.rs new file mode 100644 index 0000000..8d82b4f --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/response.rs @@ -0,0 +1,38 @@ +//! Response types for completion provider. + +use rig::completion::{GetTokenUsage, Usage}; +use serde::{Deserialize, Serialize}; + +/// Unified raw response type for CompletionProvider. 
+/// +/// This type normalizes responses from different providers into a common format. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProviderResponse { + /// The provider name. + pub provider: String, + /// The model name used. + pub model: String, +} + +impl GetTokenUsage for ProviderResponse { + fn token_usage(&self) -> Option { + None + } +} + +/// Streaming response placeholder for CompletionProvider. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProviderStreamingResponse { + /// The provider name. + pub provider: String, + /// The model name used. + pub model: String, + /// Token usage if available. + pub usage: Option, +} + +impl GetTokenUsage for ProviderStreamingResponse { + fn token_usage(&self) -> Option { + self.usage + } +} diff --git a/crates/nvisy-rig/src/provider/completion/rig_impl.rs b/crates/nvisy-rig/src/provider/completion/rig_impl.rs new file mode 100644 index 0000000..2da6597 --- /dev/null +++ b/crates/nvisy-rig/src/provider/completion/rig_impl.rs @@ -0,0 +1,153 @@ +//! rig-core trait implementations for CompletionProvider. + +use rig::completion::{ + CompletionError, CompletionModel as RigCompletionModel, CompletionRequest, CompletionResponse, +}; +use rig::message::Message; +use rig::one_or_many::OneOrMany; +#[cfg(feature = "ollama")] +use rig::prelude::CompletionClient; +use rig::streaming::StreamingCompletionResponse; + +use super::provider::{CompletionProvider, CompletionService}; +use super::response::{ProviderResponse, ProviderStreamingResponse}; + +impl RigCompletionModel for CompletionProvider { + type Client = (); + type Response = ProviderResponse; + type StreamingResponse = ProviderStreamingResponse; + + fn make(_client: &Self::Client, _model: impl Into) -> Self { + // This is a no-op since CompletionProvider is constructed via its own methods + panic!("CompletionProvider should be constructed via CompletionProvider::new()") + } + + async fn completion( + &self, + request: CompletionRequest, + ) -> std::result::Result, CompletionError> { + // Extract the prompt from the request's chat history (last message) + let last_message = request.chat_history.last(); + let prompt = match last_message { + Message::User { content } => content + .iter() + .filter_map(|c| match c { + rig::message::UserContent::Text(t) => Some(t.text()), + _ => None, + }) + .collect::>() + .join(""), + _ => String::new(), + }; + + // Get chat history without the last message (which is the prompt) + let chat_history: Vec = if request.chat_history.len() > 1 { + request + .chat_history + .iter() + .take(request.chat_history.len() - 1) + .cloned() + .collect() + } else { + vec![] + }; + + // Build the full prompt with preamble if present + let full_prompt = match &request.preamble { + Some(preamble) => format!("{}\n\n{}", preamble, prompt), + None => prompt, + }; + + // Delegate to the underlying model based on variant + let (choice, usage) = match self.inner() { + CompletionService::OpenAi { model, .. } => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + CompletionService::Anthropic { model, .. } => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + CompletionService::Cohere { model, .. } => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + CompletionService::Gemini { model, .. 
} => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + CompletionService::Perplexity { model, .. } => { + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + #[cfg(feature = "ollama")] + CompletionService::Ollama { client, model_name } => { + let model = client.completion_model(model_name); + let resp = model + .completion(build_request(&full_prompt, &chat_history, &request)) + .await?; + (resp.choice, resp.usage) + } + }; + + Ok(CompletionResponse { + choice, + usage, + raw_response: ProviderResponse { + provider: self.provider_name().to_string(), + model: self.model_name().to_string(), + }, + }) + } + + async fn stream( + &self, + request: CompletionRequest, + ) -> std::result::Result, CompletionError> + { + // For now, streaming is not fully implemented - we'd need to unify the streaming types + // This is a placeholder that returns an error + let _ = request; + Err(CompletionError::RequestError( + "Streaming not yet implemented for CompletionProvider".into(), + )) + } +} + +/// Builds a completion request for delegation to underlying models. +fn build_request( + prompt: &str, + chat_history: &[Message], + original: &CompletionRequest, +) -> CompletionRequest { + CompletionRequest { + preamble: None, // Already incorporated into prompt + chat_history: { + let mut history = chat_history.to_vec(); + history.push(Message::User { + content: OneOrMany::one(rig::message::UserContent::text(prompt)), + }); + OneOrMany::many(history).unwrap_or_else(|_| { + OneOrMany::one(Message::User { + content: OneOrMany::one(rig::message::UserContent::text(prompt)), + }) + }) + }, + documents: original.documents.clone(), + tools: original.tools.clone(), + temperature: original.temperature, + max_tokens: original.max_tokens, + tool_choice: original.tool_choice.clone(), + additional_params: original.additional_params.clone(), + } +} diff --git a/crates/nvisy-rig/src/provider/embedding/mod.rs b/crates/nvisy-rig/src/provider/embedding/mod.rs index 760bfd7..6399aca 100644 --- a/crates/nvisy-rig/src/provider/embedding/mod.rs +++ b/crates/nvisy-rig/src/provider/embedding/mod.rs @@ -3,9 +3,10 @@ mod credentials; mod model; mod provider; +mod rig_impl; pub use credentials::EmbeddingCredentials; #[cfg(feature = "ollama")] pub use model::OllamaEmbeddingModel; pub use model::{CohereEmbeddingModel, EmbeddingModel, GeminiEmbeddingModel, OpenAiEmbeddingModel}; -pub use provider::EmbeddingProvider; +pub use provider::{EmbeddingProvider}; diff --git a/crates/nvisy-rig/src/provider/embedding/model.rs b/crates/nvisy-rig/src/provider/embedding/model.rs index db24296..3a63713 100644 --- a/crates/nvisy-rig/src/provider/embedding/model.rs +++ b/crates/nvisy-rig/src/provider/embedding/model.rs @@ -1,6 +1,7 @@ //! Type-safe embedding model references. use serde::{Deserialize, Serialize}; +use strum::{AsRefStr, Display, EnumString}; /// Reference to an embedding model. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -19,25 +20,23 @@ pub enum EmbeddingModel { /// OpenAI embedding models. 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] #[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] pub enum OpenAiEmbeddingModel { /// text-embedding-3-small (1536 dimensions) + #[strum(serialize = "text-embedding-3-small")] TextEmbedding3Small, /// text-embedding-3-large (3072 dimensions) + #[strum(serialize = "text-embedding-3-large")] TextEmbedding3Large, /// text-embedding-ada-002 (legacy, 1536 dimensions) + #[strum(serialize = "text-embedding-ada-002")] TextEmbeddingAda002, } impl OpenAiEmbeddingModel { - pub fn as_str(&self) -> &'static str { - match self { - Self::TextEmbedding3Small => "text-embedding-3-small", - Self::TextEmbedding3Large => "text-embedding-3-large", - Self::TextEmbeddingAda002 => "text-embedding-ada-002", - } - } - + /// Returns the embedding dimensions for this model. pub fn dimensions(&self) -> usize { match self { Self::TextEmbedding3Small => 1536, @@ -49,28 +48,26 @@ impl OpenAiEmbeddingModel { /// Cohere embedding models. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] #[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] pub enum CohereEmbeddingModel { /// embed-english-v3.0 (1024 dimensions) + #[strum(serialize = "embed-english-v3.0")] EmbedEnglishV3, /// embed-multilingual-v3.0 (1024 dimensions) + #[strum(serialize = "embed-multilingual-v3.0")] EmbedMultilingualV3, /// embed-english-light-v3.0 (384 dimensions) + #[strum(serialize = "embed-english-light-v3.0")] EmbedEnglishLightV3, /// embed-multilingual-light-v3.0 (384 dimensions) + #[strum(serialize = "embed-multilingual-light-v3.0")] EmbedMultilingualLightV3, } impl CohereEmbeddingModel { - pub fn as_str(&self) -> &'static str { - match self { - Self::EmbedEnglishV3 => "embed-english-v3.0", - Self::EmbedMultilingualV3 => "embed-multilingual-v3.0", - Self::EmbedEnglishLightV3 => "embed-english-light-v3.0", - Self::EmbedMultilingualLightV3 => "embed-multilingual-light-v3.0", - } - } - + /// Returns the embedding dimensions for this model. pub fn dimensions(&self) -> usize { match self { Self::EmbedEnglishV3 | Self::EmbedMultilingualV3 => 1024, @@ -81,19 +78,17 @@ impl CohereEmbeddingModel { /// Google Gemini embedding models. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(AsRefStr, Display, EnumString)] #[serde(rename_all = "kebab-case")] +#[strum(serialize_all = "kebab-case")] pub enum GeminiEmbeddingModel { /// text-embedding-004 (768 dimensions) + #[strum(serialize = "text-embedding-004")] TextEmbedding004, } impl GeminiEmbeddingModel { - pub fn as_str(&self) -> &'static str { - match self { - Self::TextEmbedding004 => "text-embedding-004", - } - } - + /// Returns the embedding dimensions for this model. pub fn dimensions(&self) -> usize { 768 } @@ -111,6 +106,7 @@ pub struct OllamaEmbeddingModel { #[cfg(feature = "ollama")] impl OllamaEmbeddingModel { + /// Creates a new Ollama embedding model configuration. 
pub fn new(name: impl Into, dimensions: usize) -> Self { Self { name: name.into(), @@ -118,30 +114,35 @@ impl OllamaEmbeddingModel { } } + /// nomic-embed-text (768 dimensions) pub fn nomic_embed_text() -> Self { Self::new("nomic-embed-text", 768) } + /// mxbai-embed-large (1024 dimensions) pub fn mxbai_embed_large() -> Self { Self::new("mxbai-embed-large", 1024) } + /// all-minilm (384 dimensions) pub fn all_minilm() -> Self { Self::new("all-minilm", 384) } } impl EmbeddingModel { + /// Returns the model identifier string. pub fn as_str(&self) -> &str { match self { - Self::OpenAi(m) => m.as_str(), - Self::Cohere(m) => m.as_str(), - Self::Gemini(m) => m.as_str(), + Self::OpenAi(m) => m.as_ref(), + Self::Cohere(m) => m.as_ref(), + Self::Gemini(m) => m.as_ref(), #[cfg(feature = "ollama")] Self::Ollama(m) => &m.name, } } + /// Returns the embedding dimensions for this model. pub fn dimensions(&self) -> usize { match self { Self::OpenAi(m) => m.dimensions(), diff --git a/crates/nvisy-rig/src/provider/embedding/provider.rs b/crates/nvisy-rig/src/provider/embedding/provider.rs index 062d6d5..b379705 100644 --- a/crates/nvisy-rig/src/provider/embedding/provider.rs +++ b/crates/nvisy-rig/src/provider/embedding/provider.rs @@ -1,10 +1,7 @@ //! Embedding provider abstraction. -use super::credentials::EmbeddingCredentials; -use super::model::EmbeddingModel; -#[cfg(feature = "ollama")] -use super::model::OllamaEmbeddingModel; -use crate::{Error, Result}; +use std::sync::Arc; + #[cfg(feature = "ollama")] use rig::client::Nothing; use rig::embeddings::{Embedding, EmbeddingModel as RigEmbeddingModel}; @@ -13,9 +10,24 @@ use rig::prelude::EmbeddingsClient; use rig::providers::ollama; use rig::providers::{cohere, gemini, openai}; +use super::credentials::EmbeddingCredentials; +use super::model::EmbeddingModel; +#[cfg(feature = "ollama")] +use super::model::OllamaEmbeddingModel; +use crate::{Error, Result}; + +/// Default maximum documents per embedding request. +/// +/// This is a conservative default; individual providers may support more. +pub(crate) const DEFAULT_MAX_DOCUMENTS: usize = 96; + /// Embedding provider that wraps different rig embedding model implementations. +/// +/// This is a cheaply cloneable wrapper around an `Arc`. #[derive(Clone)] -pub enum EmbeddingProvider { +pub struct EmbeddingProvider(Arc); + +pub(crate) enum EmbeddingService { OpenAi { model: openai::EmbeddingModel, model_name: String, @@ -37,36 +49,41 @@ pub enum EmbeddingProvider { } impl EmbeddingProvider { + /// Returns a reference to the inner provider. + pub(crate) fn inner(&self) -> &EmbeddingService { + &self.0 + } + /// Creates a new embedding provider from credentials and model. 
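The constructor that follows pairs credentials with a model from the same provider and rejects mismatches with a configuration error. Once built, batch embedding is a single call (sketch; the provider is constructed elsewhere and the crate-root `Result` re-export is assumed):

```rust
use nvisy_rig::provider::EmbeddingProvider;

async fn embed_batch(
    provider: &EmbeddingProvider,
    texts: Vec<String>,
) -> nvisy_rig::Result<usize> {
    // One embedding comes back per input text, at the model's native dimension count.
    let embeddings = provider.embed_texts(texts).await?;
    Ok(embeddings.len())
}
```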
pub fn new(credentials: &EmbeddingCredentials, model: &EmbeddingModel) -> Result { - match (credentials, model) { + let inner = match (credentials, model) { (EmbeddingCredentials::OpenAi { api_key }, EmbeddingModel::OpenAi(m)) => { let client = openai::Client::new(api_key) .map_err(|e| Error::provider("openai", e.to_string()))?; - Ok(Self::OpenAi { - model: client.embedding_model_with_ndims(m.as_str(), m.dimensions()), - model_name: m.as_str().to_string(), - }) + EmbeddingService::OpenAi { + model: client.embedding_model_with_ndims(m.as_ref(), m.dimensions()), + model_name: m.as_ref().to_string(), + } } (EmbeddingCredentials::Cohere { api_key }, EmbeddingModel::Cohere(m)) => { let client = cohere::Client::new(api_key) .map_err(|e| Error::provider("cohere", e.to_string()))?; - Ok(Self::Cohere { + EmbeddingService::Cohere { model: client.embedding_model_with_ndims( - m.as_str(), + m.as_ref(), "search_document", m.dimensions(), ), - model_name: m.as_str().to_string(), - }) + model_name: m.as_ref().to_string(), + } } (EmbeddingCredentials::Gemini { api_key }, EmbeddingModel::Gemini(m)) => { let client = gemini::Client::new(api_key) .map_err(|e| Error::provider("gemini", e.to_string()))?; - Ok(Self::Gemini { - model: client.embedding_model_with_ndims(m.as_str(), m.dimensions()), - model_name: m.as_str().to_string(), - }) + EmbeddingService::Gemini { + model: client.embedding_model_with_ndims(m.as_ref(), m.dimensions()), + model_name: m.as_ref().to_string(), + } } #[cfg(feature = "ollama")] (EmbeddingCredentials::Ollama { base_url }, EmbeddingModel::Ollama(m)) => { @@ -75,15 +92,16 @@ impl EmbeddingProvider { .base_url(base_url) .build() .map_err(|e| Error::provider("ollama", e.to_string()))?; - Ok(Self::Ollama { + EmbeddingService::Ollama { client, model_name: m.name.clone(), ndims: m.dimensions, - }) + } } #[allow(unreachable_patterns)] - _ => Err(Error::config("mismatched credentials and model provider")), - } + _ => return Err(Error::config("mismatched credentials and model provider")), + }; + Ok(Self(Arc::new(inner))) } /// Creates an Ollama embedding provider (convenience for local development). @@ -94,95 +112,77 @@ impl EmbeddingProvider { .base_url(base_url) .build() .map_err(|e| Error::provider("ollama", e.to_string()))?; - Ok(Self::Ollama { + Ok(Self(Arc::new(EmbeddingService::Ollama { client, model_name: model.name, ndims: model.dimensions, - }) + }))) } /// Returns the model name. pub fn model_name(&self) -> &str { - match self { - Self::OpenAi { model_name, .. } => model_name, - Self::Cohere { model_name, .. } => model_name, - Self::Gemini { model_name, .. } => model_name, + match self.0.as_ref() { + EmbeddingService::OpenAi { model_name, .. } => model_name, + EmbeddingService::Cohere { model_name, .. } => model_name, + EmbeddingService::Gemini { model_name, .. } => model_name, #[cfg(feature = "ollama")] - Self::Ollama { model_name, .. } => model_name, + EmbeddingService::Ollama { model_name, .. } => model_name, } } - /// Returns the number of dimensions. - pub fn ndims(&self) -> usize { - match self { - Self::OpenAi { model, .. } => model.ndims(), - Self::Cohere { model, .. } => model.ndims(), - Self::Gemini { model, .. } => model.ndims(), + /// Returns the provider name. + pub fn provider_name(&self) -> &'static str { + match self.0.as_ref() { + EmbeddingService::OpenAi { .. } => "openai", + EmbeddingService::Cohere { .. } => "cohere", + EmbeddingService::Gemini { .. } => "gemini", #[cfg(feature = "ollama")] - Self::Ollama { ndims, .. } => *ndims, + EmbeddingService::Ollama { .. 
} => "ollama", } } /// Embed a single text document. + /// + /// This is a convenience method that delegates to the trait implementation. pub async fn embed_text(&self, text: &str) -> Result { - match self { - Self::OpenAi { model, .. } => Ok(model.embed_text(text).await?), - Self::Cohere { model, .. } => Ok(model.embed_text(text).await?), - Self::Gemini { model, .. } => Ok(model.embed_text(text).await?), - #[cfg(feature = "ollama")] - Self::Ollama { - client, - model_name, - ndims, - } => { - let model = ollama::EmbeddingModel::new(client.clone(), model_name, *ndims); - Ok(model.embed_text(text).await?) - } - } + RigEmbeddingModel::embed_text(self, text) + .await + .map_err(|e| Error::provider(self.provider_name(), e.to_string())) } /// Embed multiple text documents. + /// + /// This is a convenience method that delegates to the trait implementation. pub async fn embed_texts( &self, texts: impl IntoIterator + Send, ) -> Result> { - match self { - Self::OpenAi { model, .. } => Ok(model.embed_texts(texts).await?), - Self::Cohere { model, .. } => Ok(model.embed_texts(texts).await?), - Self::Gemini { model, .. } => Ok(model.embed_texts(texts).await?), - #[cfg(feature = "ollama")] - Self::Ollama { - client, - model_name, - ndims, - } => { - let model = ollama::EmbeddingModel::new(client.clone(), model_name, *ndims); - Ok(model.embed_texts(texts).await?) - } - } + RigEmbeddingModel::embed_texts(self, texts) + .await + .map_err(|e| Error::provider(self.provider_name(), e.to_string())) } } impl std::fmt::Debug for EmbeddingProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::OpenAi { model, model_name } => f + match self.0.as_ref() { + EmbeddingService::OpenAi { model, model_name } => f .debug_struct("EmbeddingProvider::OpenAi") .field("model", model_name) .field("ndims", &model.ndims()) .finish(), - Self::Cohere { model, model_name } => f + EmbeddingService::Cohere { model, model_name } => f .debug_struct("EmbeddingProvider::Cohere") .field("model", model_name) .field("ndims", &model.ndims()) .finish(), - Self::Gemini { model, model_name } => f + EmbeddingService::Gemini { model, model_name } => f .debug_struct("EmbeddingProvider::Gemini") .field("model", model_name) .field("ndims", &model.ndims()) .finish(), #[cfg(feature = "ollama")] - Self::Ollama { + EmbeddingService::Ollama { model_name, ndims, .. } => f .debug_struct("EmbeddingProvider::Ollama") diff --git a/crates/nvisy-rig/src/provider/embedding/rig_impl.rs b/crates/nvisy-rig/src/provider/embedding/rig_impl.rs new file mode 100644 index 0000000..380bd4f --- /dev/null +++ b/crates/nvisy-rig/src/provider/embedding/rig_impl.rs @@ -0,0 +1,48 @@ +//! rig-core trait implementations for EmbeddingProvider. + +use rig::embeddings::{Embedding, EmbeddingError, EmbeddingModel as RigEmbeddingModel}; +#[cfg(feature = "ollama")] +use rig::providers::ollama; + +use super::provider::{DEFAULT_MAX_DOCUMENTS, EmbeddingProvider, EmbeddingService}; + +impl RigEmbeddingModel for EmbeddingProvider { + type Client = (); + + const MAX_DOCUMENTS: usize = DEFAULT_MAX_DOCUMENTS; + + fn make(_client: &Self::Client, _model: impl Into, _dims: Option) -> Self { + // This is a no-op since EmbeddingProvider is constructed via its own methods + panic!("EmbeddingProvider should be constructed via EmbeddingProvider::new()") + } + + fn ndims(&self) -> usize { + match self.inner() { + EmbeddingService::OpenAi { model, .. } => model.ndims(), + EmbeddingService::Cohere { model, .. 
} => model.ndims(), + EmbeddingService::Gemini { model, .. } => model.ndims(), + #[cfg(feature = "ollama")] + EmbeddingService::Ollama { ndims, .. } => *ndims, + } + } + + async fn embed_texts( + &self, + texts: impl IntoIterator + Send, + ) -> std::result::Result, EmbeddingError> { + match self.inner() { + EmbeddingService::OpenAi { model, .. } => model.embed_texts(texts).await, + EmbeddingService::Cohere { model, .. } => model.embed_texts(texts).await, + EmbeddingService::Gemini { model, .. } => model.embed_texts(texts).await, + #[cfg(feature = "ollama")] + EmbeddingService::Ollama { + client, + model_name, + ndims, + } => { + let model = ollama::EmbeddingModel::new(client.clone(), model_name, *ndims); + model.embed_texts(texts).await + } + } + } +} diff --git a/crates/nvisy-rig/src/provider/mod.rs b/crates/nvisy-rig/src/provider/mod.rs index 30ec0f9..a80012d 100644 --- a/crates/nvisy-rig/src/provider/mod.rs +++ b/crates/nvisy-rig/src/provider/mod.rs @@ -6,7 +6,7 @@ pub mod splitting; pub use completion::{ AnthropicModel, CohereCompletionModel, CompletionCredentials, CompletionModel, - GeminiCompletionModel, OpenAiCompletionModel, PerplexityModel, + CompletionProvider, GeminiCompletionModel, OpenAiCompletionModel, PerplexityModel, }; #[cfg(feature = "ollama")] pub use embedding::OllamaEmbeddingModel; @@ -14,4 +14,4 @@ pub use embedding::{ CohereEmbeddingModel, EmbeddingCredentials, EmbeddingModel, EmbeddingProvider, GeminiEmbeddingModel, OpenAiEmbeddingModel, }; -pub use splitting::{Chunk, ChunkMetadata, OwnedChunk, TextSplitter, estimate_tokens}; +pub use splitting::{Chunk, ChunkMetadata, OwnedChunk, TextSplitter}; diff --git a/crates/nvisy-rig/src/provider/splitting/metadata.rs b/crates/nvisy-rig/src/provider/splitting/metadata.rs index 9cc9b81..b3c9411 100644 --- a/crates/nvisy-rig/src/provider/splitting/metadata.rs +++ b/crates/nvisy-rig/src/provider/splitting/metadata.rs @@ -1,5 +1,7 @@ //! Split chunk metadata. +use std::num::NonZeroU32; + use serde::{Deserialize, Serialize}; /// Metadata about a split chunk's location in the source text. @@ -13,7 +15,7 @@ pub struct ChunkMetadata { pub end_offset: u32, /// Page number (1-indexed, if applicable). #[serde(default, skip_serializing_if = "Option::is_none")] - pub page: Option, + pub page: Option, } impl ChunkMetadata { @@ -28,7 +30,7 @@ impl ChunkMetadata { } /// Sets the page number. - pub fn with_page(mut self, page: u32) -> Self { + pub fn with_page(mut self, page: NonZeroU32) -> Self { self.page = Some(page); self } diff --git a/crates/nvisy-rig/src/provider/splitting/mod.rs b/crates/nvisy-rig/src/provider/splitting/mod.rs index 59602ac..37b2ca7 100644 --- a/crates/nvisy-rig/src/provider/splitting/mod.rs +++ b/crates/nvisy-rig/src/provider/splitting/mod.rs @@ -6,4 +6,4 @@ mod splitter; pub use chunk::{Chunk, OwnedChunk}; pub use metadata::ChunkMetadata; -pub use splitter::{TextSplitter, estimate_tokens}; +pub use splitter::TextSplitter; diff --git a/crates/nvisy-rig/src/provider/splitting/splitter.rs b/crates/nvisy-rig/src/provider/splitting/splitter.rs index 8959967..f9b0a2d 100644 --- a/crates/nvisy-rig/src/provider/splitting/splitter.rs +++ b/crates/nvisy-rig/src/provider/splitting/splitter.rs @@ -1,7 +1,8 @@ //! Text splitting implementation. 
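The splitter changes below replace the bare `overlap: u32` with `Option<NonZeroU32>`, so "no overlap" is expressed as `None` rather than `0`. In use (sketch):

```rust
use std::num::NonZeroU32;

use nvisy_rig::provider::TextSplitter;

fn split_into_chunks(text: &str) -> usize {
    // 512-character chunks, 64 characters of overlap, whitespace trimmed.
    let splitter = TextSplitter::new(512, NonZeroU32::new(64), true);
    splitter.split(text).len()
}
```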
+use std::num::NonZeroU32; + use text_splitter::{ChunkConfig, TextSplitter as TextSplitterImpl}; -use tracing::{debug, instrument}; use super::{Chunk, ChunkMetadata, OwnedChunk}; @@ -9,24 +10,33 @@ use super::{Chunk, ChunkMetadata, OwnedChunk}; #[derive(Debug, Clone)] pub struct TextSplitter { max_characters: u32, - overlap: u32, - trim: bool, + overlap_characters: Option, + trim_whitespace: bool, } impl TextSplitter { /// Creates a new text splitter. - pub fn new(max_characters: u32, overlap: u32, trim: bool) -> Self { - debug!(max_characters, overlap, trim, "created text splitter"); + pub fn new( + max_characters: u32, + overlap_characters: Option, + trim_whitespace: bool, + ) -> Self { + tracing::debug!( + max_characters, + ?overlap_characters, + trim_whitespace, + "created text splitter" + ); Self { max_characters, - overlap, - trim, + overlap_characters, + trim_whitespace, } } /// Creates a splitter with default settings (512 chars, no overlap, trimmed). pub fn with_defaults() -> Self { - Self::new(512, 0, true) + Self::new(512, None, true) } /// Returns the maximum characters per chunk. @@ -35,17 +45,18 @@ impl TextSplitter { } /// Returns the overlap between chunks. - pub fn overlap(&self) -> u32 { - self.overlap + pub fn overlap_characters(&self) -> Option { + self.overlap_characters } /// Splits text into chunks with byte offset tracking. - #[instrument(skip(self, text), fields(text_len = text.len()))] + #[tracing::instrument(skip(self, text), fields(text_len = text.len()))] pub fn split<'a>(&self, text: &'a str) -> Vec> { + let overlap = self.overlap_characters.map_or(0, |v| v.get() as usize); let chunk_config = ChunkConfig::new(self.max_characters as usize) - .with_overlap(self.overlap as usize) + .with_overlap(overlap) .expect("overlap must be less than max_characters") - .with_trim(self.trim); + .with_trim(self.trim_whitespace); let splitter = TextSplitterImpl::new(chunk_config); @@ -61,12 +72,12 @@ impl TextSplitter { }) .collect(); - debug!(chunk_count = chunks.len(), "split text into chunks"); + tracing::debug!(chunk_count = chunks.len(), "split text into chunks"); chunks } /// Splits text and returns owned chunks. - #[instrument(skip(self, text), fields(text_len = text.len()))] + #[tracing::instrument(skip(self, text), fields(text_len = text.len()))] pub fn split_owned(&self, text: &str) -> Vec { self.split(text) .into_iter() @@ -77,7 +88,7 @@ impl TextSplitter { /// Splits text with page awareness. /// /// Page breaks are indicated by form feed characters (`\x0c`). - #[instrument(skip(self, text), fields(text_len = text.len()))] + #[tracing::instrument(skip(self, text), fields(text_len = text.len()))] pub fn split_with_pages<'a>(&self, text: &'a str) -> Vec> { let page_breaks: Vec = text .char_indices() @@ -85,17 +96,20 @@ impl TextSplitter { .map(|(i, _)| i as u32) .collect(); - debug!(page_count = page_breaks.len() + 1, "detected pages"); + tracing::debug!(page_count = page_breaks.len() + 1, "detected pages"); self.split(text) .into_iter() .map(|chunk| { - let page = page_breaks + let page_num = page_breaks .iter() .take_while(|&&pos| pos < chunk.metadata.start_offset) .count() as u32 + 1; + // SAFETY: page_num is always >= 1 + let page = NonZeroU32::new(page_num).expect("page number is always >= 1"); + Chunk { text: chunk.text, metadata: chunk.metadata.with_page(page), @@ -105,7 +119,7 @@ impl TextSplitter { } /// Splits text with page awareness and returns owned chunks. 
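Page-aware splitting (the owned variant follows below) now records the 1-indexed page as `NonZeroU32`. A sketch, assuming `Chunk` exposes its `metadata` field publicly:

```rust
use nvisy_rig::provider::TextSplitter;

fn first_page_number(text: &str) -> Option<u32> {
    let splitter = TextSplitter::with_defaults();
    // Form feeds (\x0c) mark page boundaries; each chunk records its page.
    let chunks = splitter.split_with_pages(text);
    chunks.first().and_then(|c| c.metadata.page).map(|p| p.get())
}
```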
- #[instrument(skip(self, text), fields(text_len = text.len()))] + #[tracing::instrument(skip(self, text), fields(text_len = text.len()))] pub fn split_with_pages_owned(&self, text: &str) -> Vec { self.split_with_pages(text) .into_iter() @@ -120,18 +134,13 @@ impl Default for TextSplitter { } } -/// Estimates the token count (~4 chars per token). -pub fn estimate_tokens(text: &str) -> u32 { - (text.len() / 4) as u32 -} - #[cfg(test)] mod tests { use super::*; #[test] fn test_split_basic() { - let splitter = TextSplitter::new(50, 0, true); + let splitter = TextSplitter::new(50, None, true); let text = "Hello world. This is a test. Another sentence here."; let chunks = splitter.split(text); @@ -143,7 +152,7 @@ mod tests { #[test] fn test_split_with_overlap() { - let splitter = TextSplitter::new(20, 5, true); + let splitter = TextSplitter::new(20, NonZeroU32::new(5), true); let text = "The quick brown fox jumps over the lazy dog."; let chunks = splitter.split(text); @@ -152,17 +161,17 @@ mod tests { #[test] fn test_split_with_pages() { - let splitter = TextSplitter::new(100, 0, true); + let splitter = TextSplitter::new(100, None, true); let text = "Page one content.\x0cPage two content.\x0cPage three."; let chunks = splitter.split_with_pages(text); assert!(!chunks.is_empty()); - assert_eq!(chunks[0].metadata.page, Some(1)); + assert_eq!(chunks[0].metadata.page, NonZeroU32::new(1)); } #[test] fn test_metadata_offsets() { - let splitter = TextSplitter::new(500, 0, false); + let splitter = TextSplitter::new(500, None, false); let text = "Hello world"; let chunks = splitter.split(text); @@ -170,10 +179,4 @@ mod tests { assert_eq!(chunks[0].metadata.start_offset, 0); assert_eq!(chunks[0].metadata.end_offset, text.len() as u32); } - - #[test] - fn test_estimate_tokens() { - assert_eq!(estimate_tokens("hello"), 1); - assert_eq!(estimate_tokens("hello world"), 2); - } } diff --git a/crates/nvisy-rig/src/rag/config.rs b/crates/nvisy-rig/src/rag/config.rs index 02e914e..c54c075 100644 --- a/crates/nvisy-rig/src/rag/config.rs +++ b/crates/nvisy-rig/src/rag/config.rs @@ -1,5 +1,7 @@ //! RAG system configuration. +use std::num::NonZeroU32; + /// Configuration for the RAG system. #[derive(Debug, Clone)] pub struct RagConfig { @@ -7,10 +9,10 @@ pub struct RagConfig { pub max_chunk_characters: u32, /// Number of characters to overlap between chunks. - pub chunk_overlap: u32, + pub chunk_overlap_characters: Option, /// Whether to trim whitespace from chunks. - pub trim_chunks: bool, + pub trim_whitespace: bool, /// Maximum chunks to retrieve per query. 
pub max_results: u32, @@ -23,8 +25,8 @@ impl Default for RagConfig { fn default() -> Self { Self { max_chunk_characters: 1000, - chunk_overlap: 0, - trim_chunks: true, + chunk_overlap_characters: None, + trim_whitespace: true, max_results: 5, min_score: None, } diff --git a/crates/nvisy-rig/src/rag/indexer/mod.rs b/crates/nvisy-rig/src/rag/indexer/mod.rs index f3d5c04..081c5a1 100644 --- a/crates/nvisy-rig/src/rag/indexer/mod.rs +++ b/crates/nvisy-rig/src/rag/indexer/mod.rs @@ -6,11 +6,10 @@ use nvisy_postgres::model::NewFileChunk; use nvisy_postgres::query::FileChunkRepository; use nvisy_postgres::{PgClient, Vector}; use sha2::{Digest, Sha256}; -use tracing::{debug, instrument}; use uuid::Uuid; pub use self::indexed::IndexedChunk; -use crate::provider::{EmbeddingProvider, OwnedChunk, TextSplitter, estimate_tokens}; +use crate::provider::{EmbeddingProvider, OwnedChunk, TextSplitter}; use crate::{Error, Result}; /// Indexer for batch-embedding and storing document chunks. @@ -43,28 +42,28 @@ impl Indexer { } /// Indexes text by splitting, embedding, and storing chunks. - #[instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] + #[tracing::instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn index(&self, text: &str) -> Result> { let chunks = self.splitter.split_owned(text); self.index_chunks(chunks).await } /// Indexes text with page awareness. - #[instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] + #[tracing::instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn index_with_pages(&self, text: &str) -> Result> { let chunks = self.splitter.split_with_pages_owned(text); self.index_chunks(chunks).await } /// Deletes all existing chunks for the file before indexing. - #[instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] + #[tracing::instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn reindex(&self, text: &str) -> Result> { let chunks = self.splitter.split_owned(text); self.reindex_chunks(chunks).await } /// Deletes all existing chunks for the file before indexing with page awareness. 
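Tying the indexer methods above together (the page-aware reindex variant follows right below), a typical call site obtains an `Indexer` from the RAG service and re-indexes a file's extracted text (sketch; assumes `RagService` is reachable at `nvisy_rig::rag` and the crate-root `Result` re-export as before):

```rust
use uuid::Uuid;

use nvisy_rig::rag::RagService;

async fn index_document(
    rag: &RagService,
    file_id: Uuid,
    text: &str,
) -> nvisy_rig::Result<usize> {
    let indexer = rag.indexer(file_id);
    // Drops any previously stored chunks, then splits, embeds, and stores anew.
    let chunks = indexer.reindex(text).await?;
    Ok(chunks.len())
}
```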
- #[instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] + #[tracing::instrument(skip(self, text), fields(file_id = %self.file_id, text_len = text.len()))] pub async fn reindex_with_pages(&self, text: &str) -> Result> { let chunks = self.splitter.split_with_pages_owned(text); self.reindex_chunks(chunks).await @@ -72,14 +71,14 @@ impl Indexer { async fn index_chunks(&self, chunks: Vec) -> Result> { if chunks.is_empty() { - debug!("no chunks to index"); + tracing::debug!("no chunks to index"); return Ok(vec![]); } let texts: Vec = chunks.iter().map(|c| c.text.clone()).collect(); let chunk_count = texts.len(); - debug!(chunk_count, "embedding chunks"); + tracing::debug!(chunk_count, "embedding chunks"); let embeddings = self.provider.embed_texts(texts).await?; if embeddings.len() != chunk_count { @@ -115,7 +114,7 @@ impl Indexer { chunk_index: Some(idx as i32), content_sha256, content_size: Some(content_size), - token_count: Some(estimate_tokens(&chunk.text) as i32), + token_count: None, embedding: Vector::from(embedding_vec), embedding_model: model_name.to_owned(), metadata: Some(metadata), @@ -134,7 +133,7 @@ impl Indexer { .await .map_err(|e| Error::retrieval(format!("failed to create chunks: {e}")))?; - debug!(created_count = created.len(), "stored chunks"); + tracing::debug!(created_count = created.len(), "stored chunks"); Ok(created.into_iter().map(IndexedChunk::from).collect()) } @@ -151,7 +150,7 @@ impl Indexer { .map_err(|e| Error::retrieval(format!("failed to delete chunks: {e}")))?; if deleted > 0 { - debug!(deleted, "deleted existing chunks"); + tracing::debug!(deleted, "deleted existing chunks"); } drop(conn); diff --git a/crates/nvisy-rig/src/rag/mod.rs b/crates/nvisy-rig/src/rag/mod.rs index 62024aa..d658ad4 100644 --- a/crates/nvisy-rig/src/rag/mod.rs +++ b/crates/nvisy-rig/src/rag/mod.rs @@ -5,6 +5,7 @@ mod config; mod indexer; mod searcher; +mod vector_store; use std::sync::Arc; @@ -15,7 +16,8 @@ use uuid::Uuid; pub use self::config::RagConfig; pub use self::indexer::{IndexedChunk, Indexer}; -pub use self::searcher::{RetrievedChunk, SearchScope, Searcher}; +pub use self::searcher::{ChunkMetadata, RetrievedChunk, SearchScope, Searcher}; +pub use self::vector_store::{ChunkDocument, PgFilter, PgVectorStore}; use crate::Result; use crate::provider::{EmbeddingProvider, TextSplitter}; @@ -66,8 +68,8 @@ impl RagService { pub fn indexer(&self, file_id: Uuid) -> Indexer { let splitter = TextSplitter::new( self.inner.config.max_chunk_characters, - self.inner.config.chunk_overlap, - self.inner.config.trim_chunks, + self.inner.config.chunk_overlap_characters, + self.inner.config.trim_whitespace, ); Indexer::new( diff --git a/crates/nvisy-rig/src/rag/vector_store.rs b/crates/nvisy-rig/src/rag/vector_store.rs new file mode 100644 index 0000000..6ee2d6a --- /dev/null +++ b/crates/nvisy-rig/src/rag/vector_store.rs @@ -0,0 +1,378 @@ +//! Vector store implementation using PostgreSQL with pgvector. +//! +//! Provides rig-core compatible [`VectorStoreIndex`] and [`InsertDocuments`] +//! implementations backed by PostgreSQL for document chunk storage and +//! similarity search. 
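The store below implements rig's vector-store traits on top of the existing `file_chunks` table, so constructing one is just a scope plus an optional score floor (sketch; the 0.35 threshold is an arbitrary placeholder):

```rust
use nvisy_postgres::PgClient;
use uuid::Uuid;

use nvisy_rig::provider::EmbeddingProvider;
use nvisy_rig::rag::{PgVectorStore, SearchScope};

fn workspace_store(
    provider: EmbeddingProvider,
    db: PgClient,
    workspace_id: Uuid,
) -> PgVectorStore {
    // Scope searches to one workspace and drop low-similarity matches.
    PgVectorStore::new(provider, db, SearchScope::Workspace(workspace_id)).with_min_score(0.35)
}
```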
+ +use nvisy_postgres::model::NewFileChunk; +use nvisy_postgres::query::FileChunkRepository; +use nvisy_postgres::{PgClient, Vector}; +use rig::embeddings::{Embedding, TextEmbedder}; +use rig::one_or_many::OneOrMany; +use rig::vector_store::request::{SearchFilter, VectorSearchRequest}; +use rig::vector_store::{InsertDocuments, VectorStoreError, VectorStoreIndex}; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use uuid::Uuid; + +use super::SearchScope; +use crate::provider::EmbeddingProvider; + +/// PostgreSQL-backed vector store for document chunks. +/// +/// Implements rig-core's [`VectorStoreIndex`] and [`InsertDocuments`] traits, +/// enabling integration with rig's agent and pipeline systems. +#[derive(Clone)] +pub struct PgVectorStore { + provider: EmbeddingProvider, + db: PgClient, + scope: SearchScope, + min_score: Option, +} + +impl PgVectorStore { + /// Creates a new vector store with the given scope. + pub fn new(provider: EmbeddingProvider, db: PgClient, scope: SearchScope) -> Self { + Self { + provider, + db, + scope, + min_score: None, + } + } + + /// Sets the minimum similarity score threshold. + pub fn with_min_score(mut self, min_score: f64) -> Self { + self.min_score = Some(min_score); + self + } + + /// Returns the search scope. + pub fn scope(&self) -> &SearchScope { + &self.scope + } + + /// Returns the embedding provider. + pub fn provider(&self) -> &EmbeddingProvider { + &self.provider + } +} + +/// A document that can be stored in the vector store. +/// +/// Contains the text content and metadata for a document chunk. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChunkDocument { + /// The text content of the chunk. + pub text: String, + /// The file ID this chunk belongs to. + pub file_id: Uuid, + /// The chunk index within the file. + pub chunk_index: u32, + /// Start byte offset in the source file. + pub start_offset: u32, + /// End byte offset in the source file. + pub end_offset: u32, + /// Optional page number. + #[serde(skip_serializing_if = "Option::is_none")] + pub page: Option, +} + +impl ChunkDocument { + /// Creates a new chunk document. + pub fn new( + text: impl Into, + file_id: Uuid, + chunk_index: u32, + start_offset: u32, + end_offset: u32, + ) -> Self { + Self { + text: text.into(), + file_id, + chunk_index, + start_offset, + end_offset, + page: None, + } + } + + /// Sets the page number. + pub fn with_page(mut self, page: u32) -> Self { + self.page = Some(page); + self + } +} + +impl rig::Embed for ChunkDocument { + fn embed(&self, embedder: &mut TextEmbedder) -> Result<(), rig::embeddings::EmbedError> { + embedder.embed(self.text.clone()); + Ok(()) + } +} + +/// Filter type for PostgreSQL vector store queries. +/// +/// Supports filtering by file ID and workspace scope. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum PgFilter { + /// Filter by exact file ID match. + FileId(Uuid), + /// Filter by workspace ID. + WorkspaceId(Uuid), + /// Combine filters with AND logic. + And(Box, Box), + /// Combine filters with OR logic. 
+ Or(Box, Box), +} + +impl SearchFilter for PgFilter { + type Value = serde_json::Value; + + fn eq(key: impl AsRef, value: Self::Value) -> Self { + match key.as_ref() { + "file_id" => { + if let Some(id) = value.as_str().and_then(|s| Uuid::parse_str(s).ok()) { + Self::FileId(id) + } else { + // Fallback: treat as file ID filter with nil UUID + Self::FileId(Uuid::nil()) + } + } + "workspace_id" => { + if let Some(id) = value.as_str().and_then(|s| Uuid::parse_str(s).ok()) { + Self::WorkspaceId(id) + } else { + Self::WorkspaceId(Uuid::nil()) + } + } + _ => Self::FileId(Uuid::nil()), + } + } + + fn gt(_key: impl AsRef, _value: Self::Value) -> Self { + // Greater-than not meaningful for our use case + Self::FileId(Uuid::nil()) + } + + fn lt(_key: impl AsRef, _value: Self::Value) -> Self { + // Less-than not meaningful for our use case + Self::FileId(Uuid::nil()) + } + + fn and(self, rhs: Self) -> Self { + Self::And(Box::new(self), Box::new(rhs)) + } + + fn or(self, rhs: Self) -> Self { + Self::Or(Box::new(self), Box::new(rhs)) + } +} + +impl InsertDocuments for PgVectorStore { + async fn insert_documents( + &self, + documents: Vec<(Doc, OneOrMany)>, + ) -> Result<(), VectorStoreError> { + if documents.is_empty() { + return Ok(()); + } + + let model_name = self.provider.model_name(); + + let new_chunks: Vec = documents + .into_iter() + .filter_map(|(doc, embeddings)| { + // Serialize the document to extract fields + let json = serde_json::to_value(&doc).ok()?; + + let text = json.get("text")?.as_str()?; + let file_id = json + .get("file_id") + .and_then(|v| v.as_str()) + .and_then(|s| Uuid::parse_str(s).ok())?; + let chunk_index = json.get("chunk_index").and_then(|v| v.as_u64())? as i32; + let start_offset = json.get("start_offset").and_then(|v| v.as_u64())? as u32; + let end_offset = json.get("end_offset").and_then(|v| v.as_u64())? 
as u32; + let page = json.get("page").and_then(|v| v.as_u64()).map(|p| p as u32); + + // Get the first embedding + let embedding = embeddings.first(); + let embedding_vec: Vec = embedding.vec.iter().map(|&x| x as f32).collect(); + + let content_bytes = text.as_bytes(); + let content_sha256 = Sha256::digest(content_bytes).to_vec(); + let content_size = content_bytes.len() as i32; + + let metadata = serde_json::json!({ + "index": chunk_index, + "start_offset": start_offset, + "end_offset": end_offset, + "page": page, + }); + + Some(NewFileChunk { + file_id, + chunk_index: Some(chunk_index), + content_sha256, + content_size: Some(content_size), + token_count: None, + embedding: Vector::from(embedding_vec), + embedding_model: model_name.to_owned(), + metadata: Some(metadata), + }) + }) + .collect(); + + if new_chunks.is_empty() { + return Ok(()); + } + + let mut conn = self.db.get_connection().await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "failed to get connection: {e}" + )))) + })?; + + conn.create_file_chunks(new_chunks).await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "failed to create chunks: {e}" + )))) + })?; + + Ok(()) + } +} + +impl VectorStoreIndex for PgVectorStore { + type Filter = PgFilter; + + async fn top_n Deserialize<'a> + Send>( + &self, + req: VectorSearchRequest, + ) -> Result, VectorStoreError> { + let query = req.query(); + let limit = req.samples() as i64; + let min_score = req.threshold().or(self.min_score).unwrap_or(0.0); + + // Embed the query + let embedding = self.provider.embed_text(query).await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "embedding failed: {e}" + )))) + })?; + + let query_vector: Vector = embedding + .vec + .iter() + .map(|&x| x as f32) + .collect::>() + .into(); + + let mut conn = self.db.get_connection().await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "failed to get connection: {e}" + )))) + })?; + + // Use the scope to determine which search method to use + let scored_chunks = match &self.scope { + SearchScope::Files(file_ids) => { + conn.search_scored_chunks_in_files(query_vector, file_ids, min_score, limit) + .await + } + SearchScope::Workspace(workspace_id) => { + conn.search_scored_chunks_in_workspace( + query_vector, + *workspace_id, + min_score, + limit, + ) + .await + } + } + .map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "vector search failed: {e}" + )))) + })?; + + // Convert to rig format + let results: Vec<(f64, String, T)> = scored_chunks + .into_iter() + .filter_map(|scored| { + let chunk = scored.chunk; + let id = chunk.id.to_string(); + + // Build a document representation from metadata + let doc_json = serde_json::json!({ + "file_id": chunk.file_id.to_string(), + "chunk_index": chunk.chunk_index, + "metadata": chunk.metadata, + }); + + let doc: T = serde_json::from_value(doc_json).ok()?; + Some((scored.score, id, doc)) + }) + .collect(); + + Ok(results) + } + + async fn top_n_ids( + &self, + req: VectorSearchRequest, + ) -> Result, VectorStoreError> { + let query = req.query(); + let limit = req.samples() as i64; + let min_score = req.threshold().or(self.min_score).unwrap_or(0.0); + + // Embed the query + let embedding = self.provider.embed_text(query).await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "embedding failed: {e}" + )))) + })?; + + let 
query_vector: Vector = embedding + .vec + .iter() + .map(|&x| x as f32) + .collect::>() + .into(); + + let mut conn = self.db.get_connection().await.map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "failed to get connection: {e}" + )))) + })?; + + let scored_chunks = match &self.scope { + SearchScope::Files(file_ids) => { + conn.search_scored_chunks_in_files(query_vector, file_ids, min_score, limit) + .await + } + SearchScope::Workspace(workspace_id) => { + conn.search_scored_chunks_in_workspace( + query_vector, + *workspace_id, + min_score, + limit, + ) + .await + } + } + .map_err(|e| { + VectorStoreError::DatastoreError(Box::new(std::io::Error::other(format!( + "vector search failed: {e}" + )))) + })?; + + let results: Vec<(f64, String)> = scored_chunks + .into_iter() + .map(|scored| (scored.score, scored.chunk.id.to_string())) + .collect(); + + Ok(results) + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/chunk.rs b/crates/nvisy-runtime/src/graph/transform/chunk.rs index 88d5d74..41484e1 100644 --- a/crates/nvisy-runtime/src/graph/transform/chunk.rs +++ b/crates/nvisy-runtime/src/graph/transform/chunk.rs @@ -7,57 +7,46 @@ use serde::{Deserialize, Serialize}; pub struct ChunkConfig { /// Chunking strategy. #[serde(flatten)] - pub strategy: ChunkStrategy, + pub chunk_strategy: ChunkStrategy, + + /// Whether to use LLM-powered contextual chunking. + /// + /// When enabled, each chunk will include additional context + /// generated by an LLM to improve retrieval quality. + #[serde(default)] + pub contextual_chunking: bool, } /// Chunking strategy. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "strategy", rename_all = "snake_case")] +#[serde(tag = "chunk_strategy", rename_all = "snake_case")] pub enum ChunkStrategy { /// Chunk by character count. Character { /// Maximum chunk size in characters. max_characters: u32, /// Overlap between chunks in characters. - #[serde(default)] - overlap: u32, + overlap_characters: u32, }, /// Chunk by page boundaries. Page { - /// Maximum pages per chunk. - #[serde(default = "default_max_pages")] - max_pages: u32, /// Overlap between chunks in pages. - #[serde(default)] - overlap: u32, + overlap_pages: u32, }, /// Chunk by document sections/headings. Section { - /// Maximum sections per chunk. - #[serde(default = "default_max_sections")] - max_sections: u32, - /// Overlap between chunks in sections. - #[serde(default)] - overlap: u32, + /// Minimum chunk size in characters. + min_characters: u32, + /// Maximum chunk size in characters. + max_characters: u32, + /// Overlap between chunks in characters. + overlap_characters: u32, }, /// Chunk by semantic similarity. Similarity { /// Maximum chunk size in characters. max_characters: u32, /// Similarity score threshold (0.0 to 1.0). - #[serde(default = "default_score")] score: f32, }, } - -fn default_max_pages() -> u32 { - 1 -} - -fn default_max_sections() -> u32 { - 1 -} - -fn default_score() -> f32 { - 0.5 -} diff --git a/crates/nvisy-runtime/src/graph/transform/derive.rs b/crates/nvisy-runtime/src/graph/transform/derive.rs new file mode 100644 index 0000000..14e8dae --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/derive.rs @@ -0,0 +1,29 @@ +//! Derive transformer configuration - generate new content from input. + +use nvisy_rig::provider::CompletionModel; +use serde::{Deserialize, Serialize}; + +/// Configuration for generating new content from input. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct DeriveConfig { + /// Completion model provider configuration. + #[serde(flatten)] + pub provider: CompletionModel, + + /// The derivation task to perform. + pub task: DeriveTask, + + /// Optional prompt override for the task. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub override_prompt: Option, +} + +/// Tasks for generating new content from input. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DeriveTask { + /// Generate a condensed summary of the content. + Summarization, + /// Generate a title or heading for the content. + GenerateTitle, +} diff --git a/crates/nvisy-runtime/src/graph/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs index cbb841a..1d5eea3 100644 --- a/crates/nvisy-runtime/src/graph/transform/embedding.rs +++ b/crates/nvisy-runtime/src/graph/transform/embedding.rs @@ -9,4 +9,8 @@ pub struct EmbeddingConfig { /// Model to use for embedding generation. #[serde(flatten)] pub model: EmbeddingModel, + + /// Whether to L2-normalize the output embeddings. + #[serde(default)] + pub normalize: bool, } diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs index 5e8272a..d8b22c9 100644 --- a/crates/nvisy-runtime/src/graph/transform/enrich.rs +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -1,14 +1,54 @@ -//! Enrich transformer configuration. +//! Enrich transformer configuration - add metadata/descriptions to elements. use nvisy_rig::provider::CompletionModel; use serde::{Deserialize, Serialize}; -/// Configuration for enriching data. +/// Configuration for enriching elements with metadata/descriptions. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct EnrichConfig { - /// Model to use for enrichment. + /// Completion model provider configuration. #[serde(flatten)] - pub model: CompletionModel, - /// Prompt template for enrichment. - pub prompt: String, + pub provider: CompletionModel, + + /// The enrichment task to perform. + #[serde(flatten)] + pub task: EnrichTask, + + /// Optional prompt override for the task. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub override_prompt: Option, +} + +/// Tasks for adding metadata/descriptions to elements. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "input_type", content = "task", rename_all = "snake_case")] +pub enum EnrichTask { + /// Enrich table elements. + Table(TableEnrichTask), + /// Enrich image elements. + Image(ImageEnrichTask), +} + +/// Tasks for table enrichment. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TableEnrichTask { + /// Generate a natural language description of the table. + Description, + /// Generate descriptions for each column. + ColumnDescriptions, +} + +/// Tasks for image enrichment. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ImageEnrichTask { + /// Generate a natural language description of the image. + Description, + /// Generate a detailed description (people, objects, text, colors, layout). + DetailedDescription, + /// Extract text from image using generative OCR. + GenerativeOcr, + /// Detect and list objects/entities in the image. 
+ ObjectDetection, } diff --git a/crates/nvisy-runtime/src/graph/transform/extract.rs b/crates/nvisy-runtime/src/graph/transform/extract.rs new file mode 100644 index 0000000..7fe1ddd --- /dev/null +++ b/crates/nvisy-runtime/src/graph/transform/extract.rs @@ -0,0 +1,90 @@ +//! Extract transformer configuration - extract structured data or convert formats. + +use nvisy_rig::provider::CompletionModel; +use serde::{Deserialize, Serialize}; + +/// Configuration for extracting structured data or converting formats. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ExtractConfig { + /// Completion model provider configuration. + #[serde(flatten)] + pub provider: CompletionModel, + + /// The extraction task to perform. + #[serde(flatten)] + pub task: ExtractTask, + + /// Optional prompt override for the task. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub override_prompt: Option, +} + +/// Tasks for extracting structured data or converting formats. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "task_type", content = "task", rename_all = "snake_case")] +pub enum ExtractTask { + /// Convert elements to different formats. + Convert(ConvertTask), + /// Analyze text to extract structured information. + Analyze(AnalyzeTask), +} + +/// Tasks for format conversion. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde( + tag = "input_type", + content = "convert_task", + rename_all = "snake_case" +)] +pub enum ConvertTask { + /// Convert table elements. + Table(TableConvertTask), + /// Convert text elements. + Text(TextConvertTask), +} + +/// Tasks for table conversion. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TableConvertTask { + /// Convert table to HTML format. + ToHtml, + /// Convert table to Markdown format. + ToMarkdown, + /// Convert table to CSV format. + ToCsv, + /// Convert table to structured JSON. + ToJson, +} + +/// Tasks for text conversion. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum TextConvertTask { + /// Convert text to JSON format. + ToJson, + /// Convert text to structured JSON based on a schema. + ToStructuredJson { + /// JSON schema for the output structure. + schema: String, + }, +} + +/// Tasks for analyzing text to extract structured information. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum AnalyzeTask { + /// Extract named entities (people, places, organizations, dates, etc.). + NamedEntityRecognition, + /// Extract key terms and phrases. + KeywordExtraction, + /// Classify text into provided categories. + Classification { + /// Labels/categories for classification. + labels: Vec, + }, + /// Analyze sentiment (positive, negative, neutral). + SentimentAnalysis, + /// Extract relationships between entities. + RelationshipExtraction, +} diff --git a/crates/nvisy-runtime/src/graph/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs index fd40fa9..18b8c8d 100644 --- a/crates/nvisy-runtime/src/graph/transform/mod.rs +++ b/crates/nvisy-runtime/src/graph/transform/mod.rs @@ -1,27 +1,36 @@ //! Transformer node types for processing and transforming data. 
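For orientation, a quick serde-shape sketch for the reworked chunk config: the internally tagged `chunk_strategy` enum is flattened into `ChunkConfig`, so the tag sits next to the strategy fields. The module path and the numeric values here are assumptions for illustration only:

```rust
use nvisy_runtime::graph::transform::{ChunkConfig, ChunkStrategy};

// Assumed path and example numbers; field and tag names come from the patch.
fn chunk_config_shape() -> serde_json::Result<()> {
    let config = ChunkConfig {
        chunk_strategy: ChunkStrategy::Section {
            min_characters: 200,
            max_characters: 2000,
            overlap_characters: 100,
        },
        contextual_chunking: true,
    };
    let value = serde_json::to_value(&config)?;
    // The flattened internal tag lands beside the strategy fields.
    assert_eq!(value["chunk_strategy"], "section");
    assert_eq!(value["max_characters"], 2000);
    Ok(())
}
```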
mod chunk; +mod derive; mod embedding; mod enrich; +mod extract; mod partition; pub use chunk::{ChunkConfig, ChunkStrategy}; +pub use derive::{DeriveConfig, DeriveTask}; pub use embedding::EmbeddingConfig; -pub use enrich::EnrichConfig; -pub use partition::PartitionConfig; - +pub use enrich::{EnrichConfig, EnrichTask, ImageEnrichTask, TableEnrichTask}; +pub use extract::{ + AnalyzeTask, ConvertTask, ExtractConfig, ExtractTask, TableConvertTask, TextConvertTask, +}; +pub use partition::{PartitionConfig, PartitionStrategy}; use serde::{Deserialize, Serialize}; /// Transformer node configuration. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum TransformerConfig { - /// Partition data into multiple outputs. + /// Partition documents into elements. Partition(PartitionConfig), /// Chunk content into smaller pieces. Chunk(ChunkConfig), - /// Enrich data with additional information. - Enrich(EnrichConfig), /// Generate vector embeddings. Embedding(EmbeddingConfig), + /// Enrich elements with metadata/descriptions. + Enrich(EnrichConfig), + /// Extract structured data or convert formats. + Extract(ExtractConfig), + /// Generate new content from input. + Derive(DeriveConfig), } diff --git a/crates/nvisy-runtime/src/graph/transform/partition.rs b/crates/nvisy-runtime/src/graph/transform/partition.rs index 619c457..02ed0e9 100644 --- a/crates/nvisy-runtime/src/graph/transform/partition.rs +++ b/crates/nvisy-runtime/src/graph/transform/partition.rs @@ -2,9 +2,32 @@ use serde::{Deserialize, Serialize}; -/// Configuration for partitioning data. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +/// Configuration for partitioning documents into elements. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct PartitionConfig { - /// Field to partition by. - pub field: String, + /// Partitioning strategy. + pub strategy: PartitionStrategy, + + /// Whether to include page break markers in output. + #[serde(default)] + pub include_page_breaks: bool, + + /// Whether to discard unsupported element types. + #[serde(default)] + pub discard_unsupported: bool, +} + +/// Partitioning strategy for document element extraction. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PartitionStrategy { + /// Automatically detect the best partitioning approach. + #[default] + Auto, + /// Fast rule-based partitioning without ML. + Fast, + /// Slower ML-based partitioning with layout detection. + Slow, + /// Vision-Language Model based partitioning. 
+ Vlm, } diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs index 36fae48..eaa2ac2 100644 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/mod.rs @@ -18,18 +18,17 @@ mod outputs; mod registry; pub mod runtime; -use derive_more::From; -use serde::{Deserialize, Serialize}; - pub use backend::{ AzblobCredentials, AzblobParams, GcsCredentials, GcsParams, MilvusCredentials, MilvusParams, MysqlCredentials, MysqlParams, PgVectorCredentials, PgVectorParams, PineconeCredentials, PineconeParams, PostgresCredentials, PostgresParams, QdrantCredentials, QdrantParams, S3Credentials, S3Params, }; +use derive_more::From; pub use inputs::{InputProvider, InputProviderConfig, InputProviderParams}; pub use outputs::{OutputProvider, OutputProviderConfig, OutputProviderParams}; pub use registry::CredentialsRegistry; +use serde::{Deserialize, Serialize}; /// Provider credentials (sensitive). #[derive(Debug, Clone, From, Serialize, Deserialize)] From b101728502f0908d0898b34e54c30cb496d89b84 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 22 Jan 2026 11:14:56 +0100 Subject: [PATCH 15/28] feat(runtime): add AI providers, IntoProvider trait, and refactor graph module - Add AI provider backends (OpenAI, Anthropic, Cohere, Gemini, Perplexity) - Add IntoProvider trait for unified params + credentials -> provider pattern - Add AI credentials to ProviderCredentials enum - Update transform configs to use *ProviderParams with credentials_id - Add validate() credentials check against CredentialsRegistry - Implement IntoProvider for InputProviderParams and OutputProviderParams - Split workflow.rs into workflow/ module (definition, edge, metadata, node) - Move WorkflowGraph to graph/graph.rs, add WorkflowDefinition for serialization - Add agent tools module with 8 specialized tools - Add agent memory module --- crates/nvisy-rig/src/agent/memory/mod.rs | 1 + crates/nvisy-rig/src/agent/mod.rs | 5 +- .../nvisy-rig/src/agent/structured_output.rs | 36 +- crates/nvisy-rig/src/agent/text_analysis.rs | 98 +++++- .../src/agent/tools/context_store.rs | 214 +++++++++++ .../src/agent/tools/document_fetch.rs | 118 +++++++ .../src/agent/tools/image_analysis.rs | 176 +++++++++ .../nvisy-rig/src/agent/tools/json_schema.rs | 333 ++++++++++++++++++ .../src/agent/tools/metadata_query.rs | 178 ++++++++++ crates/nvisy-rig/src/agent/tools/mod.rs | 30 ++ .../nvisy-rig/src/agent/tools/scratchpad.rs | 307 ++++++++++++++++ .../src/agent/tools/vector_search.rs | 123 +++++++ crates/nvisy-rig/src/agent/tools/web_fetch.rs | 270 ++++++++++++++ crates/nvisy-rig/src/error.rs | 9 + crates/nvisy-runtime/src/engine/executor.rs | 21 +- .../src/graph/{workflow.rs => graph.rs} | 180 ++++------ crates/nvisy-runtime/src/graph/mod.rs | 12 +- .../src/graph/transform/derive.rs | 9 +- .../src/graph/transform/embedding.rs | 9 +- .../src/graph/transform/enrich.rs | 9 +- .../src/graph/transform/extract.rs | 9 +- .../src/graph/workflow/definition.rs | 80 +++++ .../src/graph/{ => workflow}/edge.rs | 0 .../src/graph/workflow/metadata.rs | 59 ++++ .../nvisy-runtime/src/graph/workflow/mod.rs | 18 + .../src/graph/{ => workflow}/node.rs | 6 +- crates/nvisy-runtime/src/provider/ai.rs | 163 +++++++++ .../src/provider/backend/anthropic.rs | 48 +++ .../src/provider/backend/azblob.rs | 13 +- .../src/provider/backend/cohere.rs | 83 +++++ .../nvisy-runtime/src/provider/backend/gcs.rs | 13 +- .../src/provider/backend/gemini.rs | 83 +++++ .../src/provider/backend/milvus.rs | 13 +- 
.../nvisy-runtime/src/provider/backend/mod.rs | 56 ++- .../src/provider/backend/mysql.rs | 13 +- .../src/provider/backend/openai.rs | 83 +++++ .../src/provider/backend/perplexity.rs | 48 +++ .../src/provider/backend/pgvector.rs | 13 +- .../src/provider/backend/pinecone.rs | 13 +- .../src/provider/backend/postgres.rs | 13 +- .../src/provider/backend/qdrant.rs | 13 +- .../nvisy-runtime/src/provider/backend/s3.rs | 13 +- crates/nvisy-runtime/src/provider/inputs.rs | 27 +- crates/nvisy-runtime/src/provider/mod.rs | 53 ++- crates/nvisy-runtime/src/provider/outputs.rs | 35 +- .../src/handler/request/pipelines.rs | 10 +- .../src/handler/response/pipelines.rs | 6 +- 47 files changed, 2886 insertions(+), 236 deletions(-) create mode 100644 crates/nvisy-rig/src/agent/memory/mod.rs create mode 100644 crates/nvisy-rig/src/agent/tools/context_store.rs create mode 100644 crates/nvisy-rig/src/agent/tools/document_fetch.rs create mode 100644 crates/nvisy-rig/src/agent/tools/image_analysis.rs create mode 100644 crates/nvisy-rig/src/agent/tools/json_schema.rs create mode 100644 crates/nvisy-rig/src/agent/tools/metadata_query.rs create mode 100644 crates/nvisy-rig/src/agent/tools/mod.rs create mode 100644 crates/nvisy-rig/src/agent/tools/scratchpad.rs create mode 100644 crates/nvisy-rig/src/agent/tools/vector_search.rs create mode 100644 crates/nvisy-rig/src/agent/tools/web_fetch.rs rename crates/nvisy-runtime/src/graph/{workflow.rs => graph.rs} (77%) create mode 100644 crates/nvisy-runtime/src/graph/workflow/definition.rs rename crates/nvisy-runtime/src/graph/{ => workflow}/edge.rs (100%) create mode 100644 crates/nvisy-runtime/src/graph/workflow/metadata.rs create mode 100644 crates/nvisy-runtime/src/graph/workflow/mod.rs rename crates/nvisy-runtime/src/graph/{ => workflow}/node.rs (96%) create mode 100644 crates/nvisy-runtime/src/provider/ai.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/anthropic.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/cohere.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/gemini.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/openai.rs create mode 100644 crates/nvisy-runtime/src/provider/backend/perplexity.rs diff --git a/crates/nvisy-rig/src/agent/memory/mod.rs b/crates/nvisy-rig/src/agent/memory/mod.rs new file mode 100644 index 0000000..1650e89 --- /dev/null +++ b/crates/nvisy-rig/src/agent/memory/mod.rs @@ -0,0 +1 @@ +//! Memory module for agent conversation history and context management. diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index eecfc59..74703c1 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -10,6 +10,9 @@ //! //! Use [`Agents`] to create all agents from a single provider. 
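A caller-side sketch of the typed results exported just below (`Entity`, `Classification`, and friends); the agent is taken by reference to sidestep construction details, and the function itself is illustrative rather than part of the patch:

```rust
use nvisy_rig::Result;
use nvisy_rig::agent::TextAnalysisAgent;

// Illustrative only: print the entities the agent extracted from `text`.
async fn print_entities(agent: &TextAnalysisAgent, text: &str) -> Result<()> {
    let entities = agent.extract_entities(text).await?;
    for entity in entities {
        println!("{} ({})", entity.text, entity.entity_type);
    }
    Ok(())
}
```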
+pub mod memory; +pub mod tools; + mod structured_output; mod table; mod text_analysis; @@ -18,7 +21,7 @@ mod vision; pub use structured_output::StructuredOutputAgent; pub use table::TableAgent; -pub use text_analysis::TextAnalysisAgent; +pub use text_analysis::{Classification, Entity, Relationship, Sentiment, TextAnalysisAgent}; pub use text_generation::TextGenerationAgent; pub use vision::VisionAgent; diff --git a/crates/nvisy-rig/src/agent/structured_output.rs b/crates/nvisy-rig/src/agent/structured_output.rs index 97766e8..0d0a5b6 100644 --- a/crates/nvisy-rig/src/agent/structured_output.rs +++ b/crates/nvisy-rig/src/agent/structured_output.rs @@ -2,9 +2,10 @@ use rig::agent::{Agent, AgentBuilder}; use rig::completion::Prompt; +use serde_json::Value; -use crate::Result; use crate::provider::CompletionProvider; +use crate::{Error, Result}; const NAME: &str = "StructuredOutputAgent"; const DESCRIPTION: &str = @@ -54,18 +55,43 @@ impl StructuredOutputAgent { /// /// Attempts to extract structured information from free-form text /// and represent it as JSON. - pub async fn to_json(&self, text: &str) -> Result { + pub async fn to_json(&self, text: &str) -> Result { let prompt = format!("{}\n\nText:\n{}", PROMPT_TO_JSON, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + parse_json(&response) } /// Converts text to JSON matching a specific schema. /// /// Extracts information from text and structures it according to /// the provided JSON schema. - pub async fn to_structured_json(&self, text: &str, schema: &str) -> Result { + pub async fn to_structured_json(&self, text: &str, schema: &str) -> Result { let base_prompt = PROMPT_TO_STRUCTURED_JSON.replace("{}", schema); let prompt = format!("{}\n\nText:\n{}", base_prompt, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + parse_json(&response) } } + +/// Parses JSON from LLM response, handling markdown code blocks. +fn parse_json(response: &str) -> Result { + // Try to extract JSON from markdown code block if present + let json_str = if response.contains("```json") { + response + .split("```json") + .nth(1) + .and_then(|s| s.split("```").next()) + .map(|s| s.trim()) + .unwrap_or(response.trim()) + } else if response.contains("```") { + response + .split("```") + .nth(1) + .map(|s| s.trim()) + .unwrap_or(response.trim()) + } else { + response.trim() + }; + + serde_json::from_str(json_str).map_err(|e| Error::parse(format!("invalid JSON: {e}"))) +} diff --git a/crates/nvisy-rig/src/agent/text_analysis.rs b/crates/nvisy-rig/src/agent/text_analysis.rs index a3ab7eb..f798968 100644 --- a/crates/nvisy-rig/src/agent/text_analysis.rs +++ b/crates/nvisy-rig/src/agent/text_analysis.rs @@ -1,10 +1,58 @@ //! Text analysis agent for extracting structured information. +use std::collections::HashMap; + use rig::agent::{Agent, AgentBuilder}; use rig::completion::Prompt; +use serde::{Deserialize, Serialize}; -use crate::Result; use crate::provider::CompletionProvider; +use crate::{Error, Result}; + +/// A named entity extracted from text. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Entity { + /// The text of the entity. + pub text: String, + /// The type of entity (e.g., "person", "organization", "location"). + #[serde(rename = "type")] + pub entity_type: String, + /// The starting character index in the source text. + #[serde(default)] + pub start_index: Option, +} + +/// Classification result with labels and confidence scores. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Classification { + /// The matched category labels. + pub labels: Vec, + /// Confidence scores for each label (0.0 to 1.0). + pub confidence: HashMap, +} + +/// Sentiment analysis result. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Sentiment { + /// The overall sentiment: "positive", "negative", "neutral", or "mixed". + pub sentiment: String, + /// Confidence score (0.0 to 1.0). + pub confidence: f64, + /// Brief explanation of the sentiment. + #[serde(default)] + pub explanation: Option, +} + +/// A relationship between two entities. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Relationship { + /// The first entity in the relationship. + pub subject: String, + /// The type of relationship. + pub predicate: String, + /// The second entity in the relationship. + pub object: String, +} const NAME: &str = "TextAnalysisAgent"; const DESCRIPTION: &str = "Agent for text analysis including entity extraction, keyword extraction, classification, and sentiment analysis"; @@ -74,34 +122,62 @@ impl TextAnalysisAgent { } /// Extracts named entities from text. - pub async fn extract_entities(&self, text: &str) -> Result { + pub async fn extract_entities(&self, text: &str) -> Result> { let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_ENTITIES, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + parse_json(&response) } /// Extracts keywords from text. - pub async fn extract_keywords(&self, text: &str) -> Result { + pub async fn extract_keywords(&self, text: &str) -> Result> { let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_KEYWORDS, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + parse_json(&response) } /// Classifies text into provided categories. - pub async fn classify(&self, text: &str, labels: &[String]) -> Result { + pub async fn classify(&self, text: &str, labels: &[String]) -> Result { let labels_str = labels.join(", "); let base_prompt = PROMPT_CLASSIFY.replace("{}", &labels_str); let prompt = format!("{}\n\nText:\n{}", base_prompt, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + parse_json(&response) } /// Analyzes sentiment of text. - pub async fn analyze_sentiment(&self, text: &str) -> Result { + pub async fn analyze_sentiment(&self, text: &str) -> Result { let prompt = format!("{}\n\nText:\n{}", PROMPT_ANALYZE_SENTIMENT, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + parse_json(&response) } /// Extracts relationships between entities in text. - pub async fn extract_relationships(&self, text: &str) -> Result { + pub async fn extract_relationships(&self, text: &str) -> Result> { let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_RELATIONSHIPS, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + parse_json(&response) } } + +/// Parses JSON from LLM response, handling markdown code blocks. 
+fn parse_json(response: &str) -> Result { + // Try to extract JSON from markdown code block if present + let json_str = if response.contains("```json") { + response + .split("```json") + .nth(1) + .and_then(|s| s.split("```").next()) + .map(|s| s.trim()) + .unwrap_or(response.trim()) + } else if response.contains("```") { + response + .split("```") + .nth(1) + .map(|s| s.trim()) + .unwrap_or(response.trim()) + } else { + response.trim() + }; + + serde_json::from_str(json_str).map_err(|e| Error::parse(format!("invalid JSON: {e}"))) +} diff --git a/crates/nvisy-rig/src/agent/tools/context_store.rs b/crates/nvisy-rig/src/agent/tools/context_store.rs new file mode 100644 index 0000000..2d516b0 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/context_store.rs @@ -0,0 +1,214 @@ +//! Context store tool for persistent agent memory. + +use std::sync::Arc; + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; + +/// Trait for context store implementations. +#[async_trait] +pub trait ContextStore: Send + Sync { + /// Store a value with a key. + async fn set(&self, key: &str, value: serde_json::Value) -> Result<(), ContextStoreError>; + + /// Retrieve a value by key. + async fn get(&self, key: &str) -> Result, ContextStoreError>; + + /// Delete a value by key. + async fn delete(&self, key: &str) -> Result; + + /// List all keys with optional prefix filter. + async fn list(&self, prefix: Option<&str>) -> Result, ContextStoreError>; +} + +/// Error type for context store operations. +#[derive(Debug, thiserror::Error)] +pub enum ContextStoreError { + #[error("store failed: {0}")] + Store(String), + #[error("retrieve failed: {0}")] + Retrieve(String), + #[error("serialization error: {0}")] + Serialization(String), +} + +/// The operation to perform on the context store. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ContextOperation { + /// Store a value. + Set { + key: String, + value: serde_json::Value, + }, + /// Retrieve a value. + Get { key: String }, + /// Delete a value. + Delete { key: String }, + /// List all keys. + List { prefix: Option }, +} + +/// Arguments for context store operations. +#[derive(Debug, Deserialize)] +pub struct ContextStoreArgs { + /// The operation to perform. + pub operation: ContextOperation, +} + +/// Result of a context store operation. +#[derive(Debug, Serialize)] +pub struct ContextStoreResult { + /// Whether the operation succeeded. + pub success: bool, + /// The result value (for get operations). + #[serde(skip_serializing_if = "Option::is_none")] + pub value: Option, + /// List of keys (for list operations). + #[serde(skip_serializing_if = "Option::is_none")] + pub keys: Option>, + /// Optional message. + #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, +} + +/// Tool for storing and retrieving context. +pub struct ContextStoreTool { + store: Arc, +} + +impl ContextStoreTool { + /// Creates a new context store tool. + pub fn new(store: S) -> Self { + Self { + store: Arc::new(store), + } + } + + /// Creates a new context store tool from an Arc. 
+ pub fn from_arc(store: Arc) -> Self { + Self { store } + } +} + +impl Tool for ContextStoreTool { + const NAME: &'static str = "context_store"; + + type Error = ContextStoreError; + type Args = ContextStoreArgs; + type Output = ContextStoreResult; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Store and retrieve persistent context values. Use this to remember information across conversation turns or save intermediate results.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "operation": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "set": { + "type": "object", + "properties": { + "key": { "type": "string" }, + "value": {} + }, + "required": ["key", "value"] + } + } + }, + { + "type": "object", + "properties": { + "get": { + "type": "object", + "properties": { + "key": { "type": "string" } + }, + "required": ["key"] + } + } + }, + { + "type": "object", + "properties": { + "delete": { + "type": "object", + "properties": { + "key": { "type": "string" } + }, + "required": ["key"] + } + } + }, + { + "type": "object", + "properties": { + "list": { + "type": "object", + "properties": { + "prefix": { "type": "string" } + } + } + } + } + ], + "description": "The operation to perform: set, get, delete, or list" + } + }, + "required": ["operation"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + match args.operation { + ContextOperation::Set { key, value } => { + self.store.set(&key, value).await?; + Ok(ContextStoreResult { + success: true, + value: None, + keys: None, + message: Some(format!("Stored value for key: {key}")), + }) + } + ContextOperation::Get { key } => { + let value = self.store.get(&key).await?; + Ok(ContextStoreResult { + success: value.is_some(), + value, + keys: None, + message: None, + }) + } + ContextOperation::Delete { key } => { + let deleted = self.store.delete(&key).await?; + Ok(ContextStoreResult { + success: deleted, + value: None, + keys: None, + message: if deleted { + Some(format!("Deleted key: {key}")) + } else { + Some(format!("Key not found: {key}")) + }, + }) + } + ContextOperation::List { prefix } => { + let keys = self.store.list(prefix.as_deref()).await?; + Ok(ContextStoreResult { + success: true, + value: None, + keys: Some(keys), + message: None, + }) + } + } + } +} diff --git a/crates/nvisy-rig/src/agent/tools/document_fetch.rs b/crates/nvisy-rig/src/agent/tools/document_fetch.rs new file mode 100644 index 0000000..7f2b73b --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/document_fetch.rs @@ -0,0 +1,118 @@ +//! Document fetch tool for retrieving documents by ID. + +use std::sync::Arc; + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; + +/// A fetched document. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Document { + /// The document ID. + pub id: String, + /// The document content. + pub content: String, + /// Document title if available. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub title: Option, + /// Document metadata. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metadata: Option, +} + +/// Trait for document fetch implementations. +#[async_trait] +pub trait DocumentFetcher: Send + Sync { + /// Fetch a document by ID. + async fn fetch(&self, id: &str) -> Result, DocumentFetchError>; + + /// Fetch multiple documents by IDs. 
+ async fn fetch_many(&self, ids: &[String]) -> Result, DocumentFetchError>; +} + +/// Error type for document fetch operations. +#[derive(Debug, thiserror::Error)] +pub enum DocumentFetchError { + #[error("document not found: {0}")] + NotFound(String), + #[error("fetch failed: {0}")] + Fetch(String), + #[error("connection error: {0}")] + Connection(String), +} + +/// Arguments for document fetch. +#[derive(Debug, Deserialize)] +pub struct DocumentFetchArgs { + /// The document ID to fetch. + #[serde(default)] + pub id: Option, + /// Multiple document IDs to fetch. + #[serde(default)] + pub ids: Option>, +} + +/// Tool for fetching documents by ID. +pub struct DocumentFetchTool { + fetcher: Arc, +} + +impl DocumentFetchTool { + /// Creates a new document fetch tool. + pub fn new(fetcher: F) -> Self { + Self { + fetcher: Arc::new(fetcher), + } + } + + /// Creates a new document fetch tool from an Arc. + pub fn from_arc(fetcher: Arc) -> Self { + Self { fetcher } + } +} + +impl Tool for DocumentFetchTool { + const NAME: &'static str = "document_fetch"; + + type Error = DocumentFetchError; + type Args = DocumentFetchArgs; + type Output = Vec; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Fetch one or more documents by their IDs. Use this to retrieve the full content of documents you've found through search.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "A single document ID to fetch" + }, + "ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Multiple document IDs to fetch" + } + } + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + match (args.id, args.ids) { + (Some(id), _) => { + let doc = self + .fetcher + .fetch(&id) + .await? + .ok_or(DocumentFetchError::NotFound(id))?; + Ok(vec![doc]) + } + (None, Some(ids)) => self.fetcher.fetch_many(&ids).await, + (None, None) => Ok(vec![]), + } + } +} diff --git a/crates/nvisy-rig/src/agent/tools/image_analysis.rs b/crates/nvisy-rig/src/agent/tools/image_analysis.rs new file mode 100644 index 0000000..37be018 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/image_analysis.rs @@ -0,0 +1,176 @@ +//! Image analysis tool using VLM. + +use std::sync::Arc; + +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; + +use crate::agent::VisionAgent; + +/// Error type for image analysis operations. +#[derive(Debug, thiserror::Error)] +pub enum ImageAnalysisError { + #[error("analysis failed: {0}")] + Analysis(String), + #[error("invalid image: {0}")] + InvalidImage(String), + #[error("unsupported format: {0}")] + UnsupportedFormat(String), +} + +impl From for ImageAnalysisError { + fn from(e: crate::Error) -> Self { + Self::Analysis(e.to_string()) + } +} + +/// The type of analysis to perform. +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum AnalysisType { + /// Brief description (1-2 sentences). + Describe, + /// Detailed description. + DescribeDetailed, + /// Extract text (OCR). + ExtractText, + /// Detect objects. + DetectObjects, + /// Custom prompt. + Custom { prompt: String }, +} + +/// Arguments for image analysis. +#[derive(Debug, Deserialize)] +pub struct ImageAnalysisArgs { + /// The image data as base64 or URL. + pub image: String, + /// The type of analysis to perform. 
+ #[serde(default = "default_analysis_type")] + pub analysis_type: AnalysisType, +} + +fn default_analysis_type() -> AnalysisType { + AnalysisType::Describe +} + +/// Result of image analysis. +#[derive(Debug, Serialize)] +pub struct ImageAnalysisResult { + /// The analysis result. + pub result: String, + /// The type of analysis performed. + pub analysis_type: AnalysisType, +} + +/// Tool for analyzing images using VLM. +pub struct ImageAnalysisTool { + agent: Arc, +} + +impl ImageAnalysisTool { + /// Creates a new image analysis tool. + pub fn new(agent: VisionAgent) -> Self { + Self { + agent: Arc::new(agent), + } + } + + /// Creates a new image analysis tool from an Arc. + pub fn from_arc(agent: Arc) -> Self { + Self { agent } + } +} + +impl Tool for ImageAnalysisTool { + const NAME: &'static str = "image_analysis"; + + type Error = ImageAnalysisError; + type Args = ImageAnalysisArgs; + type Output = ImageAnalysisResult; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Analyze an image using vision-language model. Can describe images, extract text (OCR), detect objects, or answer custom questions about the image.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "image": { + "type": "string", + "description": "The image as base64-encoded data or a URL" + }, + "analysis_type": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "describe": { "type": "object" } + }, + "description": "Brief description (1-2 sentences)" + }, + { + "type": "object", + "properties": { + "describe_detailed": { "type": "object" } + }, + "description": "Detailed description" + }, + { + "type": "object", + "properties": { + "extract_text": { "type": "object" } + }, + "description": "Extract text from the image (OCR)" + }, + { + "type": "object", + "properties": { + "detect_objects": { "type": "object" } + }, + "description": "Detect and list objects in the image" + }, + { + "type": "object", + "properties": { + "custom": { + "type": "object", + "properties": { + "prompt": { "type": "string" } + }, + "required": ["prompt"] + } + }, + "description": "Custom analysis with your own prompt" + } + ], + "description": "The type of analysis to perform (default: describe)" + } + }, + "required": ["image"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let result = match &args.analysis_type { + AnalysisType::Describe => self.agent.describe(&args.image).await?, + AnalysisType::DescribeDetailed => self.agent.describe_detailed(&args.image).await?, + AnalysisType::ExtractText => self.agent.extract_text(&args.image).await?, + AnalysisType::DetectObjects => self.agent.detect_objects(&args.image).await?, + AnalysisType::Custom { prompt } => { + // For custom prompts, we use describe with a modified prompt + // In a real implementation, VisionAgent would have a custom method + let custom_prompt = format!("{}\n\n[Image: {}]", prompt, args.image); + self.agent.describe(&custom_prompt).await? + } + }; + + Ok(ImageAnalysisResult { + result, + analysis_type: args.analysis_type, + }) + } +} diff --git a/crates/nvisy-rig/src/agent/tools/json_schema.rs b/crates/nvisy-rig/src/agent/tools/json_schema.rs new file mode 100644 index 0000000..66198e5 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/json_schema.rs @@ -0,0 +1,333 @@ +//! JSON schema validation tool. 
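One more test in the style of the ones at the bottom of this file, exercising the validator's `minItems` branch; it assumes it sits in the same `tests` module, so the existing `use super::*;` and `json!` imports apply:

```rust
#[tokio::test]
async fn test_array_min_items() {
    let tool = JsonSchemaTool::new();
    let result = tool
        .call(JsonSchemaArgs {
            schema: json!({
                "type": "array",
                "items": { "type": "string" },
                "minItems": 2
            }),
            data: json!(["only one"]),
        })
        .await
        .unwrap();

    assert!(!result.valid);
    assert!(result.errors[0].contains("minimum is 2"));
}
```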
+ +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +/// Error type for JSON schema operations. +#[derive(Debug, thiserror::Error)] +pub enum JsonSchemaError { + #[error("invalid schema: {0}")] + InvalidSchema(String), + #[error("invalid JSON: {0}")] + InvalidJson(String), + #[error("validation failed: {errors:?}")] + ValidationFailed { errors: Vec }, +} + +/// Arguments for JSON schema validation. +#[derive(Debug, Deserialize)] +pub struct JsonSchemaArgs { + /// The JSON schema to validate against. + pub schema: Value, + /// The JSON data to validate. + pub data: Value, +} + +/// Result of JSON schema validation. +#[derive(Debug, Serialize)] +pub struct JsonSchemaResult { + /// Whether the data is valid. + pub valid: bool, + /// Validation errors if any. + #[serde(skip_serializing_if = "Vec::is_empty")] + pub errors: Vec, +} + +/// Tool for validating JSON against a schema. +pub struct JsonSchemaTool; + +impl JsonSchemaTool { + /// Creates a new JSON schema tool. + pub fn new() -> Self { + Self + } + + /// Validates JSON data against a schema. + /// + /// This is a simplified validator that checks: + /// - Type matching + /// - Required properties + /// - Basic constraints + fn validate(schema: &Value, data: &Value, path: &str) -> Vec { + let mut errors = Vec::new(); + + // Get the expected type + let expected_type = schema.get("type").and_then(|t| t.as_str()); + + match expected_type { + Some("object") => { + if !data.is_object() { + errors.push(format!("{path}: expected object, got {}", type_name(data))); + return errors; + } + + let obj = data.as_object().unwrap(); + + // Check required properties + if let Some(required) = schema.get("required").and_then(|r| r.as_array()) { + for req in required { + if let Some(field) = req.as_str() + && !obj.contains_key(field) + { + errors.push(format!("{path}: missing required property '{field}'")); + } + } + } + + // Validate properties + if let Some(properties) = schema.get("properties").and_then(|p| p.as_object()) { + for (key, prop_schema) in properties { + if let Some(value) = obj.get(key) { + let prop_path = if path.is_empty() { + key.clone() + } else { + format!("{path}.{key}") + }; + errors.extend(Self::validate(prop_schema, value, &prop_path)); + } + } + } + } + Some("array") => { + if !data.is_array() { + errors.push(format!("{path}: expected array, got {}", type_name(data))); + return errors; + } + + let arr = data.as_array().unwrap(); + + // Check min/max items + if let Some(min) = schema.get("minItems").and_then(|m| m.as_u64()) + && (arr.len() as u64) < min + { + errors.push(format!( + "{path}: array has {} items, minimum is {min}", + arr.len() + )); + } + if let Some(max) = schema.get("maxItems").and_then(|m| m.as_u64()) + && (arr.len() as u64) > max + { + errors.push(format!( + "{path}: array has {} items, maximum is {max}", + arr.len() + )); + } + + // Validate items + if let Some(items_schema) = schema.get("items") { + for (i, item) in arr.iter().enumerate() { + let item_path = format!("{path}[{i}]"); + errors.extend(Self::validate(items_schema, item, &item_path)); + } + } + } + Some("string") => { + if !data.is_string() { + errors.push(format!("{path}: expected string, got {}", type_name(data))); + return errors; + } + + let s = data.as_str().unwrap(); + + // Check min/max length + if let Some(min) = schema.get("minLength").and_then(|m| m.as_u64()) + && (s.len() as u64) < min + { + errors.push(format!( + "{path}: string length {} is less than minimum {min}", 
+ s.len() + )); + } + if let Some(max) = schema.get("maxLength").and_then(|m| m.as_u64()) + && (s.len() as u64) > max + { + errors.push(format!( + "{path}: string length {} exceeds maximum {max}", + s.len() + )); + } + + // Check enum + if let Some(enum_values) = schema.get("enum").and_then(|e| e.as_array()) + && !enum_values.contains(data) + { + errors.push(format!("{path}: value not in enum")); + } + } + Some("number") | Some("integer") => { + let is_valid = if expected_type == Some("integer") { + data.is_i64() || data.is_u64() + } else { + data.is_number() + }; + + if !is_valid { + errors.push(format!( + "{path}: expected {}, got {}", + expected_type.unwrap(), + type_name(data) + )); + return errors; + } + + if let Some(num) = data.as_f64() { + if let Some(min) = schema.get("minimum").and_then(|m| m.as_f64()) + && num < min + { + errors.push(format!("{path}: {num} is less than minimum {min}")); + } + if let Some(max) = schema.get("maximum").and_then(|m| m.as_f64()) + && num > max + { + errors.push(format!("{path}: {num} exceeds maximum {max}")); + } + } + } + Some("boolean") => { + if !data.is_boolean() { + errors.push(format!("{path}: expected boolean, got {}", type_name(data))); + } + } + Some("null") => { + if !data.is_null() { + errors.push(format!("{path}: expected null, got {}", type_name(data))); + } + } + None => { + // No type specified, accept anything + } + Some(t) => { + errors.push(format!("{path}: unknown type '{t}'")); + } + } + + errors + } +} + +fn type_name(value: &Value) -> &'static str { + match value { + Value::Null => "null", + Value::Bool(_) => "boolean", + Value::Number(n) => { + if n.is_i64() || n.is_u64() { + "integer" + } else { + "number" + } + } + Value::String(_) => "string", + Value::Array(_) => "array", + Value::Object(_) => "object", + } +} + +impl Default for JsonSchemaTool { + fn default() -> Self { + Self::new() + } +} + +impl Tool for JsonSchemaTool { + const NAME: &'static str = "json_schema"; + + type Error = JsonSchemaError; + type Args = JsonSchemaArgs; + type Output = JsonSchemaResult; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Validate JSON data against a JSON Schema. 
Use this to verify that structured data conforms to expected format.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "schema": { + "type": "object", + "description": "The JSON Schema to validate against" + }, + "data": { + "description": "The JSON data to validate" + } + }, + "required": ["schema", "data"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let errors = Self::validate(&args.schema, &args.data, ""); + + Ok(JsonSchemaResult { + valid: errors.is_empty(), + errors, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[tokio::test] + async fn test_valid_object() { + let tool = JsonSchemaTool::new(); + let result = tool + .call(JsonSchemaArgs { + schema: json!({ + "type": "object", + "properties": { + "name": { "type": "string" }, + "age": { "type": "integer" } + }, + "required": ["name"] + }), + data: json!({ + "name": "Alice", + "age": 30 + }), + }) + .await + .unwrap(); + + assert!(result.valid); + assert!(result.errors.is_empty()); + } + + #[tokio::test] + async fn test_missing_required() { + let tool = JsonSchemaTool::new(); + let result = tool + .call(JsonSchemaArgs { + schema: json!({ + "type": "object", + "required": ["name"] + }), + data: json!({}), + }) + .await + .unwrap(); + + assert!(!result.valid); + assert!(result.errors[0].contains("missing required")); + } + + #[tokio::test] + async fn test_type_mismatch() { + let tool = JsonSchemaTool::new(); + let result = tool + .call(JsonSchemaArgs { + schema: json!({ "type": "string" }), + data: json!(42), + }) + .await + .unwrap(); + + assert!(!result.valid); + assert!(result.errors[0].contains("expected string")); + } +} diff --git a/crates/nvisy-rig/src/agent/tools/metadata_query.rs b/crates/nvisy-rig/src/agent/tools/metadata_query.rs new file mode 100644 index 0000000..50d85bf --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/metadata_query.rs @@ -0,0 +1,178 @@ +//! Metadata query tool for filtering documents by metadata. + +use std::sync::Arc; + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; + +/// A metadata filter condition. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetadataFilter { + /// The field name to filter on. + pub field: String, + /// The operator to use. + pub operator: FilterOperator, + /// The value to compare against. + pub value: serde_json::Value, +} + +/// Filter operators for metadata queries. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FilterOperator { + /// Equals. + Eq, + /// Not equals. + Ne, + /// Greater than. + Gt, + /// Greater than or equal. + Gte, + /// Less than. + Lt, + /// Less than or equal. + Lte, + /// Contains (for arrays or strings). + Contains, + /// Starts with (for strings). + StartsWith, + /// Ends with (for strings). + EndsWith, + /// In (value is in array). + In, + /// Not in (value is not in array). + NotIn, + /// Exists (field exists). + Exists, +} + +/// Result from a metadata query. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QueryResult { + /// The document ID. + pub id: String, + /// The document content (may be truncated). + pub content: String, + /// The matched metadata fields. + pub metadata: serde_json::Value, +} + +/// Trait for metadata query implementations. +#[async_trait] +pub trait MetadataQuerier: Send + Sync { + /// Query documents by metadata filters. 
+ async fn query( + &self, + filters: &[MetadataFilter], + limit: usize, + offset: usize, + ) -> Result, MetadataQueryError>; +} + +/// Error type for metadata query operations. +#[derive(Debug, thiserror::Error)] +pub enum MetadataQueryError { + #[error("invalid filter: {0}")] + InvalidFilter(String), + #[error("query failed: {0}")] + Query(String), + #[error("connection error: {0}")] + Connection(String), +} + +/// Arguments for metadata query. +#[derive(Debug, Deserialize)] +pub struct MetadataQueryArgs { + /// The filters to apply. + pub filters: Vec, + /// Maximum number of results to return. + #[serde(default = "default_limit")] + pub limit: usize, + /// Number of results to skip. + #[serde(default)] + pub offset: usize, +} + +fn default_limit() -> usize { + 10 +} + +/// Tool for querying documents by metadata. +pub struct MetadataQueryTool { + querier: Arc, +} + +impl MetadataQueryTool { + /// Creates a new metadata query tool. + pub fn new(querier: Q) -> Self { + Self { + querier: Arc::new(querier), + } + } + + /// Creates a new metadata query tool from an Arc. + pub fn from_arc(querier: Arc) -> Self { + Self { querier } + } +} + +impl Tool for MetadataQueryTool { + const NAME: &'static str = "metadata_query"; + + type Error = MetadataQueryError; + type Args = MetadataQueryArgs; + type Output = Vec; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Query documents by their metadata fields. Use this to filter documents by specific attributes like date, author, type, tags, etc.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "filters": { + "type": "array", + "items": { + "type": "object", + "properties": { + "field": { + "type": "string", + "description": "The metadata field name" + }, + "operator": { + "type": "string", + "enum": ["eq", "ne", "gt", "gte", "lt", "lte", "contains", "starts_with", "ends_with", "in", "not_in", "exists"], + "description": "The comparison operator" + }, + "value": { + "description": "The value to compare against" + } + }, + "required": ["field", "operator", "value"] + }, + "description": "The filter conditions to apply" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results (default: 10)", + "default": 10 + }, + "offset": { + "type": "integer", + "description": "Number of results to skip for pagination", + "default": 0 + } + }, + "required": ["filters"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + self.querier + .query(&args.filters, args.limit, args.offset) + .await + } +} diff --git a/crates/nvisy-rig/src/agent/tools/mod.rs b/crates/nvisy-rig/src/agent/tools/mod.rs new file mode 100644 index 0000000..bc1b1e2 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/mod.rs @@ -0,0 +1,30 @@ +//! Tools module for agent function calling capabilities. +//! +//! This module provides tools that agents can use during execution: +//! +//! - [`VectorSearchTool`] - Search vector store for similar chunks +//! - [`DocumentFetchTool`] - Fetch document/chunk by ID +//! - [`MetadataQueryTool`] - Query documents by metadata filters +//! - [`ContextStoreTool`] - Save/retrieve from agent memory +//! - [`ScratchpadTool`] - Temporary working storage +//! - [`WebFetchTool`] - Fetch content from URLs +//! - [`ImageAnalysisTool`] - Analyze images with VLM +//! 
- [`JsonSchemaTool`] - Validate JSON against schema + +mod context_store; +mod document_fetch; +mod image_analysis; +mod json_schema; +mod metadata_query; +mod scratchpad; +mod vector_search; +mod web_fetch; + +pub use context_store::{ContextStore, ContextStoreTool}; +pub use document_fetch::{DocumentFetchTool, DocumentFetcher}; +pub use image_analysis::ImageAnalysisTool; +pub use json_schema::JsonSchemaTool; +pub use metadata_query::{MetadataQuerier, MetadataQueryTool}; +pub use scratchpad::{Scratchpad, ScratchpadTool}; +pub use vector_search::{VectorSearchTool, VectorSearcher}; +pub use web_fetch::{FetchResponse, WebFetchTool, WebFetcher}; diff --git a/crates/nvisy-rig/src/agent/tools/scratchpad.rs b/crates/nvisy-rig/src/agent/tools/scratchpad.rs new file mode 100644 index 0000000..9afa781 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/scratchpad.rs @@ -0,0 +1,307 @@ +//! Scratchpad tool for temporary working storage. + +use std::collections::HashMap; +use std::sync::Arc; + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; +use tokio::sync::RwLock; + +/// Trait for scratchpad implementations. +#[async_trait] +pub trait Scratchpad: Send + Sync { + /// Write to the scratchpad. + async fn write(&self, content: &str) -> Result<(), ScratchpadError>; + + /// Append to the scratchpad. + async fn append(&self, content: &str) -> Result<(), ScratchpadError>; + + /// Read the scratchpad content. + async fn read(&self) -> Result; + + /// Clear the scratchpad. + async fn clear(&self) -> Result<(), ScratchpadError>; + + /// Get a named section from the scratchpad. + async fn get_section(&self, name: &str) -> Result, ScratchpadError>; + + /// Set a named section in the scratchpad. + async fn set_section(&self, name: &str, content: &str) -> Result<(), ScratchpadError>; +} + +/// In-memory scratchpad implementation. +pub struct InMemoryScratchpad { + content: RwLock, + sections: RwLock>, +} + +impl InMemoryScratchpad { + /// Creates a new empty scratchpad. + pub fn new() -> Self { + Self { + content: RwLock::new(String::new()), + sections: RwLock::new(HashMap::new()), + } + } +} + +impl Default for InMemoryScratchpad { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl Scratchpad for InMemoryScratchpad { + async fn write(&self, content: &str) -> Result<(), ScratchpadError> { + let mut guard = self.content.write().await; + *guard = content.to_string(); + Ok(()) + } + + async fn append(&self, content: &str) -> Result<(), ScratchpadError> { + let mut guard = self.content.write().await; + guard.push_str(content); + Ok(()) + } + + async fn read(&self) -> Result { + let guard = self.content.read().await; + Ok(guard.clone()) + } + + async fn clear(&self) -> Result<(), ScratchpadError> { + let mut guard = self.content.write().await; + guard.clear(); + let mut sections = self.sections.write().await; + sections.clear(); + Ok(()) + } + + async fn get_section(&self, name: &str) -> Result, ScratchpadError> { + let guard = self.sections.read().await; + Ok(guard.get(name).cloned()) + } + + async fn set_section(&self, name: &str, content: &str) -> Result<(), ScratchpadError> { + let mut guard = self.sections.write().await; + guard.insert(name.to_string(), content.to_string()); + Ok(()) + } +} + +/// Error type for scratchpad operations. 
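Editor's note: `InMemoryScratchpad` above is the default storage behind the scratchpad tool. A minimal usage sketch of the `Scratchpad` trait, written as if inside the `scratchpad` module (the struct itself is not re-exported from `tools`), assuming a tokio runtime since the trait is async:

```rust
#[tokio::main]
async fn main() -> Result<(), ScratchpadError> {
    let pad = InMemoryScratchpad::new();

    // Free-form content: `write` replaces, `append` extends.
    pad.write("draft:\n").await?;
    pad.append("- summarize section 2\n").await?;
    assert_eq!(pad.read().await?, "draft:\n- summarize section 2\n");

    // Named sections are stored separately from the main buffer.
    pad.set_section("plan", "1. fetch  2. extract").await?;
    assert_eq!(
        pad.get_section("plan").await?.as_deref(),
        Some("1. fetch  2. extract")
    );

    // `clear` wipes both the buffer and all named sections.
    pad.clear().await?;
    assert!(pad.read().await?.is_empty());
    Ok(())
}
```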
+#[derive(Debug, thiserror::Error)] +pub enum ScratchpadError { + #[error("write failed: {0}")] + Write(String), + #[error("read failed: {0}")] + Read(String), +} + +/// The operation to perform on the scratchpad. +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ScratchpadOperation { + /// Write content (replaces existing). + Write { content: String }, + /// Append content. + Append { content: String }, + /// Read all content. + Read, + /// Clear all content. + Clear, + /// Get a named section. + GetSection { name: String }, + /// Set a named section. + SetSection { name: String, content: String }, +} + +/// Arguments for scratchpad operations. +#[derive(Debug, Deserialize)] +pub struct ScratchpadArgs { + /// The operation to perform. + pub operation: ScratchpadOperation, +} + +/// Result of a scratchpad operation. +#[derive(Debug, Serialize)] +pub struct ScratchpadResult { + /// Whether the operation succeeded. + pub success: bool, + /// The content (for read operations). + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option, + /// Optional message. + #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, +} + +/// Tool for temporary working storage. +pub struct ScratchpadTool { + scratchpad: Arc, +} + +impl ScratchpadTool { + /// Creates a new scratchpad tool. + pub fn new(scratchpad: S) -> Self { + Self { + scratchpad: Arc::new(scratchpad), + } + } + + /// Creates a new scratchpad tool from an Arc. + pub fn from_arc(scratchpad: Arc) -> Self { + Self { scratchpad } + } +} + +impl ScratchpadTool { + /// Creates a new scratchpad tool with in-memory storage. + pub fn in_memory() -> Self { + Self::new(InMemoryScratchpad::new()) + } +} + +impl Tool for ScratchpadTool { + const NAME: &'static str = "scratchpad"; + + type Error = ScratchpadError; + type Args = ScratchpadArgs; + type Output = ScratchpadResult; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "A temporary workspace for drafting, editing, and organizing content. 
Use this to work on intermediate results before producing final output.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "operation": { + "type": "object", + "oneOf": [ + { + "type": "object", + "properties": { + "write": { + "type": "object", + "properties": { + "content": { "type": "string" } + }, + "required": ["content"] + } + } + }, + { + "type": "object", + "properties": { + "append": { + "type": "object", + "properties": { + "content": { "type": "string" } + }, + "required": ["content"] + } + } + }, + { + "type": "object", + "properties": { + "read": { "type": "object" } + } + }, + { + "type": "object", + "properties": { + "clear": { "type": "object" } + } + }, + { + "type": "object", + "properties": { + "get_section": { + "type": "object", + "properties": { + "name": { "type": "string" } + }, + "required": ["name"] + } + } + }, + { + "type": "object", + "properties": { + "set_section": { + "type": "object", + "properties": { + "name": { "type": "string" }, + "content": { "type": "string" } + }, + "required": ["name", "content"] + } + } + } + ], + "description": "The operation: write, append, read, clear, get_section, or set_section" + } + }, + "required": ["operation"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + match args.operation { + ScratchpadOperation::Write { content } => { + self.scratchpad.write(&content).await?; + Ok(ScratchpadResult { + success: true, + content: None, + message: Some("Content written to scratchpad".to_string()), + }) + } + ScratchpadOperation::Append { content } => { + self.scratchpad.append(&content).await?; + Ok(ScratchpadResult { + success: true, + content: None, + message: Some("Content appended to scratchpad".to_string()), + }) + } + ScratchpadOperation::Read => { + let content = self.scratchpad.read().await?; + Ok(ScratchpadResult { + success: true, + content: Some(content), + message: None, + }) + } + ScratchpadOperation::Clear => { + self.scratchpad.clear().await?; + Ok(ScratchpadResult { + success: true, + content: None, + message: Some("Scratchpad cleared".to_string()), + }) + } + ScratchpadOperation::GetSection { name } => { + let content = self.scratchpad.get_section(&name).await?; + Ok(ScratchpadResult { + success: content.is_some(), + content, + message: None, + }) + } + ScratchpadOperation::SetSection { name, content } => { + self.scratchpad.set_section(&name, &content).await?; + Ok(ScratchpadResult { + success: true, + content: None, + message: Some(format!("Section '{name}' updated")), + }) + } + } + } +} diff --git a/crates/nvisy-rig/src/agent/tools/vector_search.rs b/crates/nvisy-rig/src/agent/tools/vector_search.rs new file mode 100644 index 0000000..b165a5f --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/vector_search.rs @@ -0,0 +1,123 @@ +//! Vector search tool for semantic similarity search. + +use std::sync::Arc; + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; + +/// Result from a vector search query. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SearchResult { + /// The document/chunk ID. + pub id: String, + /// The text content. + pub content: String, + /// Similarity score (0.0 to 1.0). + pub score: f64, + /// Optional metadata. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metadata: Option, +} + +/// Trait for vector search implementations. +#[async_trait] +pub trait VectorSearcher: Send + Sync { + /// Search for similar documents. 
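Editor's note: because `ScratchpadOperation` is an externally tagged serde enum with `rename_all = "snake_case"`, the JSON an agent sends in the `operation` field is keyed by the operation name, and unit variants collapse to a plain string. A small check of the accepted shapes, assuming `ScratchpadArgs` and `ScratchpadOperation` from the scratchpad module are in scope:

```rust
use serde_json::json;

fn main() {
    // Struct variants: {"<operation>": { ...fields }}.
    let set: ScratchpadArgs = serde_json::from_value(json!({
        "operation": { "set_section": { "name": "notes", "content": "key findings" } }
    }))
    .expect("set_section args should deserialize");
    assert!(matches!(set.operation, ScratchpadOperation::SetSection { .. }));

    // Unit variants: just the operation name as a string.
    let read: ScratchpadArgs = serde_json::from_value(json!({ "operation": "read" }))
        .expect("read args should deserialize");
    assert!(matches!(read.operation, ScratchpadOperation::Read));
}
```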
+ async fn search( + &self, + query: &str, + limit: usize, + threshold: Option, + ) -> Result, VectorSearchError>; +} + +/// Error type for vector search operations. +#[derive(Debug, thiserror::Error)] +pub enum VectorSearchError { + #[error("embedding failed: {0}")] + Embedding(String), + #[error("search failed: {0}")] + Search(String), + #[error("connection error: {0}")] + Connection(String), +} + +/// Arguments for vector search. +#[derive(Debug, Deserialize)] +pub struct VectorSearchArgs { + /// The search query text. + pub query: String, + /// Maximum number of results to return. + #[serde(default = "default_limit")] + pub limit: usize, + /// Minimum similarity threshold (0.0 to 1.0). + #[serde(default)] + pub threshold: Option, +} + +fn default_limit() -> usize { + 5 +} + +/// Tool for searching vector stores. +pub struct VectorSearchTool { + searcher: Arc, +} + +impl VectorSearchTool { + /// Creates a new vector search tool. + pub fn new(searcher: S) -> Self { + Self { + searcher: Arc::new(searcher), + } + } + + /// Creates a new vector search tool from an Arc. + pub fn from_arc(searcher: Arc) -> Self { + Self { searcher } + } +} + +impl Tool for VectorSearchTool { + const NAME: &'static str = "vector_search"; + + type Error = VectorSearchError; + type Args = VectorSearchArgs; + type Output = Vec; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Search for semantically similar documents or chunks using vector embeddings. Returns the most relevant results based on meaning, not just keywords.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query text to find similar documents" + }, + "limit": { + "type": "integer", + "description": "Maximum number of results to return (default: 5)", + "default": 5 + }, + "threshold": { + "type": "number", + "description": "Minimum similarity score threshold (0.0 to 1.0)", + "minimum": 0.0, + "maximum": 1.0 + } + }, + "required": ["query"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + self.searcher + .search(&args.query, args.limit, args.threshold) + .await + } +} diff --git a/crates/nvisy-rig/src/agent/tools/web_fetch.rs b/crates/nvisy-rig/src/agent/tools/web_fetch.rs new file mode 100644 index 0000000..b20985f --- /dev/null +++ b/crates/nvisy-rig/src/agent/tools/web_fetch.rs @@ -0,0 +1,270 @@ +//! Web fetch tool for retrieving content from URLs. + +use async_trait::async_trait; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +/// Error type for web fetch operations. +#[derive(Debug, thiserror::Error)] +pub enum WebFetchError { + #[error("request failed: {0}")] + Request(String), + #[error("invalid URL: {0}")] + InvalidUrl(String), + #[error("timeout")] + Timeout, + #[error("content too large: {size} bytes (max: {max})")] + ContentTooLarge { size: usize, max: usize }, + #[error("unsupported content type: {0}")] + UnsupportedContentType(String), +} + +/// Arguments for web fetch. +#[derive(Debug, Deserialize)] +pub struct WebFetchArgs { + /// The URL to fetch. + pub url: String, + /// Maximum content size in bytes. + #[serde(default = "default_max_size")] + pub max_size: usize, + /// Whether to extract text only (strip HTML). + #[serde(default = "default_extract_text")] + pub extract_text: bool, + /// Timeout in seconds. 
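Editor's note: `VectorSearchTool` is generic over the `VectorSearcher` trait above, so the store backend is pluggable. A minimal stub wired into the tool, assuming the module's types are in scope and that the threshold is an `f64` like the score; the `StubSearcher` name is illustrative:

```rust
use async_trait::async_trait;

struct StubSearcher;

#[async_trait]
impl VectorSearcher for StubSearcher {
    async fn search(
        &self,
        query: &str,
        limit: usize,
        threshold: Option<f64>,
    ) -> Result<Vec<SearchResult>, VectorSearchError> {
        // A real implementation would embed `query` and run an ANN search;
        // here we just echo a single canned hit within the requested limit.
        let hit = SearchResult {
            id: "doc-1".to_string(),
            content: format!("stub hit for: {query}"),
            score: 0.99,
            metadata: None,
        };
        Ok(std::iter::once(hit)
            .filter(|r| threshold.map_or(true, |t| r.score >= t))
            .take(limit)
            .collect())
    }
}

// The tool owns the searcher behind an Arc, so both constructors work:
// let tool = VectorSearchTool::new(StubSearcher);
// let tool = VectorSearchTool::from_arc(std::sync::Arc::new(StubSearcher));
```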
+ #[serde(default = "default_timeout")] + pub timeout_secs: u64, +} + +fn default_max_size() -> usize { + 1_000_000 // 1MB +} + +fn default_extract_text() -> bool { + true +} + +fn default_timeout() -> u64 { + 30 +} + +/// Result of a web fetch operation. +#[derive(Debug, Serialize)] +pub struct WebFetchResult { + /// The fetched content. + pub content: String, + /// The content type. + pub content_type: Option, + /// The final URL (after redirects). + pub final_url: String, + /// Content length in bytes. + pub length: usize, + /// Page title if available. + #[serde(skip_serializing_if = "Option::is_none")] + pub title: Option, +} + +/// Trait for fetching web content. +/// +/// Implementations should handle HTTP requests, redirects, and content extraction. +#[async_trait] +pub trait WebFetcher: Send + Sync { + /// Fetches content from a URL. + /// + /// # Arguments + /// + /// * `url` - The URL to fetch + /// * `max_size` - Maximum content size in bytes + /// * `timeout_secs` - Request timeout in seconds + /// + /// # Returns + /// + /// The fetched content as bytes, the final URL, and the content type. + async fn fetch( + &self, + url: &str, + max_size: usize, + timeout_secs: u64, + ) -> Result; +} + +/// Raw response from a web fetch operation. +#[derive(Debug)] +pub struct FetchResponse { + /// The raw content bytes. + pub bytes: bytes::Bytes, + /// The final URL after redirects. + pub final_url: String, + /// The content type header value. + pub content_type: Option, +} + +/// Tool for fetching web content. +/// +/// This tool uses a pluggable `WebFetcher` implementation for making HTTP requests. +pub struct WebFetchTool { + fetcher: Arc, + max_size: usize, +} + +impl WebFetchTool { + /// Creates a new web fetch tool. + pub fn new(fetcher: F) -> Self { + Self { + fetcher: Arc::new(fetcher), + max_size: default_max_size(), + } + } + + /// Creates a new web fetch tool with a shared fetcher. + pub fn with_arc(fetcher: Arc) -> Self { + Self { + fetcher, + max_size: default_max_size(), + } + } + + /// Creates a new web fetch tool with custom max size. + pub fn with_max_size(fetcher: F, max_size: usize) -> Self { + Self { + fetcher: Arc::new(fetcher), + max_size, + } + } + + /// Extracts text content from HTML. + fn extract_text_from_html(html: &str) -> (String, Option) { + // Simple HTML text extraction + // In production, you might want to use a proper HTML parser like scraper + + // Extract title + let title = html.find("").and_then(|start| { + let start = start + 7; + html[start..] 
+ .find("") + .map(|end| html[start..start + end].trim().to_string()) + }); + + // Remove script and style tags + let mut text = html.to_string(); + + // Remove script tags + while let Some(start) = text.find("") { + text = format!("{}{}", &text[..start], &text[start + end + 9..]); + } else { + break; + } + } + + // Remove style tags + while let Some(start) = text.find("") { + text = format!("{}{}", &text[..start], &text[start + end + 8..]); + } else { + break; + } + } + + // Remove all HTML tags + let mut result = String::new(); + let mut in_tag = false; + for c in text.chars() { + match c { + '<' => in_tag = true, + '>' => in_tag = false, + _ if !in_tag => result.push(c), + _ => {} + } + } + + // Decode common HTML entities + let result = result + .replace(" ", " ") + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(""", "\"") + .replace("'", "'"); + + // Normalize whitespace + let result: String = result.split_whitespace().collect::>().join(" "); + + (result, title) + } +} + +impl Tool for WebFetchTool { + const NAME: &'static str = "web_fetch"; + + type Error = WebFetchError; + type Args = WebFetchArgs; + type Output = WebFetchResult; + + async fn definition(&self, _prompt: String) -> ToolDefinition { + ToolDefinition { + name: Self::NAME.to_string(), + description: "Fetch content from a URL. Can retrieve web pages, APIs, or other HTTP resources. Optionally extracts text from HTML.".to_string(), + parameters: serde_json::json!({ + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "The URL to fetch" + }, + "max_size": { + "type": "integer", + "description": "Maximum content size in bytes (default: 1MB)", + "default": 1000000 + }, + "extract_text": { + "type": "boolean", + "description": "Extract text only from HTML (default: true)", + "default": true + }, + "timeout_secs": { + "type": "integer", + "description": "Request timeout in seconds (default: 30)", + "default": 30 + } + }, + "required": ["url"] + }), + } + } + + async fn call(&self, args: Self::Args) -> Result { + let max_size = args.max_size.min(self.max_size); + + let response = self + .fetcher + .fetch(&args.url, max_size, args.timeout_secs) + .await?; + + let content = String::from_utf8_lossy(&response.bytes).to_string(); + let length = content.len(); + + let is_html = response + .content_type + .as_ref() + .map(|ct| ct.contains("text/html")) + .unwrap_or(false) + || content.trim_start().starts_with(" Self { + Self::Parse(message.to_string()) + } + /// Returns true if this error is retryable. pub fn is_retryable(&self) -> bool { matches!(self, Self::Provider { .. }) diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index 609cc20..784a498 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -9,7 +9,7 @@ use super::EngineConfig; use super::context::ExecutionContext; use crate::error::{WorkflowError, WorkflowResult}; use crate::graph::{InputSource, NodeData, NodeId, OutputDestination, WorkflowGraph}; -use crate::provider::{CredentialsRegistry, InputProvider, OutputProvider}; +use crate::provider::{CredentialsRegistry, InputProvider, IntoProvider, OutputProvider}; /// Tracing target for engine operations. const TRACING_TARGET: &str = "nvisy_workflow::engine"; @@ -49,9 +49,16 @@ impl Engine { &self.config } - /// Validates a workflow graph. 
- pub fn validate(&self, workflow: &WorkflowGraph) -> WorkflowResult<()> { - workflow.validate() + /// Validates a workflow graph against a credentials registry. + /// + /// Checks graph structure, constraints, and that all referenced + /// credentials exist in the registry. + pub fn validate( + &self, + workflow: &WorkflowGraph, + registry: &CredentialsRegistry, + ) -> WorkflowResult<()> { + workflow.validate(registry) } /// Executes a workflow graph with the given credentials. @@ -70,7 +77,7 @@ impl Engine { .await .map_err(|e| WorkflowError::Internal(format!("semaphore closed: {}", e)))?; - workflow.validate()?; + workflow.validate(&credentials)?; let order = workflow.topological_order()?; @@ -119,7 +126,7 @@ impl Engine { InputSource::Provider(params) => { let credentials_id = params.credentials_id(); let credentials = ctx.credentials().get(credentials_id)?.clone(); - let config = params.clone().into_config(credentials)?; + let config = params.clone().into_provider(credentials)?; let provider = config.into_provider()?; PipelineInput::Provider(provider) } @@ -132,7 +139,7 @@ impl Engine { OutputDestination::Provider(params) => { let credentials_id = params.credentials_id(); let credentials = ctx.credentials().get(credentials_id)?.clone(); - let config = params.clone().into_config(credentials)?; + let config = params.clone().into_provider(credentials)?; let provider = config.into_provider().await?; PipelineOutput::Provider(provider) } diff --git a/crates/nvisy-runtime/src/graph/workflow.rs b/crates/nvisy-runtime/src/graph/graph.rs similarity index 77% rename from crates/nvisy-runtime/src/graph/workflow.rs rename to crates/nvisy-runtime/src/graph/graph.rs index ca4af4f..dd0619c 100644 --- a/crates/nvisy-runtime/src/graph/workflow.rs +++ b/crates/nvisy-runtime/src/graph/graph.rs @@ -1,72 +1,19 @@ -//! Workflow graph definition. +//! Workflow graph runtime representation. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; -use jiff::Timestamp; use petgraph::Direction; use petgraph::algo::{is_cyclic_directed, toposort}; use petgraph::graph::{DiGraph, NodeIndex}; use petgraph::visit::EdgeRef; -use semver::Version; -use serde::{Deserialize, Serialize}; +use uuid::Uuid; -use super::edge::EdgeData; -use super::{Edge, NodeData, NodeId}; +use super::input::InputSource; +use super::output::OutputDestination; +use super::transform::TransformerConfig; +use super::workflow::{Edge, EdgeData, NodeData, NodeId, WorkflowDefinition, WorkflowMetadata}; use crate::error::{WorkflowError, WorkflowResult}; - -/// Workflow metadata. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] -pub struct WorkflowMetadata { - /// Workflow name (optional). - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - /// Workflow description. - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, - /// Workflow version (semver, optional). - #[serde(skip_serializing_if = "Option::is_none")] - pub version: Option, - /// Tags for organization. - #[serde(default, skip_serializing_if = "Vec::is_empty")] - pub tags: Vec, - /// Creation timestamp. - #[serde(skip_serializing_if = "Option::is_none")] - pub created_at: Option, - /// Last update timestamp. - #[serde(skip_serializing_if = "Option::is_none")] - pub updated_at: Option, -} - -impl WorkflowMetadata { - /// Creates a new empty metadata. - pub fn new() -> Self { - Self::default() - } - - /// Sets the workflow name. 
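Editor's note: with this change `Engine::validate` (and `execute`, which validates internally) needs the credentials registry, so callers can no longer validate a graph in isolation. A usage sketch, assuming an `Engine`, a `WorkflowGraph`, and a populated `CredentialsRegistry` are already in hand:

```rust
fn preflight(
    engine: &Engine,
    workflow: &WorkflowGraph,
    registry: &CredentialsRegistry,
) -> WorkflowResult<()> {
    // Fails on structural problems (cycles, missing inputs/outputs) and on
    // any credentials_id that the registry cannot resolve.
    engine.validate(workflow, registry)
}
```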
- pub fn with_name(mut self, name: impl Into) -> Self { - self.name = Some(name.into()); - self - } - - /// Sets the workflow description. - pub fn with_description(mut self, description: impl Into) -> Self { - self.description = Some(description.into()); - self - } - - /// Sets the workflow version. - pub fn with_version(mut self, version: Version) -> Self { - self.version = Some(version); - self - } - - /// Adds tags. - pub fn with_tags(mut self, tags: impl IntoIterator>) -> Self { - self.tags = tags.into_iter().map(Into::into).collect(); - self - } -} +use crate::provider::CredentialsRegistry; /// A workflow graph containing nodes and edges. /// @@ -290,8 +237,56 @@ impl WorkflowGraph { .collect() } - /// Validates the workflow graph structure and constraints. - pub fn validate(&self) -> WorkflowResult<()> { + /// Collects all credentials IDs referenced by nodes in the workflow. + /// + /// Returns a set of unique credential UUIDs from input providers, + /// output providers, and AI-powered transformers. + pub fn credentials_ids(&self) -> HashSet { + let mut ids = HashSet::new(); + + for data in self.graph.node_weights() { + match data { + NodeData::Input(input) => { + if let InputSource::Provider(params) = &input.source { + ids.insert(params.credentials_id()); + } + } + NodeData::Output(output) => { + if let OutputDestination::Provider(params) = &output.destination { + ids.insert(params.credentials_id()); + } + } + NodeData::Transformer(config) => match config { + TransformerConfig::Embedding(c) => { + ids.insert(c.provider.credentials_id()); + } + TransformerConfig::Enrich(c) => { + ids.insert(c.provider.credentials_id()); + } + TransformerConfig::Extract(c) => { + ids.insert(c.provider.credentials_id()); + } + TransformerConfig::Derive(c) => { + ids.insert(c.provider.credentials_id()); + } + // Partition and Chunk don't require credentials + TransformerConfig::Partition(_) | TransformerConfig::Chunk(_) => {} + }, + } + } + + ids + } + + /// Validates the workflow graph structure, constraints, and credentials. 
+ /// + /// Checks that: + /// - The graph has at least one node + /// - There is at least one input and one output node + /// - The graph is acyclic + /// - Edge constraints are satisfied for each node type + /// - All referenced credentials exist in the registry + pub fn validate(&self, registry: &CredentialsRegistry) -> WorkflowResult<()> { // Must have at least one node if self.graph.node_count() == 0 { return Err(WorkflowError::InvalidDefinition( @@ -377,6 +372,11 @@ impl WorkflowGraph { } } + // Validate that all referenced credentials exist in the registry + for credentials_id in self.credentials_ids() { + registry.get(credentials_id)?; + } + Ok(()) } @@ -403,52 +403,28 @@ impl WorkflowGraph { pub fn inner_mut(&mut self) -> &mut DiGraph { &mut self.graph } -} - -impl Serialize for WorkflowGraph { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - - let mut state = serializer.serialize_struct("WorkflowGraph", 3)?; - - // Serialize nodes as a map of NodeId -> NodeData - let nodes: HashMap = self.nodes().collect(); - state.serialize_field("nodes", &nodes)?; - - // Serialize edges - let edges: Vec = self.edges().collect(); - state.serialize_field("edges", &edges)?; - - state.serialize_field("metadata", &self.metadata)?; - state.end() - } -} -impl<'de> Deserialize<'de> for WorkflowGraph { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - #[derive(Deserialize)] - struct WorkflowGraphData { - nodes: HashMap, - edges: Vec, - #[serde(default)] - metadata: WorkflowMetadata, + /// Converts the workflow graph to a serializable definition. + pub fn to_definition(&self) -> WorkflowDefinition { + WorkflowDefinition { + nodes: self.nodes().map(|(id, data)| (id, data.clone())).collect(), + edges: self.edges().collect(), + metadata: self.metadata.clone(), } + } - let data = WorkflowGraphData::deserialize(deserializer)?; - let mut graph = WorkflowGraph::with_metadata(data.metadata); + /// Creates a workflow graph from a definition. + /// + /// Returns an error if any edge references a non-existent node. + pub fn from_definition(definition: WorkflowDefinition) -> WorkflowResult { + let mut graph = Self::with_metadata(definition.metadata); - for (id, node_data) in data.nodes { + for (id, node_data) in definition.nodes { graph.add_node_with_id(id, node_data); } - for edge in data.edges { - graph.add_edge(edge).map_err(serde::de::Error::custom)?; + for edge in definition.edges { + graph.add_edge(edge)?; } Ok(graph) diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index c77629e..277282f 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -2,6 +2,7 @@ //! //! This module provides the graph representation for workflows: //! - [`WorkflowGraph`]: The main graph structure containing nodes and edges +//! - [`WorkflowDefinition`]: Serializable workflow definition (JSON-friendly) //! - [`WorkflowMetadata`]: Metadata about the workflow //! - [`Edge`]: Connections between nodes //! - [`EdgeData`]: Data stored on edges in the underlying petgraph @@ -10,18 +11,17 @@ //! - [`CacheSlot`]: Named cache slot for in-memory data passing //! 
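Editor's note: `WorkflowGraph` now round-trips through `WorkflowDefinition` instead of the hand-written `Serialize`/`Deserialize` impls removed above, and `validate` takes the registry so dangling `credentials_id` references are caught up front. A sketch of the intended flow, assuming the crate's types are in scope:

```rust
fn build_and_check(
    definition: WorkflowDefinition,
    registry: &CredentialsRegistry,
) -> WorkflowResult<WorkflowGraph> {
    // `WorkflowDefinition` is the serde-friendly form; the graph is the
    // petgraph-backed runtime form. `TryFrom` is available as well.
    let graph = WorkflowGraph::from_definition(definition)?;

    // Structure, edge constraints, and every referenced credentials_id
    // are checked against the registry in one pass.
    graph.validate(registry)?;
    Ok(graph)
}
```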
- [`SwitchNode`]: Conditional routing based on data properties -mod edge; +mod graph; pub mod input; -mod node; pub mod output; pub mod route; pub mod transform; -mod workflow; +pub mod workflow; -pub use edge::{Edge, EdgeData}; +pub use graph::WorkflowGraph; pub use input::{InputNode, InputSource}; -pub use node::{Node, NodeCommon, NodeData, NodeId}; pub use output::{OutputDestination, OutputNode}; pub use route::{CacheSlot, SwitchBranch, SwitchCondition, SwitchNode}; pub use transform::TransformerConfig; -pub use workflow::{WorkflowGraph, WorkflowMetadata}; +pub use workflow::{Edge, EdgeData, Node, NodeCommon, NodeData, NodeId}; +pub use workflow::{WorkflowDefinition, WorkflowMetadata}; diff --git a/crates/nvisy-runtime/src/graph/transform/derive.rs b/crates/nvisy-runtime/src/graph/transform/derive.rs index 14e8dae..ede02f3 100644 --- a/crates/nvisy-runtime/src/graph/transform/derive.rs +++ b/crates/nvisy-runtime/src/graph/transform/derive.rs @@ -1,14 +1,15 @@ //! Derive transformer configuration - generate new content from input. -use nvisy_rig::provider::CompletionModel; use serde::{Deserialize, Serialize}; +use crate::provider::CompletionProviderParams; + /// Configuration for generating new content from input. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct DeriveConfig { - /// Completion model provider configuration. + /// Completion provider parameters (includes credentials_id and model). #[serde(flatten)] - pub provider: CompletionModel, + pub provider: CompletionProviderParams, /// The derivation task to perform. pub task: DeriveTask, diff --git a/crates/nvisy-runtime/src/graph/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs index 1d5eea3..978e847 100644 --- a/crates/nvisy-runtime/src/graph/transform/embedding.rs +++ b/crates/nvisy-runtime/src/graph/transform/embedding.rs @@ -1,14 +1,15 @@ //! Embedding transformer configuration. -use nvisy_rig::provider::EmbeddingModel; use serde::{Deserialize, Serialize}; +use crate::provider::EmbeddingProviderParams; + /// Configuration for generating embeddings. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct EmbeddingConfig { - /// Model to use for embedding generation. + /// Embedding provider parameters (includes credentials_id and model). #[serde(flatten)] - pub model: EmbeddingModel, + pub provider: EmbeddingProviderParams, /// Whether to L2-normalize the output embeddings. #[serde(default)] diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs index d8b22c9..631ea48 100644 --- a/crates/nvisy-runtime/src/graph/transform/enrich.rs +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -1,14 +1,15 @@ //! Enrich transformer configuration - add metadata/descriptions to elements. -use nvisy_rig::provider::CompletionModel; use serde::{Deserialize, Serialize}; +use crate::provider::CompletionProviderParams; + /// Configuration for enriching elements with metadata/descriptions. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct EnrichConfig { - /// Completion model provider configuration. + /// Completion provider parameters (includes credentials_id and model). #[serde(flatten)] - pub provider: CompletionModel, + pub provider: CompletionProviderParams, /// The enrichment task to perform. 
#[serde(flatten)] diff --git a/crates/nvisy-runtime/src/graph/transform/extract.rs b/crates/nvisy-runtime/src/graph/transform/extract.rs index 7fe1ddd..98344b0 100644 --- a/crates/nvisy-runtime/src/graph/transform/extract.rs +++ b/crates/nvisy-runtime/src/graph/transform/extract.rs @@ -1,14 +1,15 @@ //! Extract transformer configuration - extract structured data or convert formats. -use nvisy_rig::provider::CompletionModel; use serde::{Deserialize, Serialize}; +use crate::provider::CompletionProviderParams; + /// Configuration for extracting structured data or converting formats. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct ExtractConfig { - /// Completion model provider configuration. + /// Completion provider parameters (includes credentials_id and model). #[serde(flatten)] - pub provider: CompletionModel, + pub provider: CompletionProviderParams, /// The extraction task to perform. #[serde(flatten)] diff --git a/crates/nvisy-runtime/src/graph/workflow/definition.rs b/crates/nvisy-runtime/src/graph/workflow/definition.rs new file mode 100644 index 0000000..150e37e --- /dev/null +++ b/crates/nvisy-runtime/src/graph/workflow/definition.rs @@ -0,0 +1,80 @@ +//! Serializable workflow definition. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use super::edge::Edge; +use super::metadata::WorkflowMetadata; +use super::node::{NodeData, NodeId}; +use crate::error::{WorkflowError, WorkflowResult}; +use crate::graph::WorkflowGraph; + +/// Serializable workflow definition. +/// +/// This is the JSON-friendly representation of a workflow graph. +/// Use [`WorkflowGraph::to_definition`] and [`WorkflowGraph::from_definition`] +/// to convert between the two representations. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct WorkflowDefinition { + /// Nodes in the workflow, keyed by their ID. + pub nodes: HashMap, + /// Edges connecting nodes. + pub edges: Vec, + /// Workflow metadata. + #[serde(default)] + pub metadata: WorkflowMetadata, +} + +impl WorkflowDefinition { + /// Creates a new empty workflow definition. + pub fn new() -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + metadata: WorkflowMetadata::default(), + } + } + + /// Creates a workflow definition with metadata. + pub fn with_metadata(metadata: WorkflowMetadata) -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + metadata, + } + } + + /// Converts this definition into a workflow graph. + /// + /// Returns an error if any edge references a non-existent node. 
+ pub fn into_graph(self) -> WorkflowResult { + WorkflowGraph::from_definition(self) + } +} + +impl Default for WorkflowDefinition { + fn default() -> Self { + Self::new() + } +} + +impl TryFrom for WorkflowGraph { + type Error = WorkflowError; + + fn try_from(definition: WorkflowDefinition) -> Result { + Self::from_definition(definition) + } +} + +impl From<&WorkflowGraph> for WorkflowDefinition { + fn from(graph: &WorkflowGraph) -> Self { + graph.to_definition() + } +} + +impl From for WorkflowDefinition { + fn from(graph: WorkflowGraph) -> Self { + graph.to_definition() + } +} diff --git a/crates/nvisy-runtime/src/graph/edge.rs b/crates/nvisy-runtime/src/graph/workflow/edge.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/edge.rs rename to crates/nvisy-runtime/src/graph/workflow/edge.rs diff --git a/crates/nvisy-runtime/src/graph/workflow/metadata.rs b/crates/nvisy-runtime/src/graph/workflow/metadata.rs new file mode 100644 index 0000000..102e49b --- /dev/null +++ b/crates/nvisy-runtime/src/graph/workflow/metadata.rs @@ -0,0 +1,59 @@ +//! Workflow metadata. + +use jiff::Timestamp; +use semver::Version; +use serde::{Deserialize, Serialize}; + +/// Workflow metadata. +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +pub struct WorkflowMetadata { + /// Workflow name (optional). + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + /// Workflow description. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Workflow version (semver, optional). + #[serde(skip_serializing_if = "Option::is_none")] + pub version: Option, + /// Tags for organization. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tags: Vec, + /// Creation timestamp. + #[serde(skip_serializing_if = "Option::is_none")] + pub created_at: Option, + /// Last update timestamp. + #[serde(skip_serializing_if = "Option::is_none")] + pub updated_at: Option, +} + +impl WorkflowMetadata { + /// Creates a new empty metadata. + pub fn new() -> Self { + Self::default() + } + + /// Sets the workflow name. + pub fn with_name(mut self, name: impl Into) -> Self { + self.name = Some(name.into()); + self + } + + /// Sets the workflow description. + pub fn with_description(mut self, description: impl Into) -> Self { + self.description = Some(description.into()); + self + } + + /// Sets the workflow version. + pub fn with_version(mut self, version: Version) -> Self { + self.version = Some(version); + self + } + + /// Adds tags. + pub fn with_tags(mut self, tags: impl IntoIterator>) -> Self { + self.tags = tags.into_iter().map(Into::into).collect(); + self + } +} diff --git a/crates/nvisy-runtime/src/graph/workflow/mod.rs b/crates/nvisy-runtime/src/graph/workflow/mod.rs new file mode 100644 index 0000000..3c7cb3c --- /dev/null +++ b/crates/nvisy-runtime/src/graph/workflow/mod.rs @@ -0,0 +1,18 @@ +//! Workflow graph types. +//! +//! This module provides: +//! - [`WorkflowGraph`]: Runtime graph representation using petgraph +//! - [`WorkflowDefinition`]: Serializable JSON-friendly definition +//! - [`WorkflowMetadata`]: Workflow metadata (name, description, version, etc.) +//! - [`Node`], [`NodeId`], [`NodeData`]: Node types and identifiers +//! 
- [`Edge`], [`EdgeData`]: Edge types + +mod definition; +mod edge; +mod metadata; +mod node; + +pub use definition::WorkflowDefinition; +pub use edge::{Edge, EdgeData}; +pub use metadata::WorkflowMetadata; +pub use node::{Node, NodeCommon, NodeData, NodeId}; diff --git a/crates/nvisy-runtime/src/graph/node.rs b/crates/nvisy-runtime/src/graph/workflow/node.rs similarity index 96% rename from crates/nvisy-runtime/src/graph/node.rs rename to crates/nvisy-runtime/src/graph/workflow/node.rs index 945e279..eaa4c45 100644 --- a/crates/nvisy-runtime/src/graph/node.rs +++ b/crates/nvisy-runtime/src/graph/workflow/node.rs @@ -6,9 +6,9 @@ use derive_more::{Debug, Display, From, Into}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::input::InputNode; -use super::output::OutputNode; -use super::transform::TransformerConfig; +use crate::graph::input::InputNode; +use crate::graph::output::OutputNode; +use crate::graph::transform::TransformerConfig; /// Unique identifier for a node in a workflow graph. #[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] diff --git a/crates/nvisy-runtime/src/provider/ai.rs b/crates/nvisy-runtime/src/provider/ai.rs new file mode 100644 index 0000000..bd2d08a --- /dev/null +++ b/crates/nvisy-runtime/src/provider/ai.rs @@ -0,0 +1,163 @@ +//! AI provider types and implementations. + +use derive_more::From; +use nvisy_rig::provider::{CompletionProvider, EmbeddingProvider}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::ProviderCredentials; +use super::backend::{ + AnthropicCompletionParams, AnthropicCredentials, CohereCompletionParams, CohereCredentials, + CohereEmbeddingParams, GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams, + IntoProvider, OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams, + PerplexityCompletionParams, PerplexityCredentials, +}; +use crate::error::{WorkflowError, WorkflowResult}; + +/// Completion provider parameters. +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[serde(tag = "provider", rename_all = "snake_case")] +pub enum CompletionProviderParams { + /// OpenAI completion. + OpenAi(OpenAiCompletionParams), + /// Anthropic completion. + Anthropic(AnthropicCompletionParams), + /// Cohere completion. + Cohere(CohereCompletionParams), + /// Google Gemini completion. + Gemini(GeminiCompletionParams), + /// Perplexity completion. + Perplexity(PerplexityCompletionParams), +} + +impl CompletionProviderParams { + /// Returns the credentials ID for this provider. + pub fn credentials_id(&self) -> Uuid { + match self { + Self::OpenAi(p) => p.credentials_id, + Self::Anthropic(p) => p.credentials_id, + Self::Cohere(p) => p.credentials_id, + Self::Gemini(p) => p.credentials_id, + Self::Perplexity(p) => p.credentials_id, + } + } + + /// Returns the provider kind as a string. 
+ pub const fn kind(&self) -> &'static str { + match self { + Self::OpenAi(_) => "openai", + Self::Anthropic(_) => "anthropic", + Self::Cohere(_) => "cohere", + Self::Gemini(_) => "gemini", + Self::Perplexity(_) => "perplexity", + } + } +} + +impl IntoProvider for CompletionProviderParams { + type Credentials = ProviderCredentials; + type Output = CompletionProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + match (self, credentials) { + (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c), + (Self::Anthropic(p), ProviderCredentials::Anthropic(c)) => p.into_provider(c), + (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c), + (Self::Gemini(p), ProviderCredentials::Gemini(c)) => p.into_provider(c), + (Self::Perplexity(p), ProviderCredentials::Perplexity(c)) => p.into_provider(c), + (params, creds) => Err(WorkflowError::Internal(format!( + "credentials type mismatch: expected '{}', got '{}'", + params.kind(), + creds.kind() + ))), + } + } +} + +/// Embedding provider parameters. +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[serde(tag = "provider", rename_all = "snake_case")] +pub enum EmbeddingProviderParams { + /// OpenAI embedding. + OpenAi(OpenAiEmbeddingParams), + /// Cohere embedding. + Cohere(CohereEmbeddingParams), + /// Google Gemini embedding. + Gemini(GeminiEmbeddingParams), +} + +impl EmbeddingProviderParams { + /// Returns the credentials ID for this provider. + pub fn credentials_id(&self) -> Uuid { + match self { + Self::OpenAi(p) => p.credentials_id, + Self::Cohere(p) => p.credentials_id, + Self::Gemini(p) => p.credentials_id, + } + } + + /// Returns the provider kind as a string. + pub const fn kind(&self) -> &'static str { + match self { + Self::OpenAi(_) => "openai", + Self::Cohere(_) => "cohere", + Self::Gemini(_) => "gemini", + } + } + + /// Returns the embedding dimensions for this provider's model. + pub fn dimensions(&self) -> usize { + match self { + Self::OpenAi(p) => p.model.dimensions(), + Self::Cohere(p) => p.model.dimensions(), + Self::Gemini(p) => p.model.dimensions(), + } + } +} + +impl IntoProvider for EmbeddingProviderParams { + type Credentials = ProviderCredentials; + type Output = EmbeddingProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + match (self, credentials) { + (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c), + (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c), + (Self::Gemini(p), ProviderCredentials::Gemini(c)) => p.into_provider(c), + (params, creds) => Err(WorkflowError::Internal(format!( + "credentials type mismatch: expected '{}', got '{}'", + params.kind(), + creds.kind() + ))), + } + } +} + +/// AI provider credentials (sensitive). +#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "provider", rename_all = "snake_case")] +pub enum AiCredentials { + /// OpenAI credentials. + OpenAi(OpenAiCredentials), + /// Anthropic credentials. + Anthropic(AnthropicCredentials), + /// Cohere credentials. + Cohere(CohereCredentials), + /// Gemini credentials. + Gemini(GeminiCredentials), + /// Perplexity credentials. + Perplexity(PerplexityCredentials), +} + +impl AiCredentials { + /// Returns the provider kind as a string. 
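Editor's note: `EmbeddingProviderParams` carries both the credentials reference and the model, and `dimensions()` exposes the model's output width, which downstream vector-store configs need. A small helper showing the intended resolution path, with `IntoProvider` in scope; the function name is illustrative:

```rust
fn resolve_embedder(
    params: EmbeddingProviderParams,
    credentials: ProviderCredentials,
) -> WorkflowResult<(EmbeddingProvider, usize)> {
    // Capture the dimensionality before `params` is consumed.
    let dimensions = params.dimensions();

    // Fails with a "credentials type mismatch" Internal error if, say,
    // OpenAI params are paired with Cohere credentials.
    let provider = params.into_provider(credentials)?;
    Ok((provider, dimensions))
}
```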
+ pub const fn kind(&self) -> &'static str { + match self { + Self::OpenAi(_) => "openai", + Self::Anthropic(_) => "anthropic", + Self::Cohere(_) => "cohere", + Self::Gemini(_) => "gemini", + Self::Perplexity(_) => "perplexity", + } + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/anthropic.rs b/crates/nvisy-runtime/src/provider/backend/anthropic.rs new file mode 100644 index 0000000..f8b5b65 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/anthropic.rs @@ -0,0 +1,48 @@ +//! Anthropic provider. + +use nvisy_rig::provider::{AnthropicModel, CompletionProvider}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::IntoProvider; +use crate::error::{WorkflowError, WorkflowResult}; + +/// Anthropic credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AnthropicCredentials { + /// API key. + pub api_key: String, +} + +/// Anthropic completion parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct AnthropicCompletionParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Model to use. + pub model: AnthropicModel, +} + +impl AnthropicCompletionParams { + /// Creates a new Anthropic completion params. + pub fn new(credentials_id: Uuid, model: AnthropicModel) -> Self { + Self { + credentials_id, + model, + } + } +} + +impl IntoProvider for AnthropicCompletionParams { + type Credentials = AnthropicCredentials; + type Output = CompletionProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + let rig_creds = nvisy_rig::provider::CompletionCredentials::Anthropic { + api_key: credentials.api_key, + }; + let model = nvisy_rig::provider::CompletionModel::Anthropic(self.model); + CompletionProvider::new(&rig_creds, &model) + .map_err(|e| WorkflowError::Internal(e.to_string())) + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/azblob.rs b/crates/nvisy-runtime/src/provider/backend/azblob.rs index cdeb0b7..9b8a94b 100644 --- a/crates/nvisy-runtime/src/provider/backend/azblob.rs +++ b/crates/nvisy-runtime/src/provider/backend/azblob.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::AzblobConfig; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// Azure Blob Storage credentials. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AzblobCredentials { @@ -29,9 +32,11 @@ pub struct AzblobParams { pub prefix: Option, } -impl AzblobParams { - /// Combines params with credentials to create a full provider config. - pub fn into_config(self, credentials: AzblobCredentials) -> AzblobConfig { +impl IntoProvider for AzblobParams { + type Credentials = AzblobCredentials; + type Output = AzblobConfig; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { let mut config = AzblobConfig::new(credentials.account_name, self.container); if let Some(account_key) = credentials.account_key { @@ -44,6 +49,6 @@ impl AzblobParams { config = config.with_prefix(prefix); } - config + Ok(config) } } diff --git a/crates/nvisy-runtime/src/provider/backend/cohere.rs b/crates/nvisy-runtime/src/provider/backend/cohere.rs new file mode 100644 index 0000000..c358525 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/cohere.rs @@ -0,0 +1,83 @@ +//! Cohere provider. 
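Editor's note: each backend follows the same pattern, a `*Params` struct that references stored credentials by ID plus an `IntoProvider` impl that pairs it with the matching `*Credentials` at execution time. For the Anthropic backend just added, wiring might look like this, with the module's types and the `IntoProvider` trait in scope; model choice and key handling are left to the caller:

```rust
use uuid::Uuid;

fn anthropic_completion(
    credentials_id: Uuid,
    model: AnthropicModel,
    api_key: String,
) -> WorkflowResult<CompletionProvider> {
    let params = AnthropicCompletionParams::new(credentials_id, model);
    let credentials = AnthropicCredentials { api_key };
    // `into_provider` builds the rig-level credentials/model pair and
    // surfaces construction failures as WorkflowError::Internal.
    params.into_provider(credentials)
}
```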
+ +use nvisy_rig::provider::{ + CohereCompletionModel, CohereEmbeddingModel, CompletionProvider, EmbeddingProvider, +}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::IntoProvider; +use crate::error::{WorkflowError, WorkflowResult}; + +/// Cohere credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CohereCredentials { + /// API key. + pub api_key: String, +} + +/// Cohere completion parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct CohereCompletionParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Model to use. + pub model: CohereCompletionModel, +} + +impl CohereCompletionParams { + /// Creates a new Cohere completion params. + pub fn new(credentials_id: Uuid, model: CohereCompletionModel) -> Self { + Self { + credentials_id, + model, + } + } +} + +impl IntoProvider for CohereCompletionParams { + type Credentials = CohereCredentials; + type Output = CompletionProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + let rig_creds = nvisy_rig::provider::CompletionCredentials::Cohere { + api_key: credentials.api_key, + }; + let model = nvisy_rig::provider::CompletionModel::Cohere(self.model); + CompletionProvider::new(&rig_creds, &model) + .map_err(|e| WorkflowError::Internal(e.to_string())) + } +} + +/// Cohere embedding parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct CohereEmbeddingParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Model to use. + pub model: CohereEmbeddingModel, +} + +impl CohereEmbeddingParams { + /// Creates a new Cohere embedding params. + pub fn new(credentials_id: Uuid, model: CohereEmbeddingModel) -> Self { + Self { + credentials_id, + model, + } + } +} + +impl IntoProvider for CohereEmbeddingParams { + type Credentials = CohereCredentials; + type Output = EmbeddingProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + let rig_creds = nvisy_rig::provider::EmbeddingCredentials::Cohere { + api_key: credentials.api_key, + }; + let model = nvisy_rig::provider::EmbeddingModel::Cohere(self.model); + EmbeddingProvider::new(&rig_creds, &model) + .map_err(|e| WorkflowError::Internal(e.to_string())) + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/gcs.rs b/crates/nvisy-runtime/src/provider/backend/gcs.rs index 8984f84..cec5247 100644 --- a/crates/nvisy-runtime/src/provider/backend/gcs.rs +++ b/crates/nvisy-runtime/src/provider/backend/gcs.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::GcsConfig; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// Google Cloud Storage credentials. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GcsCredentials { @@ -23,15 +26,17 @@ pub struct GcsParams { pub prefix: Option, } -impl GcsParams { - /// Combines params with credentials to create a full provider config. 
- pub fn into_config(self, credentials: GcsCredentials) -> GcsConfig { +impl IntoProvider for GcsParams { + type Credentials = GcsCredentials; + type Output = GcsConfig; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { let mut config = GcsConfig::new(self.bucket).with_credentials(credentials.credentials_json); if let Some(prefix) = self.prefix { config = config.with_prefix(prefix); } - config + Ok(config) } } diff --git a/crates/nvisy-runtime/src/provider/backend/gemini.rs b/crates/nvisy-runtime/src/provider/backend/gemini.rs new file mode 100644 index 0000000..9c98778 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/gemini.rs @@ -0,0 +1,83 @@ +//! Google Gemini provider. + +use nvisy_rig::provider::{ + CompletionProvider, EmbeddingProvider, GeminiCompletionModel, GeminiEmbeddingModel, +}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::IntoProvider; +use crate::error::{WorkflowError, WorkflowResult}; + +/// Gemini credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GeminiCredentials { + /// API key. + pub api_key: String, +} + +/// Gemini completion parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct GeminiCompletionParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Model to use. + pub model: GeminiCompletionModel, +} + +impl GeminiCompletionParams { + /// Creates a new Gemini completion params. + pub fn new(credentials_id: Uuid, model: GeminiCompletionModel) -> Self { + Self { + credentials_id, + model, + } + } +} + +impl IntoProvider for GeminiCompletionParams { + type Credentials = GeminiCredentials; + type Output = CompletionProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + let rig_creds = nvisy_rig::provider::CompletionCredentials::Gemini { + api_key: credentials.api_key, + }; + let model = nvisy_rig::provider::CompletionModel::Gemini(self.model); + CompletionProvider::new(&rig_creds, &model) + .map_err(|e| WorkflowError::Internal(e.to_string())) + } +} + +/// Gemini embedding parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct GeminiEmbeddingParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Model to use. + pub model: GeminiEmbeddingModel, +} + +impl GeminiEmbeddingParams { + /// Creates a new Gemini embedding params. + pub fn new(credentials_id: Uuid, model: GeminiEmbeddingModel) -> Self { + Self { + credentials_id, + model, + } + } +} + +impl IntoProvider for GeminiEmbeddingParams { + type Credentials = GeminiCredentials; + type Output = EmbeddingProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + let rig_creds = nvisy_rig::provider::EmbeddingCredentials::Gemini { + api_key: credentials.api_key, + }; + let model = nvisy_rig::provider::EmbeddingModel::Gemini(self.model); + EmbeddingProvider::new(&rig_creds, &model) + .map_err(|e| WorkflowError::Internal(e.to_string())) + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/milvus.rs b/crates/nvisy-runtime/src/provider/backend/milvus.rs index 5eefc42..37e094a 100644 --- a/crates/nvisy-runtime/src/provider/backend/milvus.rs +++ b/crates/nvisy-runtime/src/provider/backend/milvus.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::MilvusConfig; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// Default Milvus port. 
fn default_milvus_port() -> u16 { 19530 @@ -40,9 +43,11 @@ pub struct MilvusParams { pub dimensions: Option, } -impl MilvusParams { - /// Combines params with credentials to create a full provider config. - pub fn into_config(self, credentials: MilvusCredentials) -> MilvusConfig { +impl IntoProvider for MilvusParams { + type Credentials = MilvusCredentials; + type Output = MilvusConfig; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { let mut config = MilvusConfig::new(credentials.host) .with_port(credentials.port) .with_collection(self.collection); @@ -57,6 +62,6 @@ impl MilvusParams { config = config.with_dimensions(dimensions); } - config + Ok(config) } } diff --git a/crates/nvisy-runtime/src/provider/backend/mod.rs b/crates/nvisy-runtime/src/provider/backend/mod.rs index 6da1d7c..627804a 100644 --- a/crates/nvisy-runtime/src/provider/backend/mod.rs +++ b/crates/nvisy-runtime/src/provider/backend/mod.rs @@ -1,32 +1,76 @@ //! Backend provider implementations. //! //! Each provider file contains credentials and params for a specific backend: +//! +//! ## Storage backends //! - [`s3`]: Amazon S3 //! - [`gcs`]: Google Cloud Storage //! - [`azblob`]: Azure Blob Storage //! - [`postgres`]: PostgreSQL //! - [`mysql`]: MySQL +//! +//! ## Vector databases //! - [`qdrant`]: Qdrant vector database //! - [`pinecone`]: Pinecone vector database //! - [`milvus`]: Milvus vector database //! - [`pgvector`]: pgvector (PostgreSQL extension) +//! +//! ## AI providers +//! - [`openai`]: OpenAI (completion + embedding) +//! - [`anthropic`]: Anthropic (completion only) +//! - [`cohere`]: Cohere (completion + embedding) +//! - [`gemini`]: Google Gemini (completion + embedding) +//! - [`perplexity`]: Perplexity (completion only) +use crate::error::WorkflowResult; + +// Storage backends mod azblob; mod gcs; -mod milvus; mod mysql; +mod postgres; +mod s3; + +// Vector databases +mod milvus; mod pgvector; mod pinecone; -mod postgres; mod qdrant; -mod s3; +// AI providers +mod anthropic; +mod cohere; +mod gemini; +mod openai; +mod perplexity; + +// Storage backend exports pub use azblob::{AzblobCredentials, AzblobParams}; pub use gcs::{GcsCredentials, GcsParams}; -pub use milvus::{MilvusCredentials, MilvusParams}; pub use mysql::{MysqlCredentials, MysqlParams}; +pub use postgres::{PostgresCredentials, PostgresParams}; +pub use s3::{S3Credentials, S3Params}; + +// Vector database exports +pub use milvus::{MilvusCredentials, MilvusParams}; pub use pgvector::{PgVectorCredentials, PgVectorParams}; pub use pinecone::{PineconeCredentials, PineconeParams}; -pub use postgres::{PostgresCredentials, PostgresParams}; pub use qdrant::{QdrantCredentials, QdrantParams}; -pub use s3::{S3Credentials, S3Params}; + +// AI provider exports +pub use anthropic::{AnthropicCompletionParams, AnthropicCredentials}; +pub use cohere::{CohereCompletionParams, CohereCredentials, CohereEmbeddingParams}; +pub use gemini::{GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams}; +pub use openai::{OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams}; +pub use perplexity::{PerplexityCompletionParams, PerplexityCredentials}; + +/// Trait for provider parameters that can be combined with credentials to create a provider/config. +pub trait IntoProvider { + /// The credentials type required by this provider. + type Credentials; + /// The output type (provider instance or config). + type Output; + + /// Combines params with credentials to create the output. 
+ fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult; +} diff --git a/crates/nvisy-runtime/src/provider/backend/mysql.rs b/crates/nvisy-runtime/src/provider/backend/mysql.rs index 51e1b95..e76777f 100644 --- a/crates/nvisy-runtime/src/provider/backend/mysql.rs +++ b/crates/nvisy-runtime/src/provider/backend/mysql.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::MysqlConfig; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// MySQL credentials. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct MysqlCredentials { @@ -23,15 +26,17 @@ pub struct MysqlParams { pub database: Option, } -impl MysqlParams { - /// Combines params with credentials to create a full provider config. - pub fn into_config(self, credentials: MysqlCredentials) -> MysqlConfig { +impl IntoProvider for MysqlParams { + type Credentials = MysqlCredentials; + type Output = MysqlConfig; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { let mut config = MysqlConfig::new(credentials.connection_string).with_table(self.table); if let Some(database) = self.database { config = config.with_database(database); } - config + Ok(config) } } diff --git a/crates/nvisy-runtime/src/provider/backend/openai.rs b/crates/nvisy-runtime/src/provider/backend/openai.rs new file mode 100644 index 0000000..fd1fcf6 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/openai.rs @@ -0,0 +1,83 @@ +//! OpenAI provider. + +use nvisy_rig::provider::{ + CompletionProvider, EmbeddingProvider, OpenAiCompletionModel, OpenAiEmbeddingModel, +}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::IntoProvider; +use crate::error::{WorkflowError, WorkflowResult}; + +/// OpenAI credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OpenAiCredentials { + /// API key. + pub api_key: String, +} + +/// OpenAI completion parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct OpenAiCompletionParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Model to use. + pub model: OpenAiCompletionModel, +} + +impl OpenAiCompletionParams { + /// Creates a new OpenAI completion params. + pub fn new(credentials_id: Uuid, model: OpenAiCompletionModel) -> Self { + Self { + credentials_id, + model, + } + } +} + +impl IntoProvider for OpenAiCompletionParams { + type Credentials = OpenAiCredentials; + type Output = CompletionProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + let rig_creds = nvisy_rig::provider::CompletionCredentials::OpenAi { + api_key: credentials.api_key, + }; + let model = nvisy_rig::provider::CompletionModel::OpenAi(self.model); + CompletionProvider::new(&rig_creds, &model) + .map_err(|e| WorkflowError::Internal(e.to_string())) + } +} + +/// OpenAI embedding parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct OpenAiEmbeddingParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Model to use. + pub model: OpenAiEmbeddingModel, +} + +impl OpenAiEmbeddingParams { + /// Creates a new OpenAI embedding params. 
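Editor's note: the `IntoProvider` trait introduced above is what lets the executor treat storage configs and AI providers uniformly, so adding a new backend mostly means implementing it. A hypothetical sketch; the `WebhookParams`, `WebhookCredentials`, and `WebhookConfig` types are invented purely for illustration and are not part of the crate:

```rust
use serde::{Deserialize, Serialize};
use uuid::Uuid;

/// Hypothetical webhook output backend, not part of the crate.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebhookCredentials {
    pub bearer_token: String,
}

#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WebhookParams {
    /// Reference to stored credentials, like every other backend.
    pub credentials_id: Uuid,
    pub endpoint: String,
}

/// Hypothetical resolved config consumed by some output provider.
pub struct WebhookConfig {
    pub endpoint: String,
    pub bearer_token: String,
}

impl IntoProvider for WebhookParams {
    type Credentials = WebhookCredentials;
    type Output = WebhookConfig;

    fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult<Self::Output> {
        Ok(WebhookConfig {
            endpoint: self.endpoint,
            bearer_token: credentials.bearer_token,
        })
    }
}
```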
+ pub fn new(credentials_id: Uuid, model: OpenAiEmbeddingModel) -> Self { + Self { + credentials_id, + model, + } + } +} + +impl IntoProvider for OpenAiEmbeddingParams { + type Credentials = OpenAiCredentials; + type Output = EmbeddingProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + let rig_creds = nvisy_rig::provider::EmbeddingCredentials::OpenAi { + api_key: credentials.api_key, + }; + let model = nvisy_rig::provider::EmbeddingModel::OpenAi(self.model); + EmbeddingProvider::new(&rig_creds, &model) + .map_err(|e| WorkflowError::Internal(e.to_string())) + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/perplexity.rs b/crates/nvisy-runtime/src/provider/backend/perplexity.rs new file mode 100644 index 0000000..763f2d6 --- /dev/null +++ b/crates/nvisy-runtime/src/provider/backend/perplexity.rs @@ -0,0 +1,48 @@ +//! Perplexity provider. + +use nvisy_rig::provider::{CompletionProvider, PerplexityModel}; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +use super::IntoProvider; +use crate::error::{WorkflowError, WorkflowResult}; + +/// Perplexity credentials. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PerplexityCredentials { + /// API key. + pub api_key: String, +} + +/// Perplexity completion parameters. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PerplexityCompletionParams { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Model to use. + pub model: PerplexityModel, +} + +impl PerplexityCompletionParams { + /// Creates a new Perplexity completion params. + pub fn new(credentials_id: Uuid, model: PerplexityModel) -> Self { + Self { + credentials_id, + model, + } + } +} + +impl IntoProvider for PerplexityCompletionParams { + type Credentials = PerplexityCredentials; + type Output = CompletionProvider; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + let rig_creds = nvisy_rig::provider::CompletionCredentials::Perplexity { + api_key: credentials.api_key, + }; + let model = nvisy_rig::provider::CompletionModel::Perplexity(self.model); + CompletionProvider::new(&rig_creds, &model) + .map_err(|e| WorkflowError::Internal(e.to_string())) + } +} diff --git a/crates/nvisy-runtime/src/provider/backend/pgvector.rs b/crates/nvisy-runtime/src/provider/backend/pgvector.rs index e051306..eefdcd3 100644 --- a/crates/nvisy-runtime/src/provider/backend/pgvector.rs +++ b/crates/nvisy-runtime/src/provider/backend/pgvector.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::PgVectorConfig; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// pgvector credentials. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PgVectorCredentials { @@ -22,9 +25,11 @@ pub struct PgVectorParams { pub dimensions: usize, } -impl PgVectorParams { - /// Combines params with credentials to create a full provider config. 
- pub fn into_config(self, credentials: PgVectorCredentials) -> PgVectorConfig { - PgVectorConfig::new(credentials.connection_url, self.dimensions).with_table(self.table) +impl IntoProvider for PgVectorParams { + type Credentials = PgVectorCredentials; + type Output = PgVectorConfig; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + Ok(PgVectorConfig::new(credentials.connection_url, self.dimensions).with_table(self.table)) } } diff --git a/crates/nvisy-runtime/src/provider/backend/pinecone.rs b/crates/nvisy-runtime/src/provider/backend/pinecone.rs index f09fb99..6858a44 100644 --- a/crates/nvisy-runtime/src/provider/backend/pinecone.rs +++ b/crates/nvisy-runtime/src/provider/backend/pinecone.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::PineconeConfig; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// Pinecone credentials. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PineconeCredentials { @@ -28,9 +31,11 @@ pub struct PineconeParams { pub dimensions: Option, } -impl PineconeParams { - /// Combines params with credentials to create a full provider config. - pub fn into_config(self, credentials: PineconeCredentials) -> PineconeConfig { +impl IntoProvider for PineconeParams { + type Credentials = PineconeCredentials; + type Output = PineconeConfig; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { let mut config = PineconeConfig::new(credentials.api_key, credentials.environment, self.index); @@ -41,6 +46,6 @@ impl PineconeParams { config = config.with_dimensions(dimensions); } - config + Ok(config) } } diff --git a/crates/nvisy-runtime/src/provider/backend/postgres.rs b/crates/nvisy-runtime/src/provider/backend/postgres.rs index 93c79d9..537a976 100644 --- a/crates/nvisy-runtime/src/provider/backend/postgres.rs +++ b/crates/nvisy-runtime/src/provider/backend/postgres.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::PostgresConfig; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// PostgreSQL credentials. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PostgresCredentials { @@ -23,15 +26,17 @@ pub struct PostgresParams { pub schema: Option, } -impl PostgresParams { - /// Combines params with credentials to create a full provider config. - pub fn into_config(self, credentials: PostgresCredentials) -> PostgresConfig { +impl IntoProvider for PostgresParams { + type Credentials = PostgresCredentials; + type Output = PostgresConfig; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { let mut config = PostgresConfig::new(credentials.connection_string).with_table(self.table); if let Some(schema) = self.schema { config = config.with_schema(schema); } - config + Ok(config) } } diff --git a/crates/nvisy-runtime/src/provider/backend/qdrant.rs b/crates/nvisy-runtime/src/provider/backend/qdrant.rs index 0a115c3..0c48d0a 100644 --- a/crates/nvisy-runtime/src/provider/backend/qdrant.rs +++ b/crates/nvisy-runtime/src/provider/backend/qdrant.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::QdrantConfig; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// Qdrant credentials. 
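The vector-store impls in these hunks all follow the same consuming-builder shape: construct the config from required values, then apply optional params only when present. A self-contained sketch of that pattern; `VectorConfig` is a placeholder, not the real `QdrantConfig`/`PineconeConfig`:

```rust
// Sketch of the consuming-builder pattern used by the vector-store configs.
// VectorConfig is a placeholder, not a type from nvisy_dal.

#[derive(Debug, Default)]
struct VectorConfig {
    url: String,
    collection: String,
    api_key: Option<String>,
    dimensions: Option<usize>,
}

impl VectorConfig {
    fn new(url: impl Into<String>) -> Self {
        Self { url: url.into(), ..Self::default() }
    }

    fn with_collection(mut self, collection: impl Into<String>) -> Self {
        self.collection = collection.into();
        self
    }

    fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
        self.api_key = Some(api_key.into());
        self
    }

    fn with_dimensions(mut self, dimensions: usize) -> Self {
        self.dimensions = Some(dimensions);
        self
    }
}

fn main() {
    let api_key: Option<String> = None;
    let dimensions: Option<usize> = Some(1536);

    // Required values first, optional params applied only when present,
    // mirroring the Qdrant/Pinecone into_provider bodies above.
    let mut config = VectorConfig::new("http://localhost:6334").with_collection("chunks");
    if let Some(api_key) = api_key {
        config = config.with_api_key(api_key);
    }
    if let Some(dimensions) = dimensions {
        config = config.with_dimensions(dimensions);
    }
    println!("{config:?}");
}
```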
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct QdrantCredentials { @@ -26,9 +29,11 @@ pub struct QdrantParams { pub dimensions: Option, } -impl QdrantParams { - /// Combines params with credentials to create a full provider config. - pub fn into_config(self, credentials: QdrantCredentials) -> QdrantConfig { +impl IntoProvider for QdrantParams { + type Credentials = QdrantCredentials; + type Output = QdrantConfig; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { let mut config = QdrantConfig::new(credentials.url).with_collection(self.collection); if let Some(api_key) = credentials.api_key { @@ -38,6 +43,6 @@ impl QdrantParams { config = config.with_dimensions(dimensions); } - config + Ok(config) } } diff --git a/crates/nvisy-runtime/src/provider/backend/s3.rs b/crates/nvisy-runtime/src/provider/backend/s3.rs index ec1218a..2931018 100644 --- a/crates/nvisy-runtime/src/provider/backend/s3.rs +++ b/crates/nvisy-runtime/src/provider/backend/s3.rs @@ -4,6 +4,9 @@ use nvisy_dal::provider::S3Config; use serde::{Deserialize, Serialize}; use uuid::Uuid; +use super::IntoProvider; +use crate::error::WorkflowResult; + /// Amazon S3 credentials. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct S3Credentials { @@ -30,9 +33,11 @@ pub struct S3Params { pub prefix: Option, } -impl S3Params { - /// Combines params with credentials to create a full provider config. - pub fn into_config(self, credentials: S3Credentials) -> S3Config { +impl IntoProvider for S3Params { + type Credentials = S3Credentials; + type Output = S3Config; + + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { let mut config = S3Config::new(self.bucket, credentials.region) .with_credentials(credentials.access_key_id, credentials.secret_access_key); @@ -43,6 +48,6 @@ impl S3Params { config = config.with_prefix(prefix); } - config + Ok(config) } } diff --git a/crates/nvisy-runtime/src/provider/inputs.rs b/crates/nvisy-runtime/src/provider/inputs.rs index e34707b..c6889e9 100644 --- a/crates/nvisy-runtime/src/provider/inputs.rs +++ b/crates/nvisy-runtime/src/provider/inputs.rs @@ -11,7 +11,9 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::ProviderCredentials; -use super::backend::{AzblobParams, GcsParams, MysqlParams, PostgresParams, S3Params}; +use super::backend::{ + AzblobParams, GcsParams, IntoProvider, MysqlParams, PostgresParams, S3Params, +}; use crate::error::{WorkflowError, WorkflowResult}; /// Input provider parameters (storage backends only, no vector DBs). @@ -60,29 +62,28 @@ impl InputProviderParams { Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, } } +} + +impl IntoProvider for InputProviderParams { + type Credentials = ProviderCredentials; + type Output = InputProviderConfig; - /// Combines params with credentials to create a full provider config. - /// - /// Returns an error if the credentials type doesn't match the params type. 
- pub fn into_config( - self, - credentials: ProviderCredentials, - ) -> WorkflowResult { + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { match (self, credentials) { (Self::S3(p), ProviderCredentials::S3(c)) => { - Ok(InputProviderConfig::S3(p.into_config(c))) + Ok(InputProviderConfig::S3(p.into_provider(c)?)) } (Self::Gcs(p), ProviderCredentials::Gcs(c)) => { - Ok(InputProviderConfig::Gcs(p.into_config(c))) + Ok(InputProviderConfig::Gcs(p.into_provider(c)?)) } (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { - Ok(InputProviderConfig::Azblob(p.into_config(c))) + Ok(InputProviderConfig::Azblob(p.into_provider(c)?)) } (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { - Ok(InputProviderConfig::Postgres(p.into_config(c))) + Ok(InputProviderConfig::Postgres(p.into_provider(c)?)) } (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { - Ok(InputProviderConfig::Mysql(p.into_config(c))) + Ok(InputProviderConfig::Mysql(p.into_provider(c)?)) } (params, creds) => Err(WorkflowError::Internal(format!( "credentials type mismatch: expected '{}', got '{}'", diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs index eaa2ac2..c8aaefa 100644 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/mod.rs @@ -2,7 +2,9 @@ //! //! This module separates provider configuration into: //! - [`ProviderCredentials`]: Sensitive credentials (stored per workspace) +//! - [`AiCredentials`]: AI provider credentials (stored per workspace) //! - [`InputProviderParams`] / [`OutputProviderParams`]: Non-sensitive parameters (part of node definition) +//! - [`CompletionProviderParams`] / [`EmbeddingProviderParams`]: AI provider parameters //! - [`CredentialsRegistry`]: In-memory registry for credentials lookup //! //! # Module Structure @@ -10,30 +12,51 @@ //! - [`backend`]: Individual provider implementations (credentials + params) //! - [`inputs`]: Input provider types and read operations //! - [`outputs`]: Output provider types and write operations +//! - [`ai`]: AI provider types (completion + embedding) //! 
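`InputProviderParams::into_provider` above dispatches on the `(params, credentials)` pair and turns any mismatched pairing into an error. A standalone sketch of that dispatch, with illustrative two-variant enums standing in for the real provider enums:

```rust
// Sketch of the (params, credentials) dispatch used by InputProviderParams.
// Variants are illustrative, not the full provider set.

#[derive(Debug)]
enum Params {
    S3 { bucket: String },
    Postgres { table: String },
}

#[derive(Debug)]
enum Credentials {
    S3 { region: String },
    Postgres { connection_string: String },
}

#[derive(Debug)]
enum Config {
    S3 { bucket: String, region: String },
    Postgres { table: String, connection_string: String },
}

impl Params {
    fn kind(&self) -> &'static str {
        match self {
            Params::S3 { .. } => "s3",
            Params::Postgres { .. } => "postgres",
        }
    }
}

impl Credentials {
    fn kind(&self) -> &'static str {
        match self {
            Credentials::S3 { .. } => "s3",
            Credentials::Postgres { .. } => "postgres",
        }
    }
}

fn into_config(params: Params, credentials: Credentials) -> Result<Config, String> {
    match (params, credentials) {
        (Params::S3 { bucket }, Credentials::S3 { region }) => Ok(Config::S3 { bucket, region }),
        (Params::Postgres { table }, Credentials::Postgres { connection_string }) => {
            Ok(Config::Postgres { table, connection_string })
        }
        // Any other pairing is a wiring error surfaced to the caller.
        (params, creds) => Err(format!(
            "credentials type mismatch: expected '{}', got '{}'",
            params.kind(),
            creds.kind()
        )),
    }
}

fn main() {
    let err = into_config(
        Params::S3 { bucket: "raw".into() },
        Credentials::Postgres { connection_string: "postgres://localhost".into() },
    )
    .unwrap_err();
    println!("{err}");
}
```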
- [`registry`]: Credentials registry for workflow execution +mod ai; pub mod backend; mod inputs; mod outputs; mod registry; pub mod runtime; +// Storage backend exports pub use backend::{ - AzblobCredentials, AzblobParams, GcsCredentials, GcsParams, MilvusCredentials, MilvusParams, - MysqlCredentials, MysqlParams, PgVectorCredentials, PgVectorParams, PineconeCredentials, - PineconeParams, PostgresCredentials, PostgresParams, QdrantCredentials, QdrantParams, - S3Credentials, S3Params, + AzblobCredentials, AzblobParams, GcsCredentials, GcsParams, MysqlCredentials, MysqlParams, + PostgresCredentials, PostgresParams, S3Credentials, S3Params, }; + +// Vector database exports +pub use backend::{ + MilvusCredentials, MilvusParams, PgVectorCredentials, PgVectorParams, PineconeCredentials, + PineconeParams, QdrantCredentials, QdrantParams, +}; + +// AI provider exports +pub use backend::{ + AnthropicCompletionParams, AnthropicCredentials, CohereCompletionParams, CohereCredentials, + CohereEmbeddingParams, GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams, + OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams, PerplexityCompletionParams, + PerplexityCredentials, +}; + use derive_more::From; pub use inputs::{InputProvider, InputProviderConfig, InputProviderParams}; pub use outputs::{OutputProvider, OutputProviderConfig, OutputProviderParams}; pub use registry::CredentialsRegistry; use serde::{Deserialize, Serialize}; +// AI provider enum exports +pub use ai::{AiCredentials, CompletionProviderParams, EmbeddingProviderParams}; +pub use backend::IntoProvider; + /// Provider credentials (sensitive). #[derive(Debug, Clone, From, Serialize, Deserialize)] #[serde(tag = "provider", rename_all = "snake_case")] pub enum ProviderCredentials { + // Storage backends /// Amazon S3 credentials. S3(S3Credentials), /// Google Cloud Storage credentials. @@ -44,6 +67,8 @@ pub enum ProviderCredentials { Postgres(PostgresCredentials), /// MySQL credentials. Mysql(MysqlCredentials), + + // Vector databases /// Qdrant credentials. Qdrant(QdrantCredentials), /// Pinecone credentials. @@ -52,21 +77,41 @@ pub enum ProviderCredentials { Milvus(MilvusCredentials), /// pgvector credentials. PgVector(PgVectorCredentials), + + // AI providers + /// OpenAI credentials. + OpenAi(OpenAiCredentials), + /// Anthropic credentials. + Anthropic(AnthropicCredentials), + /// Cohere credentials. + Cohere(CohereCredentials), + /// Google Gemini credentials. + Gemini(GeminiCredentials), + /// Perplexity credentials. + Perplexity(PerplexityCredentials), } impl ProviderCredentials { /// Returns the provider kind as a string. 
pub const fn kind(&self) -> &'static str { match self { + // Storage backends Self::S3(_) => "s3", Self::Gcs(_) => "gcs", Self::Azblob(_) => "azblob", Self::Postgres(_) => "postgres", Self::Mysql(_) => "mysql", + // Vector databases Self::Qdrant(_) => "qdrant", Self::Pinecone(_) => "pinecone", Self::Milvus(_) => "milvus", Self::PgVector(_) => "pgvector", + // AI providers + Self::OpenAi(_) => "openai", + Self::Anthropic(_) => "anthropic", + Self::Cohere(_) => "cohere", + Self::Gemini(_) => "gemini", + Self::Perplexity(_) => "perplexity", } } } diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs index ef8fc1d..a21f81f 100644 --- a/crates/nvisy-runtime/src/provider/outputs.rs +++ b/crates/nvisy-runtime/src/provider/outputs.rs @@ -13,8 +13,8 @@ use uuid::Uuid; use super::ProviderCredentials; use super::backend::{ - AzblobParams, GcsParams, MilvusParams, MysqlParams, PgVectorParams, PineconeParams, - PostgresParams, QdrantParams, S3Params, + AzblobParams, GcsParams, IntoProvider, MilvusParams, MysqlParams, PgVectorParams, + PineconeParams, PostgresParams, QdrantParams, S3Params, }; use crate::error::{WorkflowError, WorkflowResult}; @@ -83,41 +83,40 @@ impl OutputProviderParams { } } } +} + +impl IntoProvider for OutputProviderParams { + type Credentials = ProviderCredentials; + type Output = OutputProviderConfig; - /// Combines params with credentials to create a full provider config. - /// - /// Returns an error if the credentials type doesn't match the params type. - pub fn into_config( - self, - credentials: ProviderCredentials, - ) -> WorkflowResult { + fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { match (self, credentials) { (Self::S3(p), ProviderCredentials::S3(c)) => { - Ok(OutputProviderConfig::S3(p.into_config(c))) + Ok(OutputProviderConfig::S3(p.into_provider(c)?)) } (Self::Gcs(p), ProviderCredentials::Gcs(c)) => { - Ok(OutputProviderConfig::Gcs(p.into_config(c))) + Ok(OutputProviderConfig::Gcs(p.into_provider(c)?)) } (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { - Ok(OutputProviderConfig::Azblob(p.into_config(c))) + Ok(OutputProviderConfig::Azblob(p.into_provider(c)?)) } (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { - Ok(OutputProviderConfig::Postgres(p.into_config(c))) + Ok(OutputProviderConfig::Postgres(p.into_provider(c)?)) } (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { - Ok(OutputProviderConfig::Mysql(p.into_config(c))) + Ok(OutputProviderConfig::Mysql(p.into_provider(c)?)) } (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => { - Ok(OutputProviderConfig::Qdrant(p.into_config(c))) + Ok(OutputProviderConfig::Qdrant(p.into_provider(c)?)) } (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => { - Ok(OutputProviderConfig::Pinecone(p.into_config(c))) + Ok(OutputProviderConfig::Pinecone(p.into_provider(c)?)) } (Self::Milvus(p), ProviderCredentials::Milvus(c)) => { - Ok(OutputProviderConfig::Milvus(p.into_config(c))) + Ok(OutputProviderConfig::Milvus(p.into_provider(c)?)) } (Self::PgVector(p), ProviderCredentials::PgVector(c)) => { - Ok(OutputProviderConfig::PgVector(p.into_config(c))) + Ok(OutputProviderConfig::PgVector(p.into_provider(c)?)) } (params, creds) => Err(WorkflowError::Internal(format!( "credentials type mismatch: expected '{}', got '{}'", diff --git a/crates/nvisy-server/src/handler/request/pipelines.rs b/crates/nvisy-server/src/handler/request/pipelines.rs index f3966d3..2a6d399 100644 --- a/crates/nvisy-server/src/handler/request/pipelines.rs +++ 
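The `ProviderCredentials` enum is internally tagged (`#[serde(tag = "provider", rename_all = "snake_case")]`), so each credentials object carries its provider kind inside the JSON itself. A small sketch showing the resulting wire format; `Creds` and its two variants are placeholders, and the dependency versions in the comment are assumptions:

```rust
// Sketch of the internally tagged layout used by ProviderCredentials.
//
// [dependencies]
// serde = { version = "1", features = ["derive"] }
// serde_json = "1"

use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "provider", rename_all = "snake_case")]
enum Creds {
    S3 { access_key_id: String },
    Postgres { connection_string: String },
}

fn main() -> serde_json::Result<()> {
    let creds = Creds::S3 { access_key_id: "AKIAEXAMPLE".into() };
    // Prints {"provider":"s3","access_key_id":"AKIAEXAMPLE"}
    println!("{}", serde_json::to_string(&creds)?);

    let parsed: Creds =
        serde_json::from_str(r#"{"provider":"postgres","connection_string":"postgres://db"}"#)?;
    println!("{parsed:?}");
    Ok(())
}
```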
b/crates/nvisy-server/src/handler/request/pipelines.rs @@ -6,7 +6,7 @@ use nvisy_postgres::model::{NewPipeline, UpdatePipeline as UpdatePipelineModel}; use nvisy_postgres::types::PipelineStatus; -use nvisy_runtime::graph::WorkflowGraph; +use nvisy_runtime::graph::WorkflowDefinition; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -50,7 +50,7 @@ impl CreatePipeline { /// Request payload to update an existing pipeline. /// /// All fields are optional; only provided fields will be updated. -/// The definition field accepts a strictly typed WorkflowGraph. +/// The definition field accepts a strictly typed WorkflowDefinition. #[must_use] #[derive(Debug, Default, Serialize, Deserialize, JsonSchema, Validate)] #[serde(rename_all = "camelCase")] @@ -63,9 +63,9 @@ pub struct UpdatePipeline { pub description: Option, /// New status for the pipeline. pub status: Option, - /// New definition for the pipeline (strictly typed workflow graph). + /// New definition for the pipeline (strictly typed workflow definition). #[schemars(with = "Option")] - pub definition: Option, + pub definition: Option, } impl UpdatePipeline { @@ -76,7 +76,7 @@ impl UpdatePipeline { description: self.description.map(Some), status: self.status, definition: self.definition.map(|d| { - serde_json::to_value(d).expect("WorkflowGraph serialization should not fail") + serde_json::to_value(d).expect("WorkflowDefinition serialization should not fail") }), ..Default::default() } diff --git a/crates/nvisy-server/src/handler/response/pipelines.rs b/crates/nvisy-server/src/handler/response/pipelines.rs index 316a1e4..bda56cc 100644 --- a/crates/nvisy-server/src/handler/response/pipelines.rs +++ b/crates/nvisy-server/src/handler/response/pipelines.rs @@ -3,7 +3,7 @@ use jiff::Timestamp; use nvisy_postgres::model; use nvisy_postgres::types::PipelineStatus; -use nvisy_runtime::graph::WorkflowGraph; +use nvisy_runtime::graph::WorkflowDefinition; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -29,7 +29,7 @@ pub struct Pipeline { pub status: PipelineStatus, /// Pipeline definition (workflow graph). #[schemars(with = "serde_json::Value")] - pub definition: WorkflowGraph, + pub definition: WorkflowDefinition, /// Timestamp when the pipeline was created. pub created_at: Timestamp, /// Timestamp when the pipeline was last updated. @@ -39,7 +39,7 @@ pub struct Pipeline { impl Pipeline { /// Creates a new instance of [`Pipeline`] from the database model. 
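`UpdatePipeline::into_model` serializes the typed definition into a `serde_json::Value` for storage, and `Pipeline::from_model` deserializes it back with a default fallback. A sketch of that round-trip with a placeholder `Definition` type; only `Serialize`, `Deserialize`, and `Default` are required:

```rust
// Sketch of the Value round-trip used when persisting pipeline definitions.
// Definition is a placeholder for WorkflowDefinition.
//
// [dependencies]
// serde = { version = "1", features = ["derive"] }
// serde_json = "1"

use serde::{Deserialize, Serialize};

#[derive(Debug, Default, Serialize, Deserialize)]
struct Definition {
    nodes: Vec<String>,
}

fn main() {
    let def = Definition { nodes: vec!["input".into(), "chunk".into()] };

    // Writing: typed definition -> JSON column value.
    let stored: serde_json::Value =
        serde_json::to_value(&def).expect("definition serialization should not fail");

    // Reading: fall back to an empty definition if the stored JSON
    // no longer matches the current schema.
    let loaded: Definition = serde_json::from_value(stored).unwrap_or_default();
    println!("{loaded:?}");
}
```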
pub fn from_model(pipeline: model::Pipeline) -> Self { - let definition: WorkflowGraph = + let definition: WorkflowDefinition = serde_json::from_value(pipeline.definition).unwrap_or_default(); Self { pipeline_id: pipeline.id, From 2629c3884b217670a6ed7345cd2c7f1fccfc0199 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 22 Jan 2026 13:13:28 +0100 Subject: [PATCH 16/28] feat(runtime): split graph module into definition and compiled types - Add definition/ module with serializable, frontend-friendly types: - WorkflowDefinition, NodeDef, InputDef, OutputDef, SwitchDef - CacheSlot for inter-node data passing - Edge, EdgeData, NodeId, WorkflowMetadata - Add compiled/ module with runtime execution types: - CompiledGraph, CompiledNode, CompiledInput, CompiledOutput - CompiledTransform with processor structs for each transform type - CompiledSwitch for runtime routing - InputStream, OutputStream, DataStream, DataSink - Add compiler.rs for WorkflowDefinition -> CompiledGraph compilation: - Cache slot resolution - Credential lookup and provider instantiation - Graph validation - Refactor provider backends to return providers directly: - Make IntoProvider trait async using async_trait - Remove intermediate Config types - Simplify flow: Params + Credentials -> Provider - Remove legacy graph modules: - graph/input/, graph/output/, graph/route/, graph/workflow/ - graph/core.rs (WorkflowGraph) - Legacy executor methods - Update engine executor to use only CompiledGraph BREAKING CHANGE: WorkflowGraph removed, use WorkflowDefinition + compiler --- crates/nvisy-dal/src/provider/azblob/mod.rs | 8 +- crates/nvisy-dal/src/provider/gcs/mod.rs | 8 +- crates/nvisy-dal/src/provider/s3/mod.rs | 8 +- crates/nvisy-runtime/README.md | 110 +++-- crates/nvisy-runtime/src/engine/context.rs | 36 +- crates/nvisy-runtime/src/engine/executor.rs | 242 ++++------ crates/nvisy-runtime/src/error.rs | 4 +- .../nvisy-runtime/src/graph/compiled/graph.rs | 187 ++++++++ .../nvisy-runtime/src/graph/compiled/input.rs | 36 ++ .../nvisy-runtime/src/graph/compiled/mod.rs | 29 ++ .../nvisy-runtime/src/graph/compiled/node.rs | 140 ++++++ .../src/graph/compiled/output.rs | 36 ++ .../nvisy-runtime/src/graph/compiled/route.rs | 200 ++++++++ .../src/graph/compiled/stream.rs | 216 +++++++++ .../src/graph/compiled/transform.rs | 362 +++++++++++++++ crates/nvisy-runtime/src/graph/compiler.rs | 413 +++++++++++++++++ .../graph/{workflow => definition}/edge.rs | 2 +- .../src/graph/definition/input.rs | 60 +++ .../{workflow => definition}/metadata.rs | 0 .../nvisy-runtime/src/graph/definition/mod.rs | 30 ++ .../graph/{workflow => definition}/node.rs | 45 +- .../src/graph/definition/output.rs | 60 +++ .../{route/switch.rs => definition/route.rs} | 111 +++-- .../src/graph/definition/transform.rs | 7 + .../src/graph/definition/workflow.rs | 294 ++++++++++++ crates/nvisy-runtime/src/graph/graph.rs | 432 ------------------ crates/nvisy-runtime/src/graph/input/mod.rs | 82 ---- crates/nvisy-runtime/src/graph/mod.rs | 55 ++- crates/nvisy-runtime/src/graph/output/mod.rs | 82 ---- crates/nvisy-runtime/src/graph/route/cache.rs | 33 -- crates/nvisy-runtime/src/graph/route/mod.rs | 11 - .../src/graph/transform/chunk.rs | 23 +- .../src/graph/transform/derive.rs | 23 +- .../src/graph/transform/embedding.rs | 23 +- .../src/graph/transform/enrich.rs | 23 +- .../src/graph/transform/extract.rs | 23 +- .../nvisy-runtime/src/graph/transform/mod.rs | 72 ++- .../src/graph/transform/partition.rs | 23 +- .../src/graph/workflow/definition.rs | 80 ---- 
.../nvisy-runtime/src/graph/workflow/mod.rs | 18 - crates/nvisy-runtime/src/lib.rs | 2 +- crates/nvisy-runtime/src/provider/ai.rs | 28 +- .../src/provider/backend/anthropic.rs | 8 +- .../src/provider/backend/azblob.rs | 11 +- .../src/provider/backend/cohere.rs | 14 +- .../nvisy-runtime/src/provider/backend/gcs.rs | 11 +- .../src/provider/backend/gemini.rs | 14 +- .../src/provider/backend/milvus.rs | 13 +- .../nvisy-runtime/src/provider/backend/mod.rs | 41 +- .../src/provider/backend/mysql.rs | 11 +- .../src/provider/backend/openai.rs | 14 +- .../src/provider/backend/perplexity.rs | 8 +- .../src/provider/backend/pgvector.rs | 15 +- .../src/provider/backend/pinecone.rs | 13 +- .../src/provider/backend/postgres.rs | 11 +- .../src/provider/backend/qdrant.rs | 13 +- .../nvisy-runtime/src/provider/backend/s3.rs | 11 +- crates/nvisy-runtime/src/provider/inputs.rs | 97 ++-- crates/nvisy-runtime/src/provider/mod.rs | 12 +- crates/nvisy-runtime/src/provider/outputs.rs | 193 +++++--- crates/nvisy-runtime/src/provider/registry.rs | 15 +- 61 files changed, 2865 insertions(+), 1337 deletions(-) create mode 100644 crates/nvisy-runtime/src/graph/compiled/graph.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/input.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/mod.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/node.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/output.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/route.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/stream.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/transform.rs create mode 100644 crates/nvisy-runtime/src/graph/compiler.rs rename crates/nvisy-runtime/src/graph/{workflow => definition}/edge.rs (97%) create mode 100644 crates/nvisy-runtime/src/graph/definition/input.rs rename crates/nvisy-runtime/src/graph/{workflow => definition}/metadata.rs (100%) create mode 100644 crates/nvisy-runtime/src/graph/definition/mod.rs rename crates/nvisy-runtime/src/graph/{workflow => definition}/node.rs (75%) create mode 100644 crates/nvisy-runtime/src/graph/definition/output.rs rename crates/nvisy-runtime/src/graph/{route/switch.rs => definition/route.rs} (67%) create mode 100644 crates/nvisy-runtime/src/graph/definition/transform.rs create mode 100644 crates/nvisy-runtime/src/graph/definition/workflow.rs delete mode 100644 crates/nvisy-runtime/src/graph/graph.rs delete mode 100644 crates/nvisy-runtime/src/graph/input/mod.rs delete mode 100644 crates/nvisy-runtime/src/graph/output/mod.rs delete mode 100644 crates/nvisy-runtime/src/graph/route/cache.rs delete mode 100644 crates/nvisy-runtime/src/graph/route/mod.rs delete mode 100644 crates/nvisy-runtime/src/graph/workflow/definition.rs delete mode 100644 crates/nvisy-runtime/src/graph/workflow/mod.rs diff --git a/crates/nvisy-dal/src/provider/azblob/mod.rs b/crates/nvisy-dal/src/provider/azblob/mod.rs index 78d0c33..a0300ec 100644 --- a/crates/nvisy-dal/src/provider/azblob/mod.rs +++ b/crates/nvisy-dal/src/provider/azblob/mod.rs @@ -71,10 +71,10 @@ impl DataInput for AzblobProvider { match op.read(&path).await { Ok(data) => { let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await { - if let Some(ct) = meta.content_type() { - blob = blob.with_content_type(ct); - } + if let Ok(meta) = op.stat(&path).await + && let Some(ct) = meta.content_type() + { + blob = blob.with_content_type(ct); } Some(Ok(blob)) } diff --git a/crates/nvisy-dal/src/provider/gcs/mod.rs 
b/crates/nvisy-dal/src/provider/gcs/mod.rs index a765aad..9746bdd 100644 --- a/crates/nvisy-dal/src/provider/gcs/mod.rs +++ b/crates/nvisy-dal/src/provider/gcs/mod.rs @@ -65,10 +65,10 @@ impl DataInput for GcsProvider { match op.read(&path).await { Ok(data) => { let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await { - if let Some(ct) = meta.content_type() { - blob = blob.with_content_type(ct); - } + if let Ok(meta) = op.stat(&path).await + && let Some(ct) = meta.content_type() + { + blob = blob.with_content_type(ct); } Some(Ok(blob)) } diff --git a/crates/nvisy-dal/src/provider/s3/mod.rs b/crates/nvisy-dal/src/provider/s3/mod.rs index 411814f..e05e757 100644 --- a/crates/nvisy-dal/src/provider/s3/mod.rs +++ b/crates/nvisy-dal/src/provider/s3/mod.rs @@ -75,10 +75,10 @@ impl DataInput for S3Provider { match op.read(&path).await { Ok(data) => { let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await { - if let Some(ct) = meta.content_type() { - blob = blob.with_content_type(ct); - } + if let Ok(meta) = op.stat(&path).await + && let Some(ct) = meta.content_type() + { + blob = blob.with_content_type(ct); } Some(Ok(blob)) } diff --git a/crates/nvisy-runtime/README.md b/crates/nvisy-runtime/README.md index 5ec8383..99fee4b 100644 --- a/crates/nvisy-runtime/README.md +++ b/crates/nvisy-runtime/README.md @@ -7,63 +7,77 @@ data processing workflows as directed acyclic graphs (DAGs). ## Architecture -Workflows are represented as graphs with three types of nodes: +Workflows are represented as graphs with four types of nodes: -- **Source nodes**: Read or produce data (entry points) -- **Transformer nodes**: Process or transform data (intermediate) -- **Sink nodes**: Write or consume data (exit points) +- **Input nodes**: Read or produce data (entry points) +- **Transform nodes**: Process or transform data (intermediate) +- **Output nodes**: Write or consume data (exit points) +- **Switch nodes**: Route data conditionally based on properties + +### Definition vs Compiled Types + +The crate separates workflow representation into two layers: + +- **Definition types** (`graph::definition`): JSON-serializable types for + storing, editing, and transmitting workflows. These include `WorkflowDefinition`, + `NodeDef`, `InputDef`, `OutputDef`, and `CacheSlot`. + +- **Compiled types** (`graph::compiled`): Runtime-optimized types for execution. + These include `CompiledGraph`, `CompiledNode`, and processor types like + `EmbeddingProcessor` and `EnrichProcessor`. + +Use the `graph::compiler` module to transform definitions into executable graphs. ## Example -```rust -use nvisy_runtime::prelude::*; +```rust,ignore +use nvisy_runtime::graph::definition::{ + InputDef, NodeDef, OutputDef, WorkflowDefinition, +}; +use nvisy_runtime::graph::compiler::compile; +use nvisy_runtime::engine::Engine; +use nvisy_runtime::provider::CredentialsRegistry; -// Create a workflow graph -let mut graph = WorkflowGraph::new(); +// Create a workflow definition +let mut workflow = WorkflowDefinition::new(); -// Add nodes -let source = graph.add_node(SourceNode::new("s3_input", SourceKind::S3)); -let transform = graph.add_node(TransformerNode::new("extract_text", TransformerKind::ExtractText)); -let sink = graph.add_node(SinkNode::new("store_output", SinkKind::Database)); +// Add input, transform, and output nodes... +// Connect nodes with edges... 
-// Connect nodes -graph.connect(source, transform).unwrap(); -graph.connect(transform, sink).unwrap(); +// Compile the definition +let registry = CredentialsRegistry::default(); +let ctx = nvisy_dal::core::Context::default(); +let compiled = compile(workflow, ®istry, ctx).await?; -// Validate the workflow -graph.validate().unwrap(); +// Execute the compiled graph +let engine = Engine::with_defaults(); +let result = engine.execute_compiled(compiled, registry).await?; ``` ## Node Types -### Source Nodes -- `S3` - Amazon S3 compatible storage -- `Gcs` - Google Cloud Storage -- `AzureBlob` - Azure Blob Storage -- `GoogleDrive` - Google Drive -- `Dropbox` - Dropbox cloud storage -- `OneDrive` - Microsoft OneDrive -- `HttpUpload` - Receive files from HTTP upload -- `ApiEndpoint` - Fetch from an external API - -### Transformer Nodes -- `ExtractText` - Extract text from documents -- `ChunkContent` - Split content into chunks -- `GenerateEmbeddings` - Generate vector embeddings -- `LlmTransform` - Transform using an LLM -- `ConvertFormat` - Convert file format -- `Validate` - Validate content against schema -- `Filter` - Filter data based on conditions -- `Merge` - Merge multiple inputs - -### Sink Nodes -- `S3` - Amazon S3 compatible storage -- `Gcs` - Google Cloud Storage -- `AzureBlob` - Azure Blob Storage -- `GoogleDrive` - Google Drive -- `Dropbox` - Dropbox cloud storage -- `OneDrive` - Microsoft OneDrive -- `Database` - Store in database -- `VectorStore` - Store vector embeddings -- `Webhook` - Send to webhook -- `ApiEndpoint` - Send to external API +### Input Nodes +Input nodes read data from external sources: +- Amazon S3, Google Cloud Storage, Azure Blob Storage +- PostgreSQL, MySQL databases + +### Transform Nodes +- `Partition` - Extract elements from documents +- `Chunk` - Split content into smaller chunks +- `Embedding` - Generate vector embeddings +- `Enrich` - Add metadata/descriptions using LLMs +- `Extract` - Extract structured data or convert formats +- `Derive` - Generate new content (summaries, titles) + +### Output Nodes +Output nodes write data to external destinations: +- Amazon S3, Google Cloud Storage, Azure Blob Storage +- PostgreSQL, MySQL databases +- Qdrant, Pinecone, Milvus, pgvector (vector databases) + +### Switch Nodes +Route data based on conditions: +- Content type (image, document, text, etc.) +- File size thresholds +- Metadata presence/values +- File name patterns diff --git a/crates/nvisy-runtime/src/engine/context.rs b/crates/nvisy-runtime/src/engine/context.rs index 5920d8a..09fc2c9 100644 --- a/crates/nvisy-runtime/src/engine/context.rs +++ b/crates/nvisy-runtime/src/engine/context.rs @@ -1,7 +1,5 @@ //! Execution context for workflow runs. -use std::collections::HashMap; - use derive_builder::Builder; use nvisy_dal::AnyDataValue; @@ -9,9 +7,8 @@ use crate::provider::CredentialsRegistry; /// Execution context for a workflow run. /// -/// Manages the current data items flowing through the pipeline, holds -/// credentials for provider access, and provides named cache slots for -/// data sharing between workflow branches. +/// Manages the current data items flowing through the pipeline and holds +/// credentials for provider access. /// /// A single input can produce multiple outputs (e.g., 1 document → 1000 embeddings), /// so the context holds a `Vec` of values at each stage. @@ -27,9 +24,6 @@ pub struct ExecutionContext { /// Current data items being processed (can expand: 1 input → N outputs). 
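The nvisy-dal hunks earlier in this patch flatten nested `if let` blocks into let-chains. A minimal sketch of that control-flow change, with plain std types standing in for the opendal operator and metadata; it assumes a toolchain where let-chains are available (Rust 2024 edition):

```rust
// Sketch of the let-chain refactor applied to the opendal-backed providers.

struct Meta {
    content_type: Option<String>,
}

fn stat(path: &str) -> Result<Meta, String> {
    if path.ends_with(".pdf") {
        Ok(Meta { content_type: Some("application/pdf".into()) })
    } else {
        Err("not found".into())
    }
}

fn main() {
    let mut content_type = None;

    // Before: two nested `if let` blocks. After: one flat chain.
    if let Ok(meta) = stat("report.pdf")
        && let Some(ct) = meta.content_type
    {
        content_type = Some(ct);
    }

    println!("{content_type:?}");
}
```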
#[builder(default)] current: Vec, - /// Named cache slots for data sharing between workflow branches. - #[builder(default)] - cache: HashMap>, /// Total input items processed in this execution. #[builder(default)] items_processed: usize, @@ -50,7 +44,6 @@ impl ExecutionContext { Self { credentials, current: Vec::new(), - cache: HashMap::new(), items_processed: 0, } } @@ -109,29 +102,4 @@ impl ExecutionContext { pub fn clear(&mut self) { self.current.clear(); } - - /// Writes data to a named cache slot. - pub fn write_cache(&mut self, name: &str, data: Vec) { - self.cache.entry(name.to_string()).or_default().extend(data); - } - - /// Reads data from a named cache slot (returns empty vec if not found). - pub fn read_cache(&self, name: &str) -> Vec { - self.cache.get(name).cloned().unwrap_or_default() - } - - /// Clears a named cache slot. - pub fn clear_cache(&mut self, name: &str) { - self.cache.remove(name); - } - - /// Clears all cache slots. - pub fn clear_all_caches(&mut self) { - self.cache.clear(); - } - - /// Returns the names of all cache slots. - pub fn cache_names(&self) -> Vec<&str> { - self.cache.keys().map(|s| s.as_str()).collect() - } } diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index 784a498..dedb0d8 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -2,14 +2,15 @@ use std::sync::Arc; -use nvisy_dal::core::Context; +use futures::{SinkExt, StreamExt}; use tokio::sync::Semaphore; use super::EngineConfig; use super::context::ExecutionContext; -use crate::error::{WorkflowError, WorkflowResult}; -use crate::graph::{InputSource, NodeData, NodeId, OutputDestination, WorkflowGraph}; -use crate::provider::{CredentialsRegistry, InputProvider, IntoProvider, OutputProvider}; +use crate::error::{Error, Result}; +use crate::graph::NodeId; +use crate::graph::compiled::{CompiledGraph, CompiledNode, InputStream, OutputStream}; +use crate::provider::CredentialsRegistry; /// Tracing target for engine operations. const TRACING_TARGET: &str = "nvisy_workflow::engine"; @@ -49,37 +50,28 @@ impl Engine { &self.config } - /// Validates a workflow graph against a credentials registry. + /// Executes a pre-compiled workflow graph. /// - /// Checks graph structure, constraints, and that all referenced - /// credentials exist in the registry. - pub fn validate( - &self, - workflow: &WorkflowGraph, - registry: &CredentialsRegistry, - ) -> WorkflowResult<()> { - workflow.validate(registry) - } - - /// Executes a workflow graph with the given credentials. + /// The graph should be compiled using [`crate::graph::compiler::compile`] + /// before execution. /// /// Execution is pipe-based: items are read from inputs one at a time, /// flow through all transformers, and are written to outputs before /// the next item is processed. 
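The `ExecutionContext` doc above notes that one input can fan out into many items, so the context tracks a `Vec` at each stage. A toy sketch of that flow; only the method names mirror the patch, everything else is simplified:

```rust
// Toy sketch of the "1 input -> N outputs" flow described for ExecutionContext.

#[derive(Debug, Clone)]
struct Value(String);

#[derive(Default)]
struct Context {
    current: Vec<Value>,
    items_processed: usize,
}

impl Context {
    fn set_current_single(&mut self, value: Value) {
        self.current = vec![value];
        self.items_processed += 1;
    }

    fn set_current(&mut self, values: Vec<Value>) {
        self.current = values;
    }

    fn take_current(&mut self) -> Vec<Value> {
        std::mem::take(&mut self.current)
    }
}

/// A transform that expands each item into several (e.g. chunking).
fn chunk(items: Vec<Value>) -> Vec<Value> {
    items
        .into_iter()
        .flat_map(|Value(doc)| (0..3).map(move |i| Value(format!("{doc}#chunk{i}"))))
        .collect()
}

fn main() {
    let mut ctx = Context::default();
    ctx.set_current_single(Value("doc-1".into()));

    // Each transform stage can grow the current batch.
    let expanded = chunk(ctx.take_current());
    ctx.set_current(expanded);

    let out = ctx.take_current();
    println!("{} input -> {} items", ctx.items_processed, out.len()); // 1 input -> 3 items
}
```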
pub async fn execute( &self, - workflow: &WorkflowGraph, + mut graph: CompiledGraph, credentials: CredentialsRegistry, - ) -> WorkflowResult { + ) -> Result { let _permit = self .semaphore .acquire() .await - .map_err(|e| WorkflowError::Internal(format!("semaphore closed: {}", e)))?; + .map_err(|e| Error::Internal(format!("semaphore closed: {}", e)))?; - workflow.validate(&credentials)?; - - let order = workflow.topological_order()?; + let order = graph + .topological_order() + .ok_or_else(|| Error::InvalidDefinition("compiled graph contains a cycle".into()))?; tracing::debug!( target: TRACING_TARGET, @@ -89,12 +81,8 @@ impl Engine { let mut ctx = ExecutionContext::new(credentials); - // Build the pipeline: create providers for input and output nodes - let pipeline = self.build_pipeline(workflow, &order, &ctx).await?; - - // Execute the pipeline: stream items through - self.execute_pipeline(workflow, &order, &pipeline, &mut ctx) - .await?; + // Execute the compiled pipeline + self.execute_pipeline(&mut graph, &order, &mut ctx).await?; tracing::debug!( target: TRACING_TARGET, @@ -105,121 +93,96 @@ impl Engine { Ok(ctx) } - /// Builds the pipeline by creating providers for input and output nodes. - async fn build_pipeline( + /// Executes a compiled pipeline by streaming items through. + async fn execute_pipeline( &self, - workflow: &WorkflowGraph, + graph: &mut CompiledGraph, order: &[NodeId], - ctx: &ExecutionContext, - ) -> WorkflowResult { - let mut inputs = Vec::new(); - let mut outputs = Vec::new(); - - for node_id in order { - let Some(node) = workflow.get_node(*node_id) else { - continue; - }; - - match node { - NodeData::Input(input_node) => { - let input = match &input_node.source { - InputSource::Provider(params) => { - let credentials_id = params.credentials_id(); - let credentials = ctx.credentials().get(credentials_id)?.clone(); - let config = params.clone().into_provider(credentials)?; - let provider = config.into_provider()?; - PipelineInput::Provider(provider) - } - InputSource::Cache(slot) => PipelineInput::Cache(slot.slot.clone()), - }; - inputs.push((*node_id, input)); - } - NodeData::Output(output_node) => { - let output = match &output_node.destination { - OutputDestination::Provider(params) => { - let credentials_id = params.credentials_id(); - let credentials = ctx.credentials().get(credentials_id)?.clone(); - let config = params.clone().into_provider(credentials)?; - let provider = config.into_provider().await?; - PipelineOutput::Provider(provider) - } - OutputDestination::Cache(slot) => PipelineOutput::Cache(slot.slot.clone()), - }; - outputs.push((*node_id, output)); - } - NodeData::Transformer(_) => { - // Transformers don't need pre-built providers - } + ctx: &mut ExecutionContext, + ) -> Result<()> { + // Collect input and output node IDs + let input_ids: Vec = order + .iter() + .filter(|id| graph.node(id).map(|n| n.is_input()).unwrap_or(false)) + .copied() + .collect(); + + let output_ids: Vec = order + .iter() + .filter(|id| graph.node(id).map(|n| n.is_output()).unwrap_or(false)) + .copied() + .collect(); + + let transform_ids: Vec = order + .iter() + .filter(|id| graph.node(id).map(|n| n.is_transform()).unwrap_or(false)) + .copied() + .collect(); + + // Take ownership of input streams + let mut input_streams: Vec<(NodeId, InputStream)> = Vec::new(); + for id in &input_ids { + if let Some(node) = graph.node_mut(id) + && let CompiledNode::Input(compiled_input) = node + { + // Create a placeholder stream and swap with the real one + let placeholder = 
InputStream::new(Box::pin(futures::stream::empty())); + let stream = std::mem::replace(compiled_input.stream_mut(), placeholder); + input_streams.push((*id, stream)); } } - Ok(Pipeline { inputs, outputs }) - } + // Take ownership of output streams + let mut output_streams: Vec<(NodeId, OutputStream)> = Vec::new(); + for id in &output_ids { + if let Some(CompiledNode::Output(compiled_output)) = graph.node_mut(id) { + // Create a placeholder sink + let placeholder = OutputStream::new(Box::pin(futures::sink::drain().sink_map_err( + |_: std::convert::Infallible| Error::Internal("drain sink error".into()), + ))); + let stream = std::mem::replace(compiled_output.stream_mut(), placeholder); + output_streams.push((*id, stream)); + } + } - /// Executes the pipeline by streaming items through. - /// - /// For each input item: - /// 1. Set as current (single item) - /// 2. Run through transformers (can expand: 1 item → N items) - /// 3. Write all resulting items to outputs - async fn execute_pipeline( - &self, - workflow: &WorkflowGraph, - order: &[NodeId], - pipeline: &Pipeline, - ctx: &mut ExecutionContext, - ) -> WorkflowResult<()> { - // For each input, stream items through the pipeline - for (input_node_id, input) in &pipeline.inputs { + // Process each input stream + for (input_node_id, mut input_stream) in input_streams { tracing::debug!( target: TRACING_TARGET, node_id = %input_node_id, - "Reading from input" + "Reading from input stream" ); - let items = match input { - PipelineInput::Provider(provider) => { - let dal_ctx = Context::default(); - provider.read(&dal_ctx).await? - } - PipelineInput::Cache(name) => ctx.read_cache(name), - }; + while let Some(result) = input_stream.next().await { + let item = result?; - // Process each input item through the pipeline - for item in items { // Start with single input item ctx.set_current_single(item); - // Execute transformers in order (each can expand 1→N) - for node_id in order { - let Some(node) = workflow.get_node(*node_id) else { - continue; - }; - - if let NodeData::Transformer(transformer_node) = node { - self.execute_transformer(*node_id, transformer_node, ctx)?; + // Execute transforms in order + for transform_id in &transform_ids { + if let Some(node) = graph.node(transform_id) + && let Some(transform) = node.as_transform() + { + let input_data = ctx.take_current(); + let output_data = transform.process(input_data).await?; + ctx.set_current(output_data); } } - // Write all resulting items to outputs + // Write to outputs let output_data = ctx.take_current(); if !output_data.is_empty() { - for (output_node_id, output) in &pipeline.outputs { + for (output_node_id, output_stream) in &mut output_streams { tracing::trace!( target: TRACING_TARGET, node_id = %output_node_id, item_count = output_data.len(), - "Writing to output" + "Writing to output stream" ); - match output { - PipelineOutput::Provider(provider) => { - let dal_ctx = Context::default(); - provider.write(&dal_ctx, output_data.clone()).await?; - } - PipelineOutput::Cache(name) => { - ctx.write_cache(name, output_data.clone()); - } + for item in output_data.clone() { + output_stream.send(item).await?; } } } @@ -229,25 +192,10 @@ impl Engine { } } - Ok(()) - } - - /// Executes a transformer node on the current data. 
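The executor now pulls items from owned input streams, runs them through the transform chain, and pushes the results into output sinks before closing them. A minimal sketch of that pipe loop using only the `futures` crate; the real `InputStream`/`OutputStream` wrappers and the `mem::replace` swap are omitted, and the dependency version is an assumption:

```rust
// Minimal sketch of the pipe-based executor loop: pull items from an input
// stream, run them through a transform, push results into an output sink,
// then close the sink.
//
// [dependencies]
// futures = "0.3"

use futures::channel::mpsc;
use futures::{SinkExt, StreamExt, executor, stream};

fn transform(item: String) -> Vec<String> {
    // Stand-in for the compiled transform chain (can expand 1 -> N).
    vec![format!("{item}:a"), format!("{item}:b")]
}

fn main() {
    executor::block_on(async {
        // Input: a stream of fallible items, like InputStream in the patch.
        let mut input = stream::iter(vec![
            Ok::<_, String>("doc-1".to_string()),
            Ok("doc-2".to_string()),
        ]);

        // Output: an unbounded channel sender doubles as the sink.
        let (mut output, collected) = mpsc::unbounded::<String>();

        while let Some(result) = input.next().await {
            let item = result.expect("input item");
            for out in transform(item) {
                output.send(out).await.expect("sink closed");
            }
        }

        // Close the sink so the receiving side terminates.
        output.close().await.expect("close sink");

        let written: Vec<String> = collected.collect().await;
        println!("{written:?}");
    });
}
```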
- fn execute_transformer( - &self, - node_id: NodeId, - _transformer_config: &crate::graph::TransformerConfig, - ctx: &mut ExecutionContext, - ) -> WorkflowResult<()> { - // TODO: Apply transformation based on transformer_node.config - // For now, pass through data unchanged - - tracing::trace!( - target: TRACING_TARGET, - node_id = %node_id, - has_data = ctx.has_current(), - "Transformer node executed (passthrough)" - ); + // Close all output streams + for (_, mut output_stream) in output_streams { + output_stream.close().await?; + } Ok(()) } @@ -258,28 +206,6 @@ impl Engine { } } -/// Pre-built pipeline with inputs and outputs ready for execution. -struct Pipeline { - inputs: Vec<(NodeId, PipelineInput)>, - outputs: Vec<(NodeId, PipelineOutput)>, -} - -/// Input source in the pipeline. -enum PipelineInput { - /// Read from a storage provider. - Provider(InputProvider), - /// Read from a named cache slot. - Cache(String), -} - -/// Output destination in the pipeline. -enum PipelineOutput { - /// Write to a storage provider. - Provider(OutputProvider), - /// Write to a named cache slot. - Cache(String), -} - impl std::fmt::Debug for Engine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("Engine") diff --git a/crates/nvisy-runtime/src/error.rs b/crates/nvisy-runtime/src/error.rs index 4c23a88..45a0427 100644 --- a/crates/nvisy-runtime/src/error.rs +++ b/crates/nvisy-runtime/src/error.rs @@ -6,11 +6,11 @@ use uuid::Uuid; use crate::graph::NodeId; /// Result type for workflow operations. -pub type WorkflowResult = Result; +pub type Result = std::result::Result; /// Errors that can occur during workflow operations. #[derive(Debug, Error)] -pub enum WorkflowError { +pub enum Error { /// Workflow definition is invalid. #[error("invalid workflow definition: {0}")] InvalidDefinition(String), diff --git a/crates/nvisy-runtime/src/graph/compiled/graph.rs b/crates/nvisy-runtime/src/graph/compiled/graph.rs new file mode 100644 index 0000000..56ce789 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/graph.rs @@ -0,0 +1,187 @@ +//! Compiled workflow graph. + +use std::collections::HashMap; + +use petgraph::Direction; +use petgraph::graph::{DiGraph, NodeIndex}; + +use super::input::CompiledInput; +use super::node::CompiledNode; +use super::output::CompiledOutput; +use super::route::CompiledSwitch; +use crate::graph::definition::{EdgeData, NodeId, WorkflowMetadata}; + +/// A compiled workflow graph ready for execution. +/// +/// This is the runtime representation of a workflow after compilation. +/// All cache slots are resolved into direct edges, and all node definitions +/// are compiled into their executable forms. +pub struct CompiledGraph { + /// The underlying directed graph. + graph: DiGraph, + /// Map from node IDs to graph indices. + node_indices: HashMap, + /// Map from graph indices to node IDs. + index_to_id: HashMap, + /// Workflow metadata. + metadata: WorkflowMetadata, +} + +impl CompiledGraph { + /// Creates a new compiled graph. + pub fn new( + graph: DiGraph, + node_indices: HashMap, + metadata: WorkflowMetadata, + ) -> Self { + let index_to_id = node_indices.iter().map(|(k, v)| (*v, *k)).collect(); + Self { + graph, + node_indices, + index_to_id, + metadata, + } + } + + /// Returns the workflow metadata. + pub fn metadata(&self) -> &WorkflowMetadata { + &self.metadata + } + + /// Returns the number of nodes in the graph. + pub fn node_count(&self) -> usize { + self.graph.node_count() + } + + /// Returns the number of edges in the graph. 
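The error module now exposes crate-local `Error`/`Result` aliases in place of `WorkflowError`/`WorkflowResult`. A sketch of that shape with `thiserror`; only the two variants visible in this patch are included, and the dependency version is an assumption:

```rust
// Sketch of the crate-local error/result aliases after the rename.
//
// [dependencies]
// thiserror = "1"

use thiserror::Error;

/// Result type for workflow operations.
pub type Result<T> = std::result::Result<T, Error>;

/// Errors that can occur during workflow operations.
#[derive(Debug, Error)]
pub enum Error {
    /// Workflow definition is invalid.
    #[error("invalid workflow definition: {0}")]
    InvalidDefinition(String),
    /// Internal engine failure.
    #[error("internal error: {0}")]
    Internal(String),
}

fn compile_stub(has_cycle: bool) -> Result<()> {
    if has_cycle {
        return Err(Error::InvalidDefinition("compiled graph contains a cycle".into()));
    }
    Ok(())
}

fn main() {
    if let Err(err) = compile_stub(true) {
        eprintln!("{err}");
    }
}
```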
+ pub fn edge_count(&self) -> usize { + self.graph.edge_count() + } + + /// Returns a reference to a node by ID. + pub fn node(&self, id: &NodeId) -> Option<&CompiledNode> { + self.node_indices + .get(id) + .and_then(|&idx| self.graph.node_weight(idx)) + } + + /// Returns a mutable reference to a node by ID. + pub fn node_mut(&mut self, id: &NodeId) -> Option<&mut CompiledNode> { + self.node_indices + .get(id) + .copied() + .and_then(|idx| self.graph.node_weight_mut(idx)) + } + + /// Returns the node ID for a graph index. + pub fn node_id(&self, index: NodeIndex) -> Option { + self.index_to_id.get(&index).copied() + } + + /// Returns the graph index for a node ID. + pub fn node_index(&self, id: &NodeId) -> Option { + self.node_indices.get(id).copied() + } + + /// Returns an iterator over all node IDs. + pub fn node_ids(&self) -> impl Iterator { + self.node_indices.keys() + } + + /// Returns an iterator over all nodes with their IDs. + pub fn nodes(&self) -> impl Iterator { + self.node_indices + .iter() + .filter_map(|(id, &idx)| self.graph.node_weight(idx).map(|node| (id, node))) + } + + /// Returns an iterator over input nodes. + pub fn input_nodes(&self) -> impl Iterator { + self.nodes() + .filter_map(|(id, node)| node.as_input().map(|input| (id, input))) + } + + /// Returns an iterator over output nodes. + pub fn output_nodes(&self) -> impl Iterator { + self.nodes() + .filter_map(|(id, node)| node.as_output().map(|output| (id, output))) + } + + /// Returns the predecessors (incoming nodes) of a node. + pub fn predecessors(&self, id: &NodeId) -> impl Iterator { + self.node_indices.get(id).into_iter().flat_map(|&idx| { + self.graph + .neighbors_directed(idx, Direction::Incoming) + .filter_map(|pred_idx| self.index_to_id.get(&pred_idx)) + }) + } + + /// Returns the successors (outgoing nodes) of a node. + pub fn successors(&self, id: &NodeId) -> impl Iterator { + self.node_indices.get(id).into_iter().flat_map(|&idx| { + self.graph + .neighbors_directed(idx, Direction::Outgoing) + .filter_map(|succ_idx| self.index_to_id.get(&succ_idx)) + }) + } + + /// Returns the edge data between two nodes, if an edge exists. + pub fn edge(&self, from: &NodeId, to: &NodeId) -> Option<&EdgeData> { + let from_idx = self.node_indices.get(from)?; + let to_idx = self.node_indices.get(to)?; + self.graph + .find_edge(*from_idx, *to_idx) + .and_then(|e| self.graph.edge_weight(e)) + } + + /// Returns topologically sorted node IDs (sources first). + /// + /// Returns `None` if the graph contains a cycle. + pub fn topological_order(&self) -> Option> { + petgraph::algo::toposort(&self.graph, None) + .ok() + .map(|indices| { + indices + .into_iter() + .filter_map(|idx| self.index_to_id.get(&idx).copied()) + .collect() + }) + } + + /// Consumes the graph and returns ownership of all nodes. + /// + /// Returns a map from node IDs to compiled nodes. + pub fn into_nodes(mut self) -> HashMap { + let mut nodes = HashMap::with_capacity(self.node_indices.len()); + for (id, &idx) in &self.node_indices { + if let Some(node) = self.graph.node_weight_mut(idx) { + // Use mem::replace with a placeholder to take ownership + // This is safe because we won't access the graph again + let placeholder = CompiledNode::Switch(CompiledSwitch::new(vec![], None)); + let owned = std::mem::replace(node, placeholder); + nodes.insert(*id, owned); + } + } + nodes + } + + /// Returns a reference to the underlying petgraph. + pub fn inner(&self) -> &DiGraph { + &self.graph + } + + /// Returns a mutable reference to the underlying petgraph. 
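`CompiledGraph` wraps a petgraph `DiGraph` plus id-to-index maps, and execution order comes from a topological sort. A standalone sketch of that bookkeeping with string ids in place of the crate's `NodeId`; the petgraph version is an assumption:

```rust
// Standalone sketch of the CompiledGraph bookkeeping: a DiGraph plus
// id <-> index maps, with topological ordering for execution.
//
// [dependencies]
// petgraph = "0.6"

use std::collections::HashMap;

use petgraph::Direction;
use petgraph::graph::{DiGraph, NodeIndex};

fn main() {
    let mut graph: DiGraph<&str, ()> = DiGraph::new();
    let mut node_indices: HashMap<&str, NodeIndex> = HashMap::new();

    for id in ["input", "chunk", "embed", "output"] {
        node_indices.insert(id, graph.add_node(id));
    }
    for (from, to) in [("input", "chunk"), ("chunk", "embed"), ("embed", "output")] {
        graph.add_edge(node_indices[from], node_indices[to], ());
    }

    let index_to_id: HashMap<NodeIndex, &str> =
        node_indices.iter().map(|(k, v)| (*v, *k)).collect();

    // Sources-first execution order; an Err here would signal a cycle.
    let order: Vec<&str> = petgraph::algo::toposort(&graph, None)
        .expect("acyclic")
        .into_iter()
        .map(|idx| index_to_id[&idx])
        .collect();
    println!("execution order: {order:?}");

    // Incoming neighbours of a node, as used by CompiledGraph::predecessors.
    let preds: Vec<&str> = graph
        .neighbors_directed(node_indices["embed"], Direction::Incoming)
        .map(|idx| index_to_id[&idx])
        .collect();
    println!("predecessors of embed: {preds:?}");
}
```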
+ pub fn inner_mut(&mut self) -> &mut DiGraph { + &mut self.graph + } +} + +impl std::fmt::Debug for CompiledGraph { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CompiledGraph") + .field("node_count", &self.graph.node_count()) + .field("edge_count", &self.graph.edge_count()) + .field("metadata", &self.metadata) + .finish() + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/input.rs b/crates/nvisy-runtime/src/graph/compiled/input.rs new file mode 100644 index 0000000..e008f22 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/input.rs @@ -0,0 +1,36 @@ +//! Compiled input node types. + +use super::stream::InputStream; + +/// Compiled input node - ready to stream data. +/// +/// This is the runtime representation of an input node after compilation. +/// Cache slots are resolved during compilation, so compiled inputs always +/// wrap concrete input streams. +#[derive(Debug)] +pub struct CompiledInput { + /// The input stream for reading data. + stream: InputStream, +} + +impl CompiledInput { + /// Creates a new compiled input from an input stream. + pub fn new(stream: InputStream) -> Self { + Self { stream } + } + + /// Returns a reference to the input stream. + pub fn stream(&self) -> &InputStream { + &self.stream + } + + /// Returns a mutable reference to the input stream. + pub fn stream_mut(&mut self) -> &mut InputStream { + &mut self.stream + } + + /// Consumes this compiled input and returns the underlying stream. + pub fn into_stream(self) -> InputStream { + self.stream + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/mod.rs b/crates/nvisy-runtime/src/graph/compiled/mod.rs new file mode 100644 index 0000000..c3d65c1 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/mod.rs @@ -0,0 +1,29 @@ +//! Compiled workflow types for execution. +//! +//! This module contains runtime-optimized types for executing workflows. +//! These types are created by compiling workflow definitions and are +//! optimized for: +//! - Fast execution without lookups +//! - Pre-resolved cache slots +//! - Pre-instantiated providers and agents +//! +//! To create compiled types, use the [`crate::graph::compiler`] module. + +mod graph; +mod input; +mod node; +mod output; +mod route; +mod stream; +mod transform; + +pub use graph::CompiledGraph; +pub use input::CompiledInput; +pub use node::CompiledNode; +pub use output::CompiledOutput; +pub use route::CompiledSwitch; +pub use stream::{DataSink, DataStream, InputStream, OutputStream}; +pub use transform::{ + ChunkProcessor, CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, + ExtractProcessor, PartitionProcessor, +}; diff --git a/crates/nvisy-runtime/src/graph/compiled/node.rs b/crates/nvisy-runtime/src/graph/compiled/node.rs new file mode 100644 index 0000000..d848613 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/node.rs @@ -0,0 +1,140 @@ +//! Compiled node types. + +use super::input::CompiledInput; +use super::output::CompiledOutput; +use super::route::CompiledSwitch; +use super::transform::CompiledTransform; + +/// Compiled node enum for workflow execution. +/// +/// This is the runtime representation of a node after compilation. +/// Cache slots are resolved during compilation, so compiled nodes +/// only contain concrete processing types. +#[derive(Debug)] +pub enum CompiledNode { + /// Data input node - ready to stream data. + Input(CompiledInput), + /// Data output node - ready to receive data. 
+ Output(CompiledOutput), + /// Data transform node - ready to process data. + /// Boxed to reduce enum size variance (transform processors are large). + Transform(Box), + /// Conditional routing node - evaluates conditions. + Switch(CompiledSwitch), +} + +impl CompiledNode { + /// Returns whether this is an input node. + pub const fn is_input(&self) -> bool { + matches!(self, CompiledNode::Input(_)) + } + + /// Returns whether this is an output node. + pub const fn is_output(&self) -> bool { + matches!(self, CompiledNode::Output(_)) + } + + /// Returns whether this is a transform node. + pub const fn is_transform(&self) -> bool { + matches!(self, CompiledNode::Transform(_)) + } + + /// Returns whether this is a switch node. + pub const fn is_switch(&self) -> bool { + matches!(self, CompiledNode::Switch(_)) + } + + /// Returns this node as an input, if it is one. + pub fn as_input(&self) -> Option<&CompiledInput> { + match self { + CompiledNode::Input(input) => Some(input), + _ => None, + } + } + + /// Returns this node as an output, if it is one. + pub fn as_output(&self) -> Option<&CompiledOutput> { + match self { + CompiledNode::Output(output) => Some(output), + _ => None, + } + } + + /// Returns this node as a transform, if it is one. + pub fn as_transform(&self) -> Option<&CompiledTransform> { + match self { + CompiledNode::Transform(transform) => Some(transform.as_ref()), + _ => None, + } + } + + /// Returns this node as a switch, if it is one. + pub fn as_switch(&self) -> Option<&CompiledSwitch> { + match self { + CompiledNode::Switch(switch) => Some(switch), + _ => None, + } + } + + /// Consumes this node and returns the input, if it is one. + pub fn into_input(self) -> Option { + match self { + CompiledNode::Input(input) => Some(input), + _ => None, + } + } + + /// Consumes this node and returns the output, if it is one. + pub fn into_output(self) -> Option { + match self { + CompiledNode::Output(output) => Some(output), + _ => None, + } + } + + /// Consumes this node and returns the transform, if it is one. + pub fn into_transform(self) -> Option> { + match self { + CompiledNode::Transform(transform) => Some(transform), + _ => None, + } + } + + /// Consumes this node and returns the switch, if it is one. + pub fn into_switch(self) -> Option { + match self { + CompiledNode::Switch(switch) => Some(switch), + _ => None, + } + } +} + +impl From for CompiledNode { + fn from(input: CompiledInput) -> Self { + CompiledNode::Input(input) + } +} + +impl From for CompiledNode { + fn from(output: CompiledOutput) -> Self { + CompiledNode::Output(output) + } +} + +impl From for CompiledNode { + fn from(transform: CompiledTransform) -> Self { + CompiledNode::Transform(Box::new(transform)) + } +} + +impl From> for CompiledNode { + fn from(transform: Box) -> Self { + CompiledNode::Transform(transform) + } +} + +impl From for CompiledNode { + fn from(switch: CompiledSwitch) -> Self { + CompiledNode::Switch(switch) + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/output.rs b/crates/nvisy-runtime/src/graph/compiled/output.rs new file mode 100644 index 0000000..a4e1682 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/output.rs @@ -0,0 +1,36 @@ +//! Compiled output node types. + +use super::stream::OutputStream; + +/// Compiled output node - ready to receive data. +/// +/// This is the runtime representation of an output node after compilation. +/// Cache slots are resolved during compilation, so compiled outputs always +/// wrap concrete output streams. 
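`CompiledNode` boxes its `Transform` variant to keep the enum small and exposes `is_*`/`as_*` accessors. A compact sketch of that pattern with placeholder payload types:

```rust
// Sketch of the boxed-variant trick used by CompiledNode: a large variant
// is boxed so the enum stays small, and accessors expose it by reference.

struct SmallInput {
    name: String,
}

// Imagine a large processor bundle, like CompiledTransform.
struct LargeTransform {
    buffer: [u8; 1024],
}

enum Node {
    Input(SmallInput),
    // Boxed so the enum does not carry the 1 KiB payload inline.
    Transform(Box<LargeTransform>),
}

impl Node {
    fn is_transform(&self) -> bool {
        matches!(self, Node::Transform(_))
    }

    fn as_transform(&self) -> Option<&LargeTransform> {
        match self {
            Node::Transform(t) => Some(t.as_ref()),
            _ => None,
        }
    }
}

fn main() {
    let node = Node::Transform(Box::new(LargeTransform { buffer: [0; 1024] }));
    println!(
        "enum size: {} bytes, is_transform: {}",
        std::mem::size_of::<Node>(),
        node.is_transform()
    );
    assert!(node.as_transform().is_some());
    let _ = Node::Input(SmallInput { name: "s3".into() });
}
```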
+#[derive(Debug)] +pub struct CompiledOutput { + /// The output stream for writing data. + stream: OutputStream, +} + +impl CompiledOutput { + /// Creates a new compiled output from an output stream. + pub fn new(stream: OutputStream) -> Self { + Self { stream } + } + + /// Returns a reference to the output stream. + pub fn stream(&self) -> &OutputStream { + &self.stream + } + + /// Returns a mutable reference to the output stream. + pub fn stream_mut(&mut self) -> &mut OutputStream { + &mut self.stream + } + + /// Consumes this compiled output and returns the underlying stream. + pub fn into_stream(self) -> OutputStream { + self.stream + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/route.rs b/crates/nvisy-runtime/src/graph/compiled/route.rs new file mode 100644 index 0000000..890a1f0 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/route.rs @@ -0,0 +1,200 @@ +//! Compiled routing node types. + +use nvisy_dal::AnyDataValue; + +use crate::graph::definition::{ContentTypeCategory, SwitchBranch, SwitchCondition}; + +/// Compiled switch node - ready to route data. +/// +/// Evaluates conditions against input data and determines +/// which branch to route the data to. +#[derive(Debug, Clone)] +pub struct CompiledSwitch { + /// Branches with conditions and targets. + branches: Vec, + /// Default target if no condition matches. + default: Option, +} + +impl CompiledSwitch { + /// Creates a new compiled switch from branches and default target. + pub fn new(branches: Vec, default: Option) -> Self { + Self { branches, default } + } + + /// Returns the branches. + pub fn branches(&self) -> &[SwitchBranch] { + &self.branches + } + + /// Returns the default target. + pub fn default(&self) -> Option<&str> { + self.default.as_deref() + } + + /// Evaluates the switch conditions against input data. + /// + /// Returns the target slot name for routing, or None if no match + /// and no default is configured. + pub fn evaluate(&self, data: &AnyDataValue) -> Option<&str> { + for branch in &self.branches { + if self.evaluate_condition(&branch.condition, data) { + return Some(&branch.target); + } + } + self.default.as_deref() + } + + /// Evaluates a single condition against the data. 
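+    ///
+    /// A rough sketch of the intended semantics (the threshold below is
+    /// hypothetical, not part of the original docs): a `FileSizeAbove`
+    /// condition only matches blob values whose payload exceeds the threshold.
+    ///
+    /// ```ignore
+    /// let condition = SwitchCondition::FileSizeAbove { threshold_bytes: 1024 };
+    /// // Matches only `AnyDataValue::Blob` values with `data.len() > 1024`;
+    /// // every other variant evaluates to `false`.
+    /// ```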
+ fn evaluate_condition(&self, condition: &SwitchCondition, data: &AnyDataValue) -> bool { + match condition { + SwitchCondition::Always => true, + SwitchCondition::ContentType { category } => { + // Check if data matches the content type category + match data { + AnyDataValue::Blob(blob) => { + let mime = blob + .content_type + .as_deref() + .unwrap_or("application/octet-stream"); + match category { + ContentTypeCategory::Image => mime.starts_with("image/"), + ContentTypeCategory::Document => { + mime == "application/pdf" + || mime.starts_with("application/vnd.") + || mime == "application/msword" + } + ContentTypeCategory::Text => { + mime.starts_with("text/") || mime == "application/json" + } + ContentTypeCategory::Audio => mime.starts_with("audio/"), + ContentTypeCategory::Video => mime.starts_with("video/"), + ContentTypeCategory::Spreadsheet => { + mime == "application/vnd.ms-excel" + || mime.contains("spreadsheet") + || mime == "text/csv" + } + ContentTypeCategory::Presentation => { + mime == "application/vnd.ms-powerpoint" + || mime.contains("presentation") + } + ContentTypeCategory::Archive => { + mime == "application/zip" + || mime == "application/x-tar" + || mime == "application/gzip" + } + ContentTypeCategory::Code => { + mime.starts_with("text/x-") + || mime == "application/javascript" + || mime == "application/typescript" + } + } + } + _ => false, + } + } + SwitchCondition::FileSizeAbove { threshold_bytes } => match data { + AnyDataValue::Blob(blob) => blob.data.len() as u64 > *threshold_bytes, + _ => false, + }, + SwitchCondition::FileSizeBelow { threshold_bytes } => match data { + AnyDataValue::Blob(blob) => (blob.data.len() as u64) < *threshold_bytes, + _ => false, + }, + SwitchCondition::HasMetadata { key } => { + // Check if the data has metadata with the given key + match data { + AnyDataValue::Blob(blob) => blob.metadata.contains_key(key), + AnyDataValue::Record(record) => record.columns.contains_key(key), + _ => false, + } + } + SwitchCondition::MetadataEquals { key, value } => { + // Check if metadata key equals value + match data { + AnyDataValue::Blob(blob) => { + blob.metadata.get(key).map(|v| v == value).unwrap_or(false) + } + _ => false, + } + } + // TODO: Implement remaining conditions + SwitchCondition::PageCountAbove { .. } => false, + SwitchCondition::DurationAbove { .. } => false, + SwitchCondition::Language { .. } => false, + SwitchCondition::DateNewerThan { .. } => false, + SwitchCondition::FileNameMatches { pattern } => match data { + AnyDataValue::Blob(blob) => { + // Simple glob-style matching for common patterns + glob_match(pattern, &blob.path) + } + _ => false, + }, + SwitchCondition::FileExtension { extension } => match data { + AnyDataValue::Blob(blob) => blob + .path + .rsplit('.') + .next() + .map(|ext| ext.eq_ignore_ascii_case(extension)) + .unwrap_or(false), + _ => false, + }, + } + } +} + +impl From for CompiledSwitch { + fn from(def: crate::graph::definition::SwitchDef) -> Self { + Self::new(def.branches, def.default) + } +} + +/// Simple glob-style pattern matching. 
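+///
+/// A minimal sketch of the expected behavior (the file names are hypothetical):
+///
+/// ```ignore
+/// assert!(glob_match("*.pdf", "report.PDF"));
+/// assert!(glob_match("report-?.txt", "report-1.txt"));
+/// assert!(!glob_match("*.pdf", "notes.docx"));
+/// ```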
+///
+/// Supports:
+/// - `*` matches any sequence of characters
+/// - `?` matches any single character
+/// - Literal matching for other characters
+fn glob_match(pattern: &str, text: &str) -> bool {
+    let mut pattern_chars = pattern.chars().peekable();
+    let mut text_chars = text.chars().peekable();
+
+    while let Some(p) = pattern_chars.next() {
+        match p {
+            '*' => {
+                // Try matching zero or more characters
+                if pattern_chars.peek().is_none() {
+                    // Pattern ends with *, matches everything remaining
+                    return true;
+                }
+                // Try each position in the remaining text
+                loop {
+                    let remaining_pattern: String = pattern_chars.clone().collect();
+                    let remaining_text: String = text_chars.clone().collect();
+                    if glob_match(&remaining_pattern, &remaining_text) {
+                        return true;
+                    }
+                    if text_chars.next().is_none() {
+                        return false;
+                    }
+                }
+            }
+            '?' => {
+                // Match any single character
+                if text_chars.next().is_none() {
+                    return false;
+                }
+            }
+            c => {
+                // Literal match (case-insensitive for file matching)
+                match text_chars.next() {
+                    Some(t) if c.eq_ignore_ascii_case(&t) => {}
+                    _ => return false,
+                }
+            }
+        }
+    }
+
+    // Pattern is exhausted, text should also be exhausted
+    text_chars.peek().is_none()
+}
diff --git a/crates/nvisy-runtime/src/graph/compiled/stream.rs b/crates/nvisy-runtime/src/graph/compiled/stream.rs
new file mode 100644
index 0000000..9a5163b
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/compiled/stream.rs
@@ -0,0 +1,216 @@
+//! Stream types for compiled workflow data flow.
+
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+use futures::stream::BoxStream;
+use futures::{Sink, SinkExt, Stream, StreamExt};
+use nvisy_dal::AnyDataValue;
+
+use crate::error::{Error, Result};
+
+/// A boxed stream of workflow data values.
+pub type DataStream = BoxStream<'static, Result<AnyDataValue>>;
+
+/// A boxed sink for workflow data values.
+pub type DataSink = Pin<Box<dyn Sink<AnyDataValue, Error = Error> + Send + 'static>>;
+
+/// Input stream for reading data in a workflow.
+///
+/// Wraps a boxed stream and provides metadata about the source.
+pub struct InputStream {
+    /// The underlying data stream.
+    stream: DataStream,
+    /// Optional cursor for pagination.
+    cursor: Option<String>,
+    /// Optional limit on items to read.
+    limit: Option<usize>,
+    /// Number of items read so far.
+    items_read: usize,
+}
+
+impl InputStream {
+    /// Creates a new input stream.
+    pub fn new(stream: DataStream) -> Self {
+        Self {
+            stream,
+            cursor: None,
+            limit: None,
+            items_read: 0,
+        }
+    }
+
+    /// Creates an input stream with a cursor for pagination.
+    pub fn with_cursor(stream: DataStream, cursor: impl Into<String>) -> Self {
+        Self {
+            stream,
+            cursor: Some(cursor.into()),
+            limit: None,
+            items_read: 0,
+        }
+    }
+
+    /// Creates an input stream with a limit on items to read.
+    pub fn with_limit(stream: DataStream, limit: usize) -> Self {
+        Self {
+            stream: Box::pin(stream.take(limit)),
+            cursor: None,
+            limit: Some(limit),
+            items_read: 0,
+        }
+    }
+
+    /// Creates an input stream with both cursor and limit.
+    pub fn with_cursor_and_limit(
+        stream: DataStream,
+        cursor: impl Into<String>,
+        limit: usize,
+    ) -> Self {
+        Self {
+            stream: Box::pin(stream.take(limit)),
+            cursor: Some(cursor.into()),
+            limit: Some(limit),
+            items_read: 0,
+        }
+    }
+
+    /// Returns the cursor for the next page, if any.
+    pub fn cursor(&self) -> Option<&str> {
+        self.cursor.as_deref()
+    }
+
+    /// Returns the limit on items to read, if set.
+    pub fn limit(&self) -> Option<usize> {
+        self.limit
+    }
+
+    /// Returns the number of items read so far.
+    pub fn items_read(&self) -> usize {
+        self.items_read
+    }
+
+    /// Consumes the stream and returns the inner boxed stream.
+    pub fn into_inner(self) -> DataStream {
+        self.stream
+    }
+
+    /// Consumes the stream and returns all parts.
+    pub fn into_parts(self) -> (DataStream, Option<String>, Option<usize>) {
+        (self.stream, self.cursor, self.limit)
+    }
+}
+
+impl Stream for InputStream {
+    type Item = Result<AnyDataValue>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        let result = Pin::new(&mut self.stream).poll_next(cx);
+        if let Poll::Ready(Some(Ok(_))) = &result {
+            self.items_read += 1;
+        }
+        result
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.stream.size_hint()
+    }
+}
+
+impl std::fmt::Debug for InputStream {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("InputStream")
+            .field("cursor", &self.cursor)
+            .field("limit", &self.limit)
+            .field("items_read", &self.items_read)
+            .finish_non_exhaustive()
+    }
+}
+
+/// Output stream for writing data in a workflow.
+///
+/// Wraps a boxed sink and tracks write statistics.
+pub struct OutputStream {
+    /// The underlying data sink.
+    sink: DataSink,
+    /// Optional buffer size for batching.
+    buffer_size: Option<usize>,
+    /// Number of items written so far.
+    items_written: usize,
+}
+
+impl OutputStream {
+    /// Creates a new output stream.
+    pub fn new(sink: DataSink) -> Self {
+        Self {
+            sink,
+            buffer_size: None,
+            items_written: 0,
+        }
+    }
+
+    /// Creates an output stream with buffering for batched writes.
+    pub fn with_buffer(sink: DataSink, buffer_size: usize) -> Self {
+        Self {
+            sink: Box::pin(sink.buffer(buffer_size)),
+            buffer_size: Some(buffer_size),
+            items_written: 0,
+        }
+    }
+
+    /// Returns the buffer size, if set.
+    pub fn buffer_size(&self) -> Option<usize> {
+        self.buffer_size
+    }
+
+    /// Returns the number of items written so far.
+    pub fn items_written(&self) -> usize {
+        self.items_written
+    }
+
+    /// Consumes the stream and returns the inner boxed sink.
+    pub fn into_inner(self) -> DataSink {
+        self.sink
+    }
+}
+
+impl Sink<AnyDataValue> for OutputStream {
+    type Error = Error;
+
+    fn poll_ready(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<std::result::Result<(), Self::Error>> {
+        self.sink.as_mut().poll_ready(cx)
+    }
+
+    fn start_send(
+        mut self: Pin<&mut Self>,
+        item: AnyDataValue,
+    ) -> std::result::Result<(), Self::Error> {
+        self.items_written += 1;
+        self.sink.as_mut().start_send(item)
+    }
+
+    fn poll_flush(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<std::result::Result<(), Self::Error>> {
+        self.sink.as_mut().poll_flush(cx)
+    }
+
+    fn poll_close(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<std::result::Result<(), Self::Error>> {
+        self.sink.as_mut().poll_close(cx)
+    }
+}
+
+impl std::fmt::Debug for OutputStream {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("OutputStream")
+            .field("buffer_size", &self.buffer_size)
+            .field("items_written", &self.items_written)
+            .finish_non_exhaustive()
+    }
+}
diff --git a/crates/nvisy-runtime/src/graph/compiled/transform.rs b/crates/nvisy-runtime/src/graph/compiled/transform.rs
new file mode 100644
index 0000000..fb78b3e
--- /dev/null
+++ b/crates/nvisy-runtime/src/graph/compiled/transform.rs
@@ -0,0 +1,362 @@
+//! Compiled transform node types.
+//!
+//! Processors are the runtime representation of transform nodes. Each processor
+//! encapsulates the logic and dependencies needed to execute a specific transform.
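+//!
+//! A minimal usage sketch (assuming an async context and the default partition
+//! strategy); most processors currently pass data through until their TODOs are
+//! implemented:
+//!
+//! ```ignore
+//! let processor = PartitionProcessor::new(PartitionStrategy::default(), false, false);
+//! let output = processor.process(Vec::new()).await?;
+//! assert!(output.is_empty());
+//! ```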
+ +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::Agents; +use nvisy_rig::provider::EmbeddingProvider; + +use crate::error::Result; +use crate::graph::transform::{ + ChunkStrategy, DeriveTask, EnrichTask, ExtractTask, PartitionStrategy, +}; + +/// Compiled transform node - ready to process data. +/// +/// Each variant wraps a dedicated processor that encapsulates +/// the transform logic and any required external dependencies. +#[derive(Debug)] +pub enum CompiledTransform { + /// Partition documents into elements. + Partition(PartitionProcessor), + /// Chunk content into smaller pieces. + Chunk(ChunkProcessor), + /// Generate vector embeddings. + Embedding(EmbeddingProcessor), + /// Enrich elements with metadata/descriptions. + Enrich(EnrichProcessor), + /// Extract structured data or convert formats. + Extract(ExtractProcessor), + /// Generate new content from input. + Derive(DeriveProcessor), +} + +impl CompiledTransform { + /// Processes input data through the transform. + pub async fn process(&self, input: Vec) -> Result> { + match self { + Self::Partition(p) => p.process(input).await, + Self::Chunk(p) => p.process(input).await, + Self::Embedding(p) => p.process(input).await, + Self::Enrich(p) => p.process(input).await, + Self::Extract(p) => p.process(input).await, + Self::Derive(p) => p.process(input).await, + } + } +} + +// ============================================================================ +// Partition Processor +// ============================================================================ + +/// Processor for partitioning documents into elements. +#[derive(Debug)] +pub struct PartitionProcessor { + /// Partitioning strategy to use. + strategy: PartitionStrategy, + /// Whether to include page break markers. + include_page_breaks: bool, + /// Whether to discard unsupported element types. + discard_unsupported: bool, +} + +impl PartitionProcessor { + /// Creates a new partition processor. + pub fn new( + strategy: PartitionStrategy, + include_page_breaks: bool, + discard_unsupported: bool, + ) -> Self { + Self { + strategy, + include_page_breaks, + discard_unsupported, + } + } + + /// Returns the partitioning strategy. + pub fn strategy(&self) -> PartitionStrategy { + self.strategy + } + + /// Returns whether page breaks are included. + pub fn include_page_breaks(&self) -> bool { + self.include_page_breaks + } + + /// Returns whether unsupported types are discarded. + pub fn discard_unsupported(&self) -> bool { + self.discard_unsupported + } + + /// Processes input data through the partition transform. + pub async fn process(&self, input: Vec) -> Result> { + // TODO: Implement document partitioning based on strategy + // For now, pass through unchanged + Ok(input) + } +} + +// ============================================================================ +// Chunk Processor +// ============================================================================ + +/// Processor for chunking content into smaller pieces. +pub struct ChunkProcessor { + /// Chunking strategy to use. + strategy: ChunkStrategy, + /// Whether to use LLM-powered contextual chunking. + contextual_chunking: bool, + /// Agents for contextual chunking (if enabled). + agents: Option, +} + +impl ChunkProcessor { + /// Creates a new chunk processor without contextual chunking. + pub fn new(strategy: ChunkStrategy) -> Self { + Self { + strategy, + contextual_chunking: false, + agents: None, + } + } + + /// Creates a new chunk processor with contextual chunking enabled. 
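+    ///
+    /// The supplied [`Agents`] are only meant to be consulted by
+    /// [`process`](Self::process) when contextual chunking is enabled
+    /// (currently a TODO there).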
+ pub fn with_contextual_chunking(strategy: ChunkStrategy, agents: Agents) -> Self { + Self { + strategy, + contextual_chunking: true, + agents: Some(agents), + } + } + + /// Returns the chunking strategy. + pub fn strategy(&self) -> &ChunkStrategy { + &self.strategy + } + + /// Returns whether contextual chunking is enabled. + pub fn contextual_chunking(&self) -> bool { + self.contextual_chunking + } + + /// Processes input data through the chunk transform. + pub async fn process(&self, input: Vec) -> Result> { + // TODO: Implement chunking based on strategy + // If contextual_chunking is enabled, use agents for context generation + Ok(input) + } +} + +impl std::fmt::Debug for ChunkProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ChunkProcessor") + .field("strategy", &self.strategy) + .field("contextual_chunking", &self.contextual_chunking) + .field("has_agents", &self.agents.is_some()) + .finish() + } +} + +// ============================================================================ +// Embedding Processor +// ============================================================================ + +/// Processor for generating vector embeddings. +pub struct EmbeddingProcessor { + /// The embedding provider for generating embeddings. + provider: EmbeddingProvider, + /// Whether to L2-normalize output embeddings. + normalize: bool, +} + +impl EmbeddingProcessor { + /// Creates a new embedding processor. + pub fn new(provider: EmbeddingProvider, normalize: bool) -> Self { + Self { + provider, + normalize, + } + } + + /// Returns whether normalization is enabled. + pub fn normalize(&self) -> bool { + self.normalize + } + + /// Processes input data through the embedding transform. + pub async fn process(&self, input: Vec) -> Result> { + // TODO: Implement embedding generation using provider + // For now, pass through unchanged + let _ = &self.provider; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for EmbeddingProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EmbeddingProcessor") + .field("normalize", &self.normalize) + .finish_non_exhaustive() + } +} + +// ============================================================================ +// Enrich Processor +// ============================================================================ + +/// Processor for enriching elements with metadata/descriptions. +pub struct EnrichProcessor { + /// Agents for enrichment tasks. + agents: Agents, + /// The enrichment task to perform. + task: EnrichTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl EnrichProcessor { + /// Creates a new enrich processor. + pub fn new(agents: Agents, task: EnrichTask, override_prompt: Option) -> Self { + Self { + agents, + task, + override_prompt, + } + } + + /// Returns the enrichment task. + pub fn task(&self) -> &EnrichTask { + &self.task + } + + /// Returns the prompt override, if any. + pub fn override_prompt(&self) -> Option<&str> { + self.override_prompt.as_deref() + } + + /// Processes input data through the enrich transform. 
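+    ///
+    /// Currently a pass-through; the intent (per the TODO below) is to route
+    /// image tasks to the vision agent and table tasks to the table agent.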
+ pub async fn process(&self, input: Vec) -> Result> { + // TODO: Implement enrichment using agents + // Use self.agents.vision_agent for image tasks + // Use self.agents.table_agent for table tasks + let _ = &self.agents; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for EnrichProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EnrichProcessor") + .field("task", &self.task) + .field("override_prompt", &self.override_prompt) + .finish_non_exhaustive() + } +} + +// ============================================================================ +// Extract Processor +// ============================================================================ + +/// Processor for extracting structured data or converting formats. +pub struct ExtractProcessor { + /// Agents for extraction tasks. + agents: Agents, + /// The extraction task to perform. + task: ExtractTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl ExtractProcessor { + /// Creates a new extract processor. + pub fn new(agents: Agents, task: ExtractTask, override_prompt: Option) -> Self { + Self { + agents, + task, + override_prompt, + } + } + + /// Returns the extraction task. + pub fn task(&self) -> &ExtractTask { + &self.task + } + + /// Returns the prompt override, if any. + pub fn override_prompt(&self) -> Option<&str> { + self.override_prompt.as_deref() + } + + /// Processes input data through the extract transform. + pub async fn process(&self, input: Vec) -> Result> { + // TODO: Implement extraction using agents + // Use self.agents.text_analysis_agent for NER, keywords, classification, sentiment + // Use self.agents.table_agent for table conversion + // Use self.agents.structured_output_agent for JSON conversion + let _ = &self.agents; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for ExtractProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ExtractProcessor") + .field("task", &self.task) + .field("override_prompt", &self.override_prompt) + .finish_non_exhaustive() + } +} + +// ============================================================================ +// Derive Processor +// ============================================================================ + +/// Processor for generating new content from input. +pub struct DeriveProcessor { + /// Agents for derivation tasks. + agents: Agents, + /// The derivation task to perform. + task: DeriveTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl DeriveProcessor { + /// Creates a new derive processor. + pub fn new(agents: Agents, task: DeriveTask, override_prompt: Option) -> Self { + Self { + agents, + task, + override_prompt, + } + } + + /// Returns the derivation task. + pub fn task(&self) -> DeriveTask { + self.task + } + + /// Returns the prompt override, if any. + pub fn override_prompt(&self) -> Option<&str> { + self.override_prompt.as_deref() + } + + /// Processes input data through the derive transform. 
+ pub async fn process(&self, input: Vec) -> Result> { + // TODO: Implement derivation using agents + // Use self.agents.text_generation_agent for summarization and title generation + let _ = &self.agents; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for DeriveProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DeriveProcessor") + .field("task", &self.task) + .field("override_prompt", &self.override_prompt) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/compiler.rs b/crates/nvisy-runtime/src/graph/compiler.rs new file mode 100644 index 0000000..1341faf --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiler.rs @@ -0,0 +1,413 @@ +//! Workflow compiler for transforming definitions into executable graphs. +//! +//! The compiler takes a [`WorkflowDefinition`] and a [`CredentialsRegistry`] +//! and produces a [`CompiledGraph`] that can be executed by the engine. +//! +//! # Compilation Process +//! +//! 1. **Validation**: Check that the definition is structurally valid +//! 2. **Cache Resolution**: Connect cache slot inputs to outputs +//! 3. **Node Compilation**: Create processors and streams for each node +//! 4. **Graph Building**: Build the petgraph structure with compiled nodes + +use std::collections::HashMap; + +use nvisy_dal::core::Context; +use nvisy_rig::agent::Agents; +use nvisy_rig::provider::CompletionProvider; +use petgraph::graph::{DiGraph, NodeIndex}; + +use crate::error::{Error, Result}; +use crate::provider::{ + CompletionProviderParams, CredentialsRegistry, EmbeddingProviderParams, InputProviderParams, + IntoProvider, OutputProviderParams, +}; + +use super::compiled::{ + ChunkProcessor, CompiledGraph, CompiledInput, CompiledNode, CompiledOutput, CompiledSwitch, + CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, ExtractProcessor, + InputStream, OutputStream, PartitionProcessor, +}; +use super::definition::{EdgeData, InputSource, NodeDef, NodeId, OutputTarget, WorkflowDefinition}; + +/// Workflow compiler that transforms definitions into executable graphs. +pub struct WorkflowCompiler<'a> { + /// Credentials registry for resolving provider credentials. + registry: &'a CredentialsRegistry, + /// Execution context for provider initialization. + ctx: Context, +} + +impl<'a> WorkflowCompiler<'a> { + /// Creates a new workflow compiler. + pub fn new(registry: &'a CredentialsRegistry, ctx: Context) -> Self { + Self { registry, ctx } + } + + /// Compiles a workflow definition into an executable graph. + pub async fn compile(&self, def: WorkflowDefinition) -> Result { + // Phase 1: Validate definition structure + def.validate() + .map_err(|e| Error::InvalidDefinition(format!("validation failed: {}", e)))?; + + // Phase 2: Resolve cache slots + let resolved = self.resolve_cache_slots(&def)?; + + // Phase 3: Compile each node + let mut compiled_nodes = HashMap::new(); + for (id, node) in &def.nodes { + // Skip cache slot nodes - they're resolved during edge building + if self.is_cache_only_node(&node.inner) { + continue; + } + let compiled = self.compile_node(&node.inner).await?; + compiled_nodes.insert(*id, compiled); + } + + // Phase 4: Build petgraph + let (graph, node_indices) = self.build_graph(compiled_nodes, &resolved.edges)?; + + Ok(CompiledGraph::new(graph, node_indices, def.metadata)) + } + + /// Checks if a node is a cache-only node (input from cache or output to cache). 
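+    ///
+    /// For example, given some `compiler: WorkflowCompiler` (the slot name
+    /// below is hypothetical), an input that reads from a cache slot is
+    /// cache-only and is skipped during node compilation:
+    ///
+    /// ```ignore
+    /// let def = NodeDef::Input(InputDef::from_cache("partitioned"));
+    /// assert!(compiler.is_cache_only_node(&def));
+    /// ```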
+ fn is_cache_only_node(&self, def: &NodeDef) -> bool { + match def { + NodeDef::Input(input) => matches!(input.source, InputSource::CacheSlot(_)), + NodeDef::Output(output) => matches!(output.target, OutputTarget::Cache(_)), + _ => false, + } + } + + /// Resolves cache slots by connecting cache inputs to cache outputs. + fn resolve_cache_slots(&self, def: &WorkflowDefinition) -> Result { + // Collect cache slot outputs (nodes that write to cache slots) + let mut cache_outputs: HashMap> = HashMap::new(); + for (id, node) in &def.nodes { + if let NodeDef::Output(output) = &node.inner + && let OutputTarget::Cache(slot) = &output.target + { + cache_outputs + .entry(slot.slot.clone()) + .or_default() + .push(*id); + } + } + + // Collect cache slot inputs (nodes that read from cache slots) + let mut cache_inputs: HashMap> = HashMap::new(); + for (id, node) in &def.nodes { + if let NodeDef::Input(input) = &node.inner + && let InputSource::CacheSlot(slot) = &input.source + { + cache_inputs.entry(slot.slot.clone()).or_default().push(*id); + } + } + + // Build resolved edges + let mut resolved_edges = Vec::new(); + + // Add original edges (excluding edges to/from cache nodes) + for edge in &def.edges { + let from_node = def.nodes.get(&edge.from); + let to_node = def.nodes.get(&edge.to); + + let from_is_cache = from_node + .map(|n| self.is_cache_only_node(&n.inner)) + .unwrap_or(false); + let to_is_cache = to_node + .map(|n| self.is_cache_only_node(&n.inner)) + .unwrap_or(false); + + if !from_is_cache && !to_is_cache { + resolved_edges.push(ResolvedEdge { + from: edge.from, + to: edge.to, + data: EdgeData { + from_port: edge.from_port.clone(), + to_port: edge.to_port.clone(), + }, + }); + } + } + + // Connect nodes writing to cache slots with nodes reading from them + // by looking at incoming/outgoing edges + for (slot_name, output_ids) in &cache_outputs { + if let Some(input_ids) = cache_inputs.get(slot_name) { + // For each cache output node, find what writes to it + for output_id in output_ids { + let writers: Vec = def + .edges + .iter() + .filter(|e| e.to == *output_id) + .map(|e| e.from) + .collect(); + + // For each cache input node, find what reads from it + for input_id in input_ids { + let readers: Vec = def + .edges + .iter() + .filter(|e| e.from == *input_id) + .map(|e| e.to) + .collect(); + + // Connect writers directly to readers + for writer in &writers { + for reader in &readers { + resolved_edges.push(ResolvedEdge { + from: *writer, + to: *reader, + data: EdgeData { + from_port: None, + to_port: None, + }, + }); + } + } + } + } + } + } + + Ok(ResolvedDefinition { + edges: resolved_edges, + }) + } + + /// Compiles a single node definition into a compiled node. + async fn compile_node(&self, def: &NodeDef) -> Result { + match def { + NodeDef::Input(input) => { + let stream = self.create_input_stream(input).await?; + Ok(CompiledNode::Input(CompiledInput::new(stream))) + } + NodeDef::Output(output) => { + let stream = self.create_output_stream(output).await?; + Ok(CompiledNode::Output(CompiledOutput::new(stream))) + } + NodeDef::Transform(transformer) => { + let processor = self.create_processor(transformer).await?; + Ok(CompiledNode::Transform(Box::new(processor))) + } + NodeDef::Switch(switch) => { + Ok(CompiledNode::Switch(CompiledSwitch::from(switch.clone()))) + } + } + } + + /// Creates an input stream from an input definition. 
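+    ///
+    /// Only provider-backed inputs are handled here; cache-slot inputs are
+    /// expected to have been resolved away earlier and produce an internal
+    /// error if they reach this point.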
+ async fn create_input_stream( + &self, + input: &super::definition::InputDef, + ) -> Result { + match &input.source { + InputSource::Provider(provider_def) => { + let stream = self + .create_provider_input_stream(&provider_def.provider) + .await?; + Ok(stream) + } + InputSource::CacheSlot(_) => { + // Cache inputs are resolved during cache slot resolution + // This shouldn't be called for cache inputs + Err(Error::Internal( + "cache input nodes should be resolved before compilation".into(), + )) + } + } + } + + /// Creates an input stream from provider parameters. + async fn create_provider_input_stream( + &self, + params: &InputProviderParams, + ) -> Result { + let creds = self.registry.get(params.credentials_id())?; + + let provider = params.clone().into_provider(creds.clone()).await?; + let stream = provider.read_stream(&self.ctx).await?; + + // Map the stream to our Result type + use futures::StreamExt; + let mapped = stream.map(|r| r.map_err(|e| Error::Internal(e.to_string()))); + + Ok(InputStream::new(Box::pin(mapped))) + } + + /// Creates an output stream from an output definition. + async fn create_output_stream( + &self, + output: &super::definition::OutputDef, + ) -> Result { + match &output.target { + OutputTarget::Provider(provider_def) => { + let stream = self + .create_provider_output_stream(&provider_def.provider) + .await?; + Ok(stream) + } + OutputTarget::Cache(_) => { + // Cache outputs are resolved during cache slot resolution + Err(Error::Internal( + "cache output nodes should be resolved before compilation".into(), + )) + } + } + } + + /// Creates an output stream from provider parameters. + async fn create_provider_output_stream( + &self, + params: &OutputProviderParams, + ) -> Result { + let creds = self.registry.get(params.credentials_id())?; + + let provider = params.clone().into_provider(creds.clone()).await?; + let sink = provider.write_sink(&self.ctx).await?; + + Ok(OutputStream::new(sink)) + } + + /// Creates a processor from a transformer definition. 
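+    ///
+    /// Partition and Chunk transforms need no credentials; Embedding, Enrich,
+    /// Extract, and Derive resolve their provider credentials from the
+    /// registry before the processor is built.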
+ async fn create_processor( + &self, + transformer: &super::definition::Transformer, + ) -> Result { + use super::definition::Transformer; + + match transformer { + Transformer::Partition(p) => Ok(CompiledTransform::Partition(PartitionProcessor::new( + p.strategy, + p.include_page_breaks, + p.discard_unsupported, + ))), + Transformer::Chunk(c) => { + if c.contextual_chunking { + // Need completion provider for contextual chunking + // For now, we don't have provider params in chunk definition + // So contextual chunking won't have agents + Ok(CompiledTransform::Chunk(ChunkProcessor::new( + c.chunk_strategy.clone(), + ))) + } else { + Ok(CompiledTransform::Chunk(ChunkProcessor::new( + c.chunk_strategy.clone(), + ))) + } + } + Transformer::Embedding(e) => { + let provider = self.create_embedding_provider(&e.provider).await?; + Ok(CompiledTransform::Embedding(EmbeddingProcessor::new( + provider, + e.normalize, + ))) + } + Transformer::Enrich(e) => { + let agents = self.create_agents(&e.provider).await?; + Ok(CompiledTransform::Enrich(EnrichProcessor::new( + agents, + e.task.clone(), + e.override_prompt.clone(), + ))) + } + Transformer::Extract(e) => { + let agents = self.create_agents(&e.provider).await?; + Ok(CompiledTransform::Extract(ExtractProcessor::new( + agents, + e.task.clone(), + e.override_prompt.clone(), + ))) + } + Transformer::Derive(d) => { + let agents = self.create_agents(&d.provider).await?; + Ok(CompiledTransform::Derive(DeriveProcessor::new( + agents, + d.task, + d.override_prompt.clone(), + ))) + } + } + } + + /// Creates an embedding provider from parameters. + async fn create_embedding_provider( + &self, + params: &EmbeddingProviderParams, + ) -> Result { + let creds = self.registry.get(params.credentials_id())?; + params.clone().into_provider(creds.clone()).await + } + + /// Creates agents from completion provider parameters. + async fn create_agents(&self, params: &CompletionProviderParams) -> Result { + let provider = self.create_completion_provider(params).await?; + Ok(Agents::new(provider)) + } + + /// Creates a completion provider from parameters. + async fn create_completion_provider( + &self, + params: &CompletionProviderParams, + ) -> Result { + let creds = self.registry.get(params.credentials_id())?; + params.clone().into_provider(creds.clone()).await + } + + /// Builds the petgraph from compiled nodes and resolved edges. + fn build_graph( + &self, + nodes: HashMap, + edges: &[ResolvedEdge], + ) -> Result<(DiGraph, HashMap)> { + let mut graph = DiGraph::new(); + let mut node_indices = HashMap::new(); + + // Add nodes + for (id, node) in nodes { + let idx = graph.add_node(node); + node_indices.insert(id, idx); + } + + // Add edges + for edge in edges { + let from_idx = node_indices.get(&edge.from).ok_or_else(|| { + Error::InvalidDefinition(format!("edge references unknown node: {}", edge.from)) + })?; + let to_idx = node_indices.get(&edge.to).ok_or_else(|| { + Error::InvalidDefinition(format!("edge references unknown node: {}", edge.to)) + })?; + + graph.add_edge(*from_idx, *to_idx, edge.data.clone()); + } + + // Verify acyclic + if petgraph::algo::is_cyclic_directed(&graph) { + return Err(Error::InvalidDefinition("workflow contains a cycle".into())); + } + + Ok((graph, node_indices)) + } +} + +/// Resolved edge after cache slot resolution. +struct ResolvedEdge { + from: NodeId, + to: NodeId, + data: EdgeData, +} + +/// Resolved workflow definition after cache slot resolution. 
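+///
+/// Only the flattened edge list survives resolution; cache-only nodes are
+/// dropped and their writers are wired directly to their readers.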
+struct ResolvedDefinition { + edges: Vec, +} + +/// Convenience function to compile a workflow definition. +pub async fn compile( + def: WorkflowDefinition, + registry: &CredentialsRegistry, + ctx: Context, +) -> Result { + WorkflowCompiler::new(registry, ctx).compile(def).await +} diff --git a/crates/nvisy-runtime/src/graph/workflow/edge.rs b/crates/nvisy-runtime/src/graph/definition/edge.rs similarity index 97% rename from crates/nvisy-runtime/src/graph/workflow/edge.rs rename to crates/nvisy-runtime/src/graph/definition/edge.rs index 44d16f6..6e5638a 100644 --- a/crates/nvisy-runtime/src/graph/workflow/edge.rs +++ b/crates/nvisy-runtime/src/graph/definition/edge.rs @@ -58,7 +58,7 @@ impl Edge { } } -/// Edge data stored in the graph. +/// Edge data stored in the compiled graph. #[derive(Debug, Clone, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] pub struct EdgeData { /// Optional port/slot name on the source node. diff --git a/crates/nvisy-runtime/src/graph/definition/input.rs b/crates/nvisy-runtime/src/graph/definition/input.rs new file mode 100644 index 0000000..efcf8d3 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/definition/input.rs @@ -0,0 +1,60 @@ +//! Input node definition types. + +use serde::{Deserialize, Serialize}; + +use crate::provider::InputProviderParams; + +use super::route::CacheSlot; + +/// Input provider definition for workflow nodes. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct InputProvider { + /// Provider parameters (contains credentials_id). + pub provider: InputProviderParams, +} + +/// Source of input data for an input node. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum InputSource { + /// Read from external provider (S3, Postgres, etc.). + Provider(InputProvider), + /// Read from named cache slot (resolved at compile time). + CacheSlot(CacheSlot), +} + +/// Input node definition. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct InputDef { + /// Source of input data. + pub source: InputSource, +} + +impl InputDef { + /// Creates a new input definition from a provider. + pub fn from_provider(provider: InputProviderParams) -> Self { + Self { + source: InputSource::Provider(InputProvider { provider }), + } + } + + /// Creates a new input definition from a cache slot. + pub fn from_cache(slot: impl Into) -> Self { + Self { + source: InputSource::CacheSlot(CacheSlot { + slot: slot.into(), + priority: None, + }), + } + } + + /// Creates a new input definition from a cache slot with priority. + pub fn from_cache_with_priority(slot: impl Into, priority: u32) -> Self { + Self { + source: InputSource::CacheSlot(CacheSlot { + slot: slot.into(), + priority: Some(priority), + }), + } + } +} diff --git a/crates/nvisy-runtime/src/graph/workflow/metadata.rs b/crates/nvisy-runtime/src/graph/definition/metadata.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/workflow/metadata.rs rename to crates/nvisy-runtime/src/graph/definition/metadata.rs diff --git a/crates/nvisy-runtime/src/graph/definition/mod.rs b/crates/nvisy-runtime/src/graph/definition/mod.rs new file mode 100644 index 0000000..108431e --- /dev/null +++ b/crates/nvisy-runtime/src/graph/definition/mod.rs @@ -0,0 +1,30 @@ +//! Workflow definition types. +//! +//! This module contains serializable, frontend-friendly types for defining workflows. +//! These types are designed for: +//! - Easy serialization to/from JSON +//! - Frontend consumption and editing +//! 
- Storage in databases +//! +//! To execute a workflow, definitions must be compiled into runtime types +//! using the [`crate::graph::compiler`] module. + +mod edge; +mod input; +mod metadata; +mod node; +mod output; +mod route; +mod transform; +mod workflow; + +pub use edge::{Edge, EdgeData}; +pub use input::{InputDef, InputProvider, InputSource}; +pub use metadata::WorkflowMetadata; +pub use node::{Node, NodeCommon, NodeDef, NodeId}; +pub use output::{OutputDef, OutputProviderDef, OutputTarget}; +pub use route::{ + CacheSlot, ContentTypeCategory, DateField, SwitchBranch, SwitchCondition, SwitchDef, +}; +pub use transform::{Chunk, Derive, Embedding, Enrich, Extract, Partition, Transform, Transformer}; +pub use workflow::{ValidationError, WorkflowDefinition}; diff --git a/crates/nvisy-runtime/src/graph/workflow/node.rs b/crates/nvisy-runtime/src/graph/definition/node.rs similarity index 75% rename from crates/nvisy-runtime/src/graph/workflow/node.rs rename to crates/nvisy-runtime/src/graph/definition/node.rs index eaa4c45..2e13537 100644 --- a/crates/nvisy-runtime/src/graph/workflow/node.rs +++ b/crates/nvisy-runtime/src/graph/definition/node.rs @@ -1,4 +1,4 @@ -//! Generic node wrapper, node identifier, and node data types. +//! Node definition types. use std::str::FromStr; @@ -6,9 +6,10 @@ use derive_more::{Debug, Display, From, Into}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use crate::graph::input::InputNode; -use crate::graph::output::OutputNode; -use crate::graph::transform::TransformerConfig; +use super::input::InputDef; +use super::output::OutputDef; +use super::route::SwitchDef; +use super::transform::Transformer; /// Unique identifier for a node in a workflow graph. #[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] @@ -101,39 +102,47 @@ impl NodeCommon { } } -/// A workflow node with id, name, description, and node data. -pub type Node = NodeCommon; +/// A workflow node definition with common metadata. +pub type Node = NodeCommon; -/// Data associated with a workflow node. +/// Node definition enum for workflow graphs. /// /// Nodes are categorized by their role in data flow: /// - **Input**: Reads/produces data (entry points) -/// - **Transformer**: Processes/transforms data (intermediate) +/// - **Transform**: Processes/transforms data (intermediate) /// - **Output**: Writes/consumes data (exit points) +/// - **Switch**: Routes data based on conditions #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, From)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum NodeData { +pub enum NodeDef { /// Data input node, reads or produces data. - Input(InputNode), + Input(InputDef), /// Data transformer node, processes or transforms data. - Transformer(TransformerConfig), + Transform(Transformer), /// Data output node, writes or consumes data. - Output(OutputNode), + Output(OutputDef), + /// Conditional routing node. + Switch(SwitchDef), } -impl NodeData { +impl NodeDef { /// Returns whether this is an input node. pub const fn is_input(&self) -> bool { - matches!(self, NodeData::Input(_)) + matches!(self, NodeDef::Input(_)) } - /// Returns whether this is a transformer node. - pub const fn is_transformer(&self) -> bool { - matches!(self, NodeData::Transformer(_)) + /// Returns whether this is a transform node. + pub const fn is_transform(&self) -> bool { + matches!(self, NodeDef::Transform(_)) } /// Returns whether this is an output node. 
pub const fn is_output(&self) -> bool { - matches!(self, NodeData::Output(_)) + matches!(self, NodeDef::Output(_)) + } + + /// Returns whether this is a switch node. + pub const fn is_switch(&self) -> bool { + matches!(self, NodeDef::Switch(_)) } } diff --git a/crates/nvisy-runtime/src/graph/definition/output.rs b/crates/nvisy-runtime/src/graph/definition/output.rs new file mode 100644 index 0000000..0396713 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/definition/output.rs @@ -0,0 +1,60 @@ +//! Output node definition types. + +use serde::{Deserialize, Serialize}; + +use crate::provider::OutputProviderParams; + +use super::route::CacheSlot; + +/// Output provider definition for workflow nodes. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct OutputProviderDef { + /// Provider parameters (contains credentials_id). + pub provider: OutputProviderParams, +} + +/// Target destination for an output node. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum OutputTarget { + /// Write to external provider (S3, Qdrant, etc.). + Provider(OutputProviderDef), + /// Write to named cache slot (resolved at compile time). + Cache(CacheSlot), +} + +/// Output node definition. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct OutputDef { + /// Target destination for output data. + pub target: OutputTarget, +} + +impl OutputDef { + /// Creates a new output definition from a provider. + pub fn from_provider(provider: OutputProviderParams) -> Self { + Self { + target: OutputTarget::Provider(OutputProviderDef { provider }), + } + } + + /// Creates a new output definition from a cache slot. + pub fn from_cache(slot: impl Into) -> Self { + Self { + target: OutputTarget::Cache(CacheSlot { + slot: slot.into(), + priority: None, + }), + } + } + + /// Creates a new output definition from a cache slot with priority. + pub fn from_cache_with_priority(slot: impl Into, priority: u32) -> Self { + Self { + target: OutputTarget::Cache(CacheSlot { + slot: slot.into(), + priority: Some(priority), + }), + } + } +} diff --git a/crates/nvisy-runtime/src/graph/route/switch.rs b/crates/nvisy-runtime/src/graph/definition/route.rs similarity index 67% rename from crates/nvisy-runtime/src/graph/route/switch.rs rename to crates/nvisy-runtime/src/graph/definition/route.rs index fa80510..46c193d 100644 --- a/crates/nvisy-runtime/src/graph/route/switch.rs +++ b/crates/nvisy-runtime/src/graph/definition/route.rs @@ -1,14 +1,48 @@ -//! Switch node for conditional routing. +//! Routing types for conditional data flow. +//! +//! This module provides types for controlling data flow in workflows: +//! - [`CacheSlot`]: Named connection point for linking workflow branches +//! - [`SwitchDef`]: Conditional routing based on data properties use serde::{Deserialize, Serialize}; -/// A switch node that routes data to different branches based on conditions. +/// A cache slot reference for in-memory data passing. +/// +/// Cache slots act as named connection points that link different parts +/// of a workflow graph. During compilation, cache slots are resolved by +/// connecting incoming edges directly to outgoing edges with matching slot names. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CacheSlot { + /// Slot identifier (used as the key for matching inputs to outputs). + pub slot: String, + /// Priority for ordering when multiple slots are available. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub priority: Option, +} + +impl CacheSlot { + /// Creates a new cache slot with the given slot name. + pub fn new(slot: impl Into) -> Self { + Self { + slot: slot.into(), + priority: None, + } + } + + /// Sets the priority. + pub fn with_priority(mut self, priority: u32) -> Self { + self.priority = Some(priority); + self + } +} + +/// A switch node definition that routes data to different branches based on conditions. /// /// Switch nodes evaluate conditions against incoming data and route it /// to the appropriate output branch. Each branch has a condition and a /// target cache slot or output. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SwitchNode { +pub struct SwitchDef { /// Branches to evaluate in order. pub branches: Vec, /// Default branch if no conditions match. @@ -16,6 +50,22 @@ pub struct SwitchNode { pub default: Option, } +impl SwitchDef { + /// Creates a new switch definition with the given branches. + pub fn new(branches: Vec) -> Self { + Self { + branches, + default: None, + } + } + + /// Sets the default target for unmatched data. + pub fn with_default(mut self, target: impl Into) -> Self { + self.default = Some(target.into()); + self + } +} + /// A single branch in a switch node. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct SwitchBranch { @@ -25,10 +75,22 @@ pub struct SwitchBranch { pub target: String, } +impl SwitchBranch { + /// Creates a new branch with the given condition and target. + pub fn new(condition: SwitchCondition, target: impl Into) -> Self { + Self { + condition, + target: target.into(), + } + } +} + /// Condition for switch branch evaluation. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum SwitchCondition { + /// Always matches (catch-all). + Always, /// Match by content type category. ContentType { /// Content type category to match. @@ -39,6 +101,11 @@ pub enum SwitchCondition { /// Size threshold in bytes. threshold_bytes: u64, }, + /// Match when file size is below threshold. + FileSizeBelow { + /// Size threshold in bytes. + threshold_bytes: u64, + }, /// Match when page count exceeds threshold. PageCountAbove { /// Page count threshold. @@ -75,6 +142,18 @@ pub enum SwitchCondition { /// Extension to match (without dot, e.g., "pdf", "docx"). extension: String, }, + /// Match when metadata key exists. + HasMetadata { + /// Metadata key to check for. + key: String, + }, + /// Match when metadata key equals value. + MetadataEquals { + /// Metadata key to check. + key: String, + /// Value to match. + value: String, + }, } /// Content type categories for routing. @@ -115,29 +194,3 @@ pub enum DateField { fn default_confidence() -> f32 { 0.8 } - -impl SwitchNode { - /// Creates a new switch node with the given branches. - pub fn new(branches: Vec) -> Self { - Self { - branches, - default: None, - } - } - - /// Sets the default target for unmatched data. - pub fn with_default(mut self, target: impl Into) -> Self { - self.default = Some(target.into()); - self - } -} - -impl SwitchBranch { - /// Creates a new branch with the given condition and target. 
- pub fn new(condition: SwitchCondition, target: impl Into) -> Self { - Self { - condition, - target: target.into(), - } - } -} diff --git a/crates/nvisy-runtime/src/graph/definition/transform.rs b/crates/nvisy-runtime/src/graph/definition/transform.rs new file mode 100644 index 0000000..9c8faaf --- /dev/null +++ b/crates/nvisy-runtime/src/graph/definition/transform.rs @@ -0,0 +1,7 @@ +//! Transform node definition types. +//! +//! This module re-exports the transform types from the transform module. + +pub use crate::graph::transform::{ + Chunk, Derive, Embedding, Enrich, Extract, Partition, Transform, Transformer, +}; diff --git a/crates/nvisy-runtime/src/graph/definition/workflow.rs b/crates/nvisy-runtime/src/graph/definition/workflow.rs new file mode 100644 index 0000000..5e98ef3 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/definition/workflow.rs @@ -0,0 +1,294 @@ +//! Serializable workflow definition. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use super::edge::Edge; +use super::metadata::WorkflowMetadata; +use super::node::{Node, NodeDef, NodeId}; + +/// Serializable workflow definition. +/// +/// This is the JSON-friendly representation of a workflow graph. +/// It contains all the information needed to compile and execute a workflow. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct WorkflowDefinition { + /// Nodes in the workflow, keyed by their ID. + pub nodes: HashMap, + /// Edges connecting nodes. + pub edges: Vec, + /// Workflow metadata. + #[serde(default)] + pub metadata: WorkflowMetadata, +} + +impl WorkflowDefinition { + /// Creates a new empty workflow definition. + pub fn new() -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + metadata: WorkflowMetadata::default(), + } + } + + /// Creates a workflow definition with metadata. + pub fn with_metadata(metadata: WorkflowMetadata) -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + metadata, + } + } + + /// Adds a node to the workflow. + pub fn add_node(&mut self, id: NodeId, node: Node) -> &mut Self { + self.nodes.insert(id, node); + self + } + + /// Adds a node definition with default metadata. + pub fn add_node_def(&mut self, id: NodeId, def: NodeDef) -> &mut Self { + self.nodes.insert(id, Node::new(def)); + self + } + + /// Adds an edge to the workflow. + pub fn add_edge(&mut self, edge: Edge) -> &mut Self { + self.edges.push(edge); + self + } + + /// Adds a simple edge between two nodes. + pub fn connect(&mut self, from: NodeId, to: NodeId) -> &mut Self { + self.edges.push(Edge::new(from, to)); + self + } + + /// Returns an iterator over input nodes. + pub fn input_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.inner.is_input()) + } + + /// Returns an iterator over output nodes. + pub fn output_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.inner.is_output()) + } + + /// Returns an iterator over transform nodes. + pub fn transform_nodes(&self) -> impl Iterator { + self.nodes + .iter() + .filter(|(_, node)| node.inner.is_transform()) + } + + /// Returns an iterator over switch nodes. + pub fn switch_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.inner.is_switch()) + } + + /// Validates the workflow definition structure. 
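+    ///
+    /// For example (a sketch using the cache-slot helpers from this module),
+    /// a definition with only an output node fails validation:
+    ///
+    /// ```ignore
+    /// let mut def = WorkflowDefinition::new();
+    /// def.add_node_def(NodeId::new(), NodeDef::Output(OutputDef::from_cache("out")));
+    /// assert!(matches!(def.validate(), Err(ValidationError::NoInputNode)));
+    /// ```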
+ /// + /// Checks that: + /// - All edge endpoints reference existing nodes + /// - There are no orphan nodes (nodes with no connections) + /// - The graph has at least one input and one output node + pub fn validate(&self) -> Result<(), ValidationError> { + // Check edge references + for edge in &self.edges { + if !self.nodes.contains_key(&edge.from) { + return Err(ValidationError::MissingNode(edge.from)); + } + if !self.nodes.contains_key(&edge.to) { + return Err(ValidationError::MissingNode(edge.to)); + } + } + + // Check for at least one input and output + let has_input = self.nodes.values().any(|n| n.inner.is_input()); + let has_output = self.nodes.values().any(|n| n.inner.is_output()); + + if !has_input { + return Err(ValidationError::NoInputNode); + } + if !has_output { + return Err(ValidationError::NoOutputNode); + } + + Ok(()) + } +} + +impl Default for WorkflowDefinition { + fn default() -> Self { + Self::new() + } +} + +/// Validation errors for workflow definitions. +#[derive(Debug, Clone, thiserror::Error)] +pub enum ValidationError { + /// An edge references a non-existent node. + #[error("edge references non-existent node: {0}")] + MissingNode(NodeId), + /// The workflow has no input nodes. + #[error("workflow must have at least one input node")] + NoInputNode, + /// The workflow has no output nodes. + #[error("workflow must have at least one output node")] + NoOutputNode, + /// The workflow contains a cycle. + #[error("workflow contains a cycle")] + CycleDetected, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::graph::definition::{ + CacheSlot, InputDef, InputSource, OutputDef, OutputTarget, Transformer, + }; + use crate::graph::transform::Partition; + use uuid::Uuid; + + /// Creates a deterministic NodeId for testing. 
+ fn test_node_id(n: u128) -> NodeId { + NodeId::from_uuid(Uuid::from_u128(n)) + } + + fn input_node_cache(slot: &str) -> Node { + Node::new(NodeDef::Input(InputDef { + source: InputSource::CacheSlot(CacheSlot { + slot: slot.to_string(), + priority: None, + }), + })) + } + + fn output_node_cache(slot: &str) -> Node { + Node::new(NodeDef::Output(OutputDef { + target: OutputTarget::Cache(CacheSlot { + slot: slot.to_string(), + priority: None, + }), + })) + } + + fn transform_node_partition() -> Node { + Node::new(NodeDef::Transform(Transformer::Partition(Partition { + strategy: Default::default(), + include_page_breaks: false, + discard_unsupported: false, + }))) + } + + #[test] + fn test_workflow_definition_new() { + let def = WorkflowDefinition::new(); + assert!(def.nodes.is_empty()); + assert!(def.edges.is_empty()); + } + + #[test] + fn test_workflow_definition_add_node() { + let mut def = WorkflowDefinition::new(); + let id = test_node_id(1); + def.add_node(id, input_node_cache("test")); + assert_eq!(def.nodes.len(), 1); + assert!(def.nodes.contains_key(&id)); + } + + #[test] + fn test_workflow_definition_connect() { + let mut def = WorkflowDefinition::new(); + let id1 = test_node_id(1); + let id2 = test_node_id(2); + def.add_node(id1, input_node_cache("in")) + .add_node(id2, output_node_cache("out")) + .connect(id1, id2); + + assert_eq!(def.edges.len(), 1); + assert_eq!(def.edges[0].from, id1); + assert_eq!(def.edges[0].to, id2); + } + + #[test] + fn test_workflow_definition_validate_valid() { + let mut def = WorkflowDefinition::new(); + let id1 = test_node_id(1); + let id2 = test_node_id(2); + def.add_node(id1, input_node_cache("in")) + .add_node(id2, output_node_cache("out")) + .connect(id1, id2); + + assert!(def.validate().is_ok()); + } + + #[test] + fn test_workflow_definition_validate_missing_node() { + let mut def = WorkflowDefinition::new(); + let id1 = test_node_id(1); + let id2 = test_node_id(2); + let id_invalid = test_node_id(99); + def.add_node(id1, input_node_cache("in")) + .add_node(id2, output_node_cache("out")) + .connect(id1, id_invalid); // Invalid reference + + let result = def.validate(); + assert!(result.is_err()); + assert!(matches!(result, Err(ValidationError::MissingNode(_)))); + } + + #[test] + fn test_workflow_definition_validate_no_input() { + let mut def = WorkflowDefinition::new(); + def.add_node(test_node_id(1), output_node_cache("out")); + + let result = def.validate(); + assert!(result.is_err()); + assert!(matches!(result, Err(ValidationError::NoInputNode))); + } + + #[test] + fn test_workflow_definition_validate_no_output() { + let mut def = WorkflowDefinition::new(); + def.add_node(test_node_id(1), input_node_cache("in")); + + let result = def.validate(); + assert!(result.is_err()); + assert!(matches!(result, Err(ValidationError::NoOutputNode))); + } + + #[test] + fn test_workflow_definition_node_iterators() { + let mut def = WorkflowDefinition::new(); + def.add_node(test_node_id(1), input_node_cache("in")) + .add_node(test_node_id(2), transform_node_partition()) + .add_node(test_node_id(3), output_node_cache("out")); + + assert_eq!(def.input_nodes().count(), 1); + assert_eq!(def.transform_nodes().count(), 1); + assert_eq!(def.output_nodes().count(), 1); + } + + #[test] + fn test_workflow_definition_serialization() { + let mut def = WorkflowDefinition::new(); + let id1 = test_node_id(1); + let id2 = test_node_id(2); + def.add_node(id1, input_node_cache("in")) + .add_node(id2, output_node_cache("out")) + .connect(id1, id2); + + // Serialize to JSON + let 
json = serde_json::to_string(&def).expect("serialization failed"); + + // Deserialize back + let deserialized: WorkflowDefinition = + serde_json::from_str(&json).expect("deserialization failed"); + + assert_eq!(def.nodes.len(), deserialized.nodes.len()); + assert_eq!(def.edges.len(), deserialized.edges.len()); + } +} diff --git a/crates/nvisy-runtime/src/graph/graph.rs b/crates/nvisy-runtime/src/graph/graph.rs deleted file mode 100644 index dd0619c..0000000 --- a/crates/nvisy-runtime/src/graph/graph.rs +++ /dev/null @@ -1,432 +0,0 @@ -//! Workflow graph runtime representation. - -use std::collections::{HashMap, HashSet}; - -use petgraph::Direction; -use petgraph::algo::{is_cyclic_directed, toposort}; -use petgraph::graph::{DiGraph, NodeIndex}; -use petgraph::visit::EdgeRef; -use uuid::Uuid; - -use super::input::InputSource; -use super::output::OutputDestination; -use super::transform::TransformerConfig; -use super::workflow::{Edge, EdgeData, NodeData, NodeId, WorkflowDefinition, WorkflowMetadata}; -use crate::error::{WorkflowError, WorkflowResult}; -use crate::provider::CredentialsRegistry; - -/// A workflow graph containing nodes and edges. -/// -/// Internally uses petgraph's `DiGraph` for efficient graph operations. -#[derive(Debug, Clone, Default)] -pub struct WorkflowGraph { - /// The underlying directed graph. - graph: DiGraph, - /// Mapping from NodeId to petgraph's NodeIndex. - node_indices: HashMap, - /// Reverse mapping from NodeIndex to NodeId. - index_to_id: HashMap, - /// Workflow metadata. - pub metadata: WorkflowMetadata, -} - -impl WorkflowGraph { - /// Creates a new empty workflow graph. - pub fn new() -> Self { - Self::default() - } - - /// Creates a new workflow graph with metadata. - pub fn with_metadata(metadata: WorkflowMetadata) -> Self { - Self { - metadata, - ..Default::default() - } - } - - /// Returns the number of nodes in the graph. - pub fn node_count(&self) -> usize { - self.graph.node_count() - } - - /// Returns the number of edges in the graph. - pub fn edge_count(&self) -> usize { - self.graph.edge_count() - } - - /// Returns whether the graph is empty. - pub fn is_empty(&self) -> bool { - self.graph.node_count() == 0 - } - - /// Adds a node to the graph and returns its ID. - pub fn add_node(&mut self, data: impl Into) -> NodeId { - let id = NodeId::new(); - let index = self.graph.add_node(data.into()); - self.node_indices.insert(id, index); - self.index_to_id.insert(index, id); - id - } - - /// Adds a node with a specific ID. - pub fn add_node_with_id(&mut self, id: NodeId, data: impl Into) { - let index = self.graph.add_node(data.into()); - self.node_indices.insert(id, index); - self.index_to_id.insert(index, id); - } - - /// Removes a node and all its connected edges. - pub fn remove_node(&mut self, id: NodeId) -> Option { - let index = self.node_indices.remove(&id)?; - self.index_to_id.remove(&index); - self.graph.remove_node(index) - } - - /// Returns a reference to a node's data. - pub fn get_node(&self, id: NodeId) -> Option<&NodeData> { - let index = self.node_indices.get(&id)?; - self.graph.node_weight(*index) - } - - /// Returns a mutable reference to a node's data. - pub fn get_node_mut(&mut self, id: NodeId) -> Option<&mut NodeData> { - let index = self.node_indices.get(&id)?; - self.graph.node_weight_mut(*index) - } - - /// Returns whether a node exists. - pub fn contains_node(&self, id: NodeId) -> bool { - self.node_indices.contains_key(&id) - } - - /// Returns an iterator over all nodes. 
- pub fn nodes(&self) -> impl Iterator { - self.graph.node_indices().filter_map(|index| { - let id = self.index_to_id.get(&index)?; - let data = self.graph.node_weight(index)?; - Some((*id, data)) - }) - } - - /// Returns an iterator over all node IDs. - pub fn node_ids(&self) -> impl Iterator + '_ { - self.node_indices.keys().copied() - } - - /// Adds an edge between two nodes. - pub fn add_edge(&mut self, edge: Edge) -> WorkflowResult<()> { - let from_index = self.node_indices.get(&edge.from).ok_or_else(|| { - WorkflowError::InvalidDefinition(format!("source node {} does not exist", edge.from)) - })?; - let to_index = self.node_indices.get(&edge.to).ok_or_else(|| { - WorkflowError::InvalidDefinition(format!("target node {} does not exist", edge.to)) - })?; - - let edge_data = EdgeData { - from_port: edge.from_port, - to_port: edge.to_port, - }; - - self.graph.add_edge(*from_index, *to_index, edge_data); - Ok(()) - } - - /// Connects two nodes with a simple edge. - pub fn connect(&mut self, from: NodeId, to: NodeId) -> WorkflowResult<()> { - self.add_edge(Edge::new(from, to)) - } - - /// Returns an iterator over all edges. - pub fn edges(&self) -> impl Iterator + '_ { - self.graph.edge_references().filter_map(|edge_ref| { - let from = *self.index_to_id.get(&edge_ref.source())?; - let to = *self.index_to_id.get(&edge_ref.target())?; - let data = edge_ref.weight(); - Some(Edge { - from, - to, - from_port: data.from_port.clone(), - to_port: data.to_port.clone(), - }) - }) - } - - /// Returns edges originating from a node. - pub fn outgoing_edges(&self, id: NodeId) -> impl Iterator + '_ { - let index = self.node_indices.get(&id).copied(); - self.graph - .edges_directed( - index.unwrap_or(NodeIndex::new(usize::MAX)), - Direction::Outgoing, - ) - .filter_map(move |edge_ref| { - let from = *self.index_to_id.get(&edge_ref.source())?; - let to = *self.index_to_id.get(&edge_ref.target())?; - let data = edge_ref.weight(); - Some(Edge { - from, - to, - from_port: data.from_port.clone(), - to_port: data.to_port.clone(), - }) - }) - } - - /// Returns edges targeting a node. - pub fn incoming_edges(&self, id: NodeId) -> impl Iterator + '_ { - let index = self.node_indices.get(&id).copied(); - self.graph - .edges_directed( - index.unwrap_or(NodeIndex::new(usize::MAX)), - Direction::Incoming, - ) - .filter_map(move |edge_ref| { - let from = *self.index_to_id.get(&edge_ref.source())?; - let to = *self.index_to_id.get(&edge_ref.target())?; - let data = edge_ref.weight(); - Some(Edge { - from, - to, - from_port: data.from_port.clone(), - to_port: data.to_port.clone(), - }) - }) - } - - /// Returns all input nodes (nodes marked as Input or with no incoming edges). - pub fn input_nodes(&self) -> Vec { - self.graph - .node_indices() - .filter_map(|index| { - let id = self.index_to_id.get(&index)?; - let data = self.graph.node_weight(index)?; - if data.is_input() - || self - .graph - .edges_directed(index, Direction::Incoming) - .next() - .is_none() - { - Some(*id) - } else { - None - } - }) - .collect() - } - - /// Returns all output nodes (nodes marked as Output or with no outgoing edges). - pub fn output_nodes(&self) -> Vec { - self.graph - .node_indices() - .filter_map(|index| { - let id = self.index_to_id.get(&index)?; - let data = self.graph.node_weight(index)?; - if data.is_output() - || self - .graph - .edges_directed(index, Direction::Outgoing) - .next() - .is_none() - { - Some(*id) - } else { - None - } - }) - .collect() - } - - /// Collects all credentials IDs referenced by nodes in the workflow. 
- /// - /// Returns a set of unique credential UUIDs from input providers, - /// output providers, and AI-powered transformers. - pub fn credentials_ids(&self) -> HashSet { - let mut ids = HashSet::new(); - - for data in self.graph.node_weights() { - match data { - NodeData::Input(input) => { - if let InputSource::Provider(params) = &input.source { - ids.insert(params.credentials_id()); - } - } - NodeData::Output(output) => { - if let OutputDestination::Provider(params) = &output.destination { - ids.insert(params.credentials_id()); - } - } - NodeData::Transformer(config) => match config { - TransformerConfig::Embedding(c) => { - ids.insert(c.provider.credentials_id()); - } - TransformerConfig::Enrich(c) => { - ids.insert(c.provider.credentials_id()); - } - TransformerConfig::Extract(c) => { - ids.insert(c.provider.credentials_id()); - } - TransformerConfig::Derive(c) => { - ids.insert(c.provider.credentials_id()); - } - // Partition and Chunk don't require credentials - TransformerConfig::Partition(_) | TransformerConfig::Chunk(_) => {} - }, - } - } - - ids - } - - /// Validates the workflow graph structure, constraints, and credentials. - /// - /// Checks that: - /// - The graph has at least one node - /// - There is at least one input and one output node - /// - The graph is acyclic - /// - Edge constraints are satisfied for each node type - /// - All referenced credentials exist in the registry - pub fn validate(&self, registry: &CredentialsRegistry) -> WorkflowResult<()> { - // Must have at least one node - if self.graph.node_count() == 0 { - return Err(WorkflowError::InvalidDefinition( - "workflow must have at least one node".into(), - )); - } - - // Must have at least one input node - let has_input = self.graph.node_weights().any(|data| data.is_input()); - if !has_input { - return Err(WorkflowError::InvalidDefinition( - "workflow must have at least one input node".into(), - )); - } - - // Must have at least one output node - let has_output = self.graph.node_weights().any(|data| data.is_output()); - if !has_output { - return Err(WorkflowError::InvalidDefinition( - "workflow must have at least one output node".into(), - )); - } - - // Check for cycles - if is_cyclic_directed(&self.graph) { - return Err(WorkflowError::InvalidDefinition( - "cycle detected in workflow graph".into(), - )); - } - - // Validate edge constraints for each node - for index in self.graph.node_indices() { - let node_id = self - .index_to_id - .get(&index) - .copied() - .ok_or_else(|| WorkflowError::InvalidDefinition("invalid node index".into()))?; - - let data = self - .graph - .node_weight(index) - .ok_or_else(|| WorkflowError::InvalidDefinition("missing node data".into()))?; - - let incoming_count = self - .graph - .edges_directed(index, Direction::Incoming) - .count(); - let outgoing_count = self - .graph - .edges_directed(index, Direction::Outgoing) - .count(); - - // Input nodes must not have incoming edges - if data.is_input() && incoming_count > 0 { - return Err(WorkflowError::InvalidDefinition(format!( - "input node {} must not have incoming edges", - node_id - ))); - } - - // Output nodes must not have outgoing edges - if data.is_output() && outgoing_count > 0 { - return Err(WorkflowError::InvalidDefinition(format!( - "output node {} must not have outgoing edges", - node_id - ))); - } - - // Transformer nodes must have at least one incoming edge - if data.is_transformer() && incoming_count == 0 { - return Err(WorkflowError::InvalidDefinition(format!( - "transformer node {} must have at least one incoming 
edge", - node_id - ))); - } - - // Transformer nodes must have at least one outgoing edge - if data.is_transformer() && outgoing_count == 0 { - return Err(WorkflowError::InvalidDefinition(format!( - "transformer node {} must have at least one outgoing edge", - node_id - ))); - } - } - - // Validate that all referenced credentials exist in the registry - for credentials_id in self.credentials_ids() { - registry.get(credentials_id)?; - } - - Ok(()) - } - - /// Returns nodes in topological order. - pub fn topological_order(&self) -> WorkflowResult> { - toposort(&self.graph, None) - .map(|indices| { - indices - .into_iter() - .filter_map(|index| self.index_to_id.get(&index).copied()) - .collect() - }) - .map_err(|_| { - WorkflowError::InvalidDefinition("cycle detected in workflow graph".into()) - }) - } - - /// Returns a reference to the underlying petgraph. - pub fn inner(&self) -> &DiGraph { - &self.graph - } - - /// Returns a mutable reference to the underlying petgraph. - pub fn inner_mut(&mut self) -> &mut DiGraph { - &mut self.graph - } - - /// Converts the workflow graph to a serializable definition. - pub fn to_definition(&self) -> WorkflowDefinition { - WorkflowDefinition { - nodes: self.nodes().map(|(id, data)| (id, data.clone())).collect(), - edges: self.edges().collect(), - metadata: self.metadata.clone(), - } - } - - /// Creates a workflow graph from a definition. - /// - /// Returns an error if any edge references a non-existent node. - pub fn from_definition(definition: WorkflowDefinition) -> WorkflowResult { - let mut graph = Self::with_metadata(definition.metadata); - - for (id, node_data) in definition.nodes { - graph.add_node_with_id(id, node_data); - } - - for edge in definition.edges { - graph.add_edge(edge)?; - } - - Ok(graph) - } -} diff --git a/crates/nvisy-runtime/src/graph/input/mod.rs b/crates/nvisy-runtime/src/graph/input/mod.rs deleted file mode 100644 index df6e49c..0000000 --- a/crates/nvisy-runtime/src/graph/input/mod.rs +++ /dev/null @@ -1,82 +0,0 @@ -//! Input node types for reading data from storage backends or cache. - -use derive_more::From; -use nvisy_dal::DataTypeId; -use serde::{Deserialize, Serialize}; - -use super::route::CacheSlot; -use crate::provider::InputProviderParams; - -/// Source of input data. -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] -#[serde(tag = "source", rename_all = "snake_case")] -pub enum InputSource { - /// Read from a storage provider. - Provider(InputProviderParams), - /// Read from a cache slot. - Cache(CacheSlot), -} - -/// A data input node that reads or produces data. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct InputNode { - /// Input source (provider or cache). - #[serde(flatten)] - pub source: InputSource, -} - -impl InputNode { - /// Creates a new input node from a provider. - pub fn from_provider(provider: InputProviderParams) -> Self { - Self { - source: InputSource::Provider(provider), - } - } - - /// Creates a new input node from a cache slot. - pub fn from_cache(slot: CacheSlot) -> Self { - Self { - source: InputSource::Cache(slot), - } - } - - /// Returns the output data type based on the source kind. - /// - /// For cache slots, the type is unknown at compile time. - pub fn output_type(&self) -> Option { - match &self.source { - InputSource::Provider(p) => Some(p.output_type()), - InputSource::Cache(_) => None, - } - } - - /// Returns whether this input reads from a provider. 
- pub const fn is_provider(&self) -> bool { - matches!(self.source, InputSource::Provider(_)) - } - - /// Returns whether this input reads from a cache slot. - pub const fn is_cache(&self) -> bool { - matches!(self.source, InputSource::Cache(_)) - } - - /// Returns the cache slot name if this is a cache input. - pub fn cache_slot(&self) -> Option<&str> { - match &self.source { - InputSource::Cache(slot) => Some(&slot.slot), - _ => None, - } - } -} - -impl From for InputNode { - fn from(provider: InputProviderParams) -> Self { - Self::from_provider(provider) - } -} - -impl From for InputNode { - fn from(slot: CacheSlot) -> Self { - Self::from_cache(slot) - } -} diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index 277282f..78a3fb9 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -1,27 +1,40 @@ //! Workflow graph structures and node types. //! //! This module provides the graph representation for workflows: -//! - [`WorkflowGraph`]: The main graph structure containing nodes and edges -//! - [`WorkflowDefinition`]: Serializable workflow definition (JSON-friendly) -//! - [`WorkflowMetadata`]: Metadata about the workflow -//! - [`Edge`]: Connections between nodes -//! - [`EdgeData`]: Data stored on edges in the underlying petgraph -//! - [`NodeId`]: Unique identifier for nodes -//! - [`NodeData`]: Data associated with each node (Input, Transformer, Output) -//! - [`CacheSlot`]: Named cache slot for in-memory data passing -//! - [`SwitchNode`]: Conditional routing based on data properties +//! +//! ## Definition Types +//! Serializable, frontend-friendly types in [`definition`]: +//! - [`definition::WorkflowDefinition`]: JSON-serializable workflow structure +//! - [`definition::NodeDef`]: Node definition enum (Input, Transform, Output, Switch) +//! - [`definition::InputDef`], [`definition::OutputDef`]: I/O node definitions +//! - [`definition::CacheSlot`]: Named cache slot for inter-node data passing +//! +//! ## Compiled Types +//! Runtime-optimized types in [`compiled`]: +//! - [`compiled::CompiledGraph`]: Execution-ready graph with resolved cache slots +//! - [`compiled::CompiledNode`]: Compiled node enum (Input, Output, Transform, Switch) +//! - [`compiled::CompiledInput`], [`compiled::CompiledOutput`]: Compiled I/O nodes +//! - [`compiled::CompiledTransform`]: Compiled transform with processor structs +//! +//! ## Transform Types +//! Transform definitions in [`transform`]: +//! - [`transform::Transformer`]: Enum of all transform types +//! - [`transform::Transform`]: Trait for data transformation +//! +//! ## Compiler +//! The [`compiler`] module compiles definitions into executable graphs. 
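To make the new layering concrete, here is a minimal sketch of assembling and validating a definition, pieced together from the unit tests added earlier in this patch. It assumes the `nvisy_runtime::graph` re-exports introduced in this hunk are publicly reachable (including `NodeId::from_uuid` and `transform::Partition`); the slot names are illustrative.

```rust
use nvisy_runtime::graph::transform::Partition;
use nvisy_runtime::graph::{
    CacheSlot, InputDef, InputSource, Node, NodeDef, NodeId, OutputDef, OutputTarget, Transformer,
    WorkflowDefinition,
};
use uuid::Uuid;

fn main() {
    let mut def = WorkflowDefinition::new();
    let input = NodeId::from_uuid(Uuid::from_u128(1));
    let partition = NodeId::from_uuid(Uuid::from_u128(2));
    let output = NodeId::from_uuid(Uuid::from_u128(3));

    // input ("raw" cache slot) -> partition transform -> output ("elements" cache slot)
    def.add_node(
        input,
        Node::new(NodeDef::Input(InputDef {
            source: InputSource::CacheSlot(CacheSlot {
                slot: "raw".to_string(),
                priority: None,
            }),
        })),
    )
    .add_node(
        partition,
        Node::new(NodeDef::Transform(Transformer::Partition(Partition {
            strategy: Default::default(),
            include_page_breaks: false,
            discard_unsupported: false,
        }))),
    )
    .add_node(
        output,
        Node::new(NodeDef::Output(OutputDef {
            target: OutputTarget::Cache(CacheSlot {
                slot: "elements".to_string(),
                priority: None,
            }),
        })),
    )
    .connect(input, partition);
    def.connect(partition, output);

    // Structural checks: every edge resolves, and at least one input and one output exist.
    def.validate().expect("definition should be valid");

    // The definition is serde-friendly, so it can be persisted as JSON.
    let json = serde_json::to_string_pretty(&def).expect("serialization failed");
    println!("{json}");
}
```

As exercised in the tests, `validate` covers structural issues only (dangling edges, missing input or output nodes); turning the definition into something executable is the job of the `compiler` module.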
-mod graph; -pub mod input; -pub mod output; -pub mod route; +pub mod compiled; +pub mod compiler; +pub mod definition; pub mod transform; -pub mod workflow; -pub use graph::WorkflowGraph; -pub use input::{InputNode, InputSource}; -pub use output::{OutputDestination, OutputNode}; -pub use route::{CacheSlot, SwitchBranch, SwitchCondition, SwitchNode}; -pub use transform::TransformerConfig; -pub use workflow::{Edge, EdgeData, Node, NodeCommon, NodeData, NodeId}; -pub use workflow::{WorkflowDefinition, WorkflowMetadata}; +// Re-export commonly used types from definition module +pub use definition::{ + CacheSlot, Edge, EdgeData, InputDef, InputProvider, InputSource, Node, NodeCommon, NodeDef, + NodeId, OutputDef, OutputProviderDef, OutputTarget, SwitchBranch, SwitchCondition, SwitchDef, + ValidationError, WorkflowDefinition, WorkflowMetadata, +}; + +// Re-export transform types +pub use transform::Transformer; diff --git a/crates/nvisy-runtime/src/graph/output/mod.rs b/crates/nvisy-runtime/src/graph/output/mod.rs deleted file mode 100644 index 0fa5f4a..0000000 --- a/crates/nvisy-runtime/src/graph/output/mod.rs +++ /dev/null @@ -1,82 +0,0 @@ -//! Output node types for writing data to storage backends, vector databases, or cache. - -use derive_more::From; -use nvisy_dal::DataTypeId; -use serde::{Deserialize, Serialize}; - -use super::route::CacheSlot; -use crate::provider::OutputProviderParams; - -/// Destination for output data. -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] -#[serde(tag = "destination", rename_all = "snake_case")] -pub enum OutputDestination { - /// Write to a storage provider or vector database. - Provider(OutputProviderParams), - /// Write to a cache slot. - Cache(CacheSlot), -} - -/// A data output node that writes or consumes data. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OutputNode { - /// Output destination (provider or cache). - #[serde(flatten)] - pub destination: OutputDestination, -} - -impl OutputNode { - /// Creates a new output node from a provider. - pub fn from_provider(provider: OutputProviderParams) -> Self { - Self { - destination: OutputDestination::Provider(provider), - } - } - - /// Creates a new output node from a cache slot. - pub fn from_cache(slot: CacheSlot) -> Self { - Self { - destination: OutputDestination::Cache(slot), - } - } - - /// Returns the expected input data type based on the destination kind. - /// - /// For cache slots, the type is unknown at compile time. - pub fn input_type(&self) -> Option { - match &self.destination { - OutputDestination::Provider(p) => Some(p.output_type()), - OutputDestination::Cache(_) => None, - } - } - - /// Returns whether this output writes to a provider. - pub const fn is_provider(&self) -> bool { - matches!(self.destination, OutputDestination::Provider(_)) - } - - /// Returns whether this output writes to a cache slot. - pub const fn is_cache(&self) -> bool { - matches!(self.destination, OutputDestination::Cache(_)) - } - - /// Returns the cache slot name if this is a cache output. 
- pub fn cache_slot(&self) -> Option<&str> { - match &self.destination { - OutputDestination::Cache(slot) => Some(&slot.slot), - _ => None, - } - } -} - -impl From for OutputNode { - fn from(provider: OutputProviderParams) -> Self { - Self::from_provider(provider) - } -} - -impl From for OutputNode { - fn from(slot: CacheSlot) -> Self { - Self::from_cache(slot) - } -} diff --git a/crates/nvisy-runtime/src/graph/route/cache.rs b/crates/nvisy-runtime/src/graph/route/cache.rs deleted file mode 100644 index 4b28dfc..0000000 --- a/crates/nvisy-runtime/src/graph/route/cache.rs +++ /dev/null @@ -1,33 +0,0 @@ -//! Cache slot node type for in-memory data passing. - -use serde::{Deserialize, Serialize}; - -/// A cache slot node that can store and retrieve data within a workflow. -/// -/// Cache slots act as named temporary storage that can be used as both -/// input (read from cache) and output (write to cache) within the same workflow. -/// This enables data sharing between different branches of a workflow graph. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct CacheSlot { - /// Slot identifier (used as the key for storage/retrieval). - pub slot: String, - /// Priority for ordering when multiple slots are available. - #[serde(skip_serializing_if = "Option::is_none")] - pub priority: Option, -} - -impl CacheSlot { - /// Creates a new cache slot with the given slot name. - pub fn new(slot: impl Into) -> Self { - Self { - slot: slot.into(), - priority: None, - } - } - - /// Sets the priority. - pub fn with_priority(mut self, priority: u32) -> Self { - self.priority = Some(priority); - self - } -} diff --git a/crates/nvisy-runtime/src/graph/route/mod.rs b/crates/nvisy-runtime/src/graph/route/mod.rs deleted file mode 100644 index 6659fb7..0000000 --- a/crates/nvisy-runtime/src/graph/route/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Routing nodes for conditional data flow. -//! -//! This module provides nodes for controlling data flow in workflows: -//! - [`CacheSlot`]: Named temporary storage for data sharing between branches -//! - [`SwitchNode`]: Conditional routing based on data properties - -mod cache; -mod switch; - -pub use cache::CacheSlot; -pub use switch::{ContentTypeCategory, DateField, SwitchBranch, SwitchCondition, SwitchNode}; diff --git a/crates/nvisy-runtime/src/graph/transform/chunk.rs b/crates/nvisy-runtime/src/graph/transform/chunk.rs index 41484e1..eda56d3 100644 --- a/crates/nvisy-runtime/src/graph/transform/chunk.rs +++ b/crates/nvisy-runtime/src/graph/transform/chunk.rs @@ -1,10 +1,15 @@ -//! Chunk transformer configuration. +//! Chunk transformer. +use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -/// Configuration for chunking content. +use super::Transform; +use crate::error::Result; +use crate::provider::CredentialsRegistry; + +/// Chunking transformer for splitting content into smaller pieces. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct ChunkConfig { +pub struct Chunk { /// Chunking strategy. #[serde(flatten)] pub chunk_strategy: ChunkStrategy, @@ -17,6 +22,18 @@ pub struct ChunkConfig { pub contextual_chunking: bool, } +impl Transform for Chunk { + async fn transform( + &self, + input: Vec, + _registry: &CredentialsRegistry, + ) -> Result> { + // TODO: Implement chunking based on strategy + // For now, pass through unchanged + Ok(input) + } +} + /// Chunking strategy. 
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "chunk_strategy", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/transform/derive.rs b/crates/nvisy-runtime/src/graph/transform/derive.rs index ede02f3..34060f1 100644 --- a/crates/nvisy-runtime/src/graph/transform/derive.rs +++ b/crates/nvisy-runtime/src/graph/transform/derive.rs @@ -1,12 +1,15 @@ -//! Derive transformer configuration - generate new content from input. +//! Derive transformer - generate new content from input. +use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use crate::provider::CompletionProviderParams; +use super::Transform; +use crate::error::Result; +use crate::provider::{CompletionProviderParams, CredentialsRegistry}; -/// Configuration for generating new content from input. +/// Derive transformer for generating new content from input. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct DeriveConfig { +pub struct Derive { /// Completion provider parameters (includes credentials_id and model). #[serde(flatten)] pub provider: CompletionProviderParams, @@ -19,6 +22,18 @@ pub struct DeriveConfig { pub override_prompt: Option, } +impl Transform for Derive { + async fn transform( + &self, + input: Vec, + _registry: &CredentialsRegistry, + ) -> Result> { + // TODO: Implement derivation using completion provider + // For now, pass through unchanged + Ok(input) + } +} + /// Tasks for generating new content from input. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs index 978e847..25d747b 100644 --- a/crates/nvisy-runtime/src/graph/transform/embedding.rs +++ b/crates/nvisy-runtime/src/graph/transform/embedding.rs @@ -1,12 +1,15 @@ -//! Embedding transformer configuration. +//! Embedding transformer. +use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use crate::provider::EmbeddingProviderParams; +use super::Transform; +use crate::error::Result; +use crate::provider::{CredentialsRegistry, EmbeddingProviderParams}; -/// Configuration for generating embeddings. +/// Embedding transformer for generating vector embeddings. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct EmbeddingConfig { +pub struct Embedding { /// Embedding provider parameters (includes credentials_id and model). #[serde(flatten)] pub provider: EmbeddingProviderParams, @@ -15,3 +18,15 @@ pub struct EmbeddingConfig { #[serde(default)] pub normalize: bool, } + +impl Transform for Embedding { + async fn transform( + &self, + input: Vec, + _registry: &CredentialsRegistry, + ) -> Result> { + // TODO: Implement embedding generation using provider + // For now, pass through unchanged + Ok(input) + } +} diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs index 631ea48..14e8783 100644 --- a/crates/nvisy-runtime/src/graph/transform/enrich.rs +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -1,12 +1,15 @@ -//! Enrich transformer configuration - add metadata/descriptions to elements. +//! Enrich transformer - add metadata/descriptions to elements. 
+use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use crate::provider::CompletionProviderParams; +use super::Transform; +use crate::error::Result; +use crate::provider::{CompletionProviderParams, CredentialsRegistry}; -/// Configuration for enriching elements with metadata/descriptions. +/// Enrich transformer for adding metadata/descriptions to elements. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct EnrichConfig { +pub struct Enrich { /// Completion provider parameters (includes credentials_id and model). #[serde(flatten)] pub provider: CompletionProviderParams, @@ -20,6 +23,18 @@ pub struct EnrichConfig { pub override_prompt: Option, } +impl Transform for Enrich { + async fn transform( + &self, + input: Vec, + _registry: &CredentialsRegistry, + ) -> Result> { + // TODO: Implement enrichment using completion provider + // For now, pass through unchanged + Ok(input) + } +} + /// Tasks for adding metadata/descriptions to elements. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "input_type", content = "task", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/transform/extract.rs b/crates/nvisy-runtime/src/graph/transform/extract.rs index 98344b0..f5c5c76 100644 --- a/crates/nvisy-runtime/src/graph/transform/extract.rs +++ b/crates/nvisy-runtime/src/graph/transform/extract.rs @@ -1,12 +1,15 @@ -//! Extract transformer configuration - extract structured data or convert formats. +//! Extract transformer - extract structured data or convert formats. +use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use crate::provider::CompletionProviderParams; +use super::Transform; +use crate::error::Result; +use crate::provider::{CompletionProviderParams, CredentialsRegistry}; -/// Configuration for extracting structured data or converting formats. +/// Extract transformer for extracting structured data or converting formats. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct ExtractConfig { +pub struct Extract { /// Completion provider parameters (includes credentials_id and model). #[serde(flatten)] pub provider: CompletionProviderParams, @@ -20,6 +23,18 @@ pub struct ExtractConfig { pub override_prompt: Option, } +impl Transform for Extract { + async fn transform( + &self, + input: Vec, + _registry: &CredentialsRegistry, + ) -> Result> { + // TODO: Implement extraction using completion provider + // For now, pass through unchanged + Ok(input) + } +} + /// Tasks for extracting structured data or converting formats. 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "task_type", content = "task", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs index 18b8c8d..e2001d7 100644 --- a/crates/nvisy-runtime/src/graph/transform/mod.rs +++ b/crates/nvisy-runtime/src/graph/transform/mod.rs @@ -7,30 +7,74 @@ mod enrich; mod extract; mod partition; -pub use chunk::{ChunkConfig, ChunkStrategy}; -pub use derive::{DeriveConfig, DeriveTask}; -pub use embedding::EmbeddingConfig; -pub use enrich::{EnrichConfig, EnrichTask, ImageEnrichTask, TableEnrichTask}; +use std::future::Future; + +pub use chunk::{Chunk, ChunkStrategy}; +pub use derive::{Derive, DeriveTask}; +pub use embedding::Embedding; +pub use enrich::{Enrich, EnrichTask, ImageEnrichTask, TableEnrichTask}; pub use extract::{ - AnalyzeTask, ConvertTask, ExtractConfig, ExtractTask, TableConvertTask, TextConvertTask, + AnalyzeTask, ConvertTask, Extract, ExtractTask, TableConvertTask, TextConvertTask, }; -pub use partition::{PartitionConfig, PartitionStrategy}; +use nvisy_dal::AnyDataValue; +pub use partition::{Partition, PartitionStrategy}; use serde::{Deserialize, Serialize}; -/// Transformer node configuration. +use crate::error::Result; +use crate::provider::CredentialsRegistry; + +/// Trait for transforming data in a workflow pipeline. +/// +/// Transforms take input data items and produce output data items. +/// A single input can produce multiple outputs (e.g., chunking splits one document +/// into many chunks, or embedding generates one vector per chunk). +pub trait Transform { + /// Transforms input data items into output data items. + /// + /// # Arguments + /// * `input` - The input data items to transform + /// * `registry` - Credentials registry for accessing external services + /// + /// # Returns + /// A vector of transformed data items (may be more or fewer than input) + fn transform( + &self, + input: Vec, + registry: &CredentialsRegistry, + ) -> impl Future>> + Send; +} + +/// Transformer node variant. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] -pub enum TransformerConfig { +pub enum Transformer { /// Partition documents into elements. - Partition(PartitionConfig), + Partition(Partition), /// Chunk content into smaller pieces. - Chunk(ChunkConfig), + Chunk(Chunk), /// Generate vector embeddings. - Embedding(EmbeddingConfig), + Embedding(Embedding), /// Enrich elements with metadata/descriptions. - Enrich(EnrichConfig), + Enrich(Enrich), /// Extract structured data or convert formats. - Extract(ExtractConfig), + Extract(Extract), /// Generate new content from input. 
- Derive(DeriveConfig), + Derive(Derive), +} + +impl Transform for Transformer { + async fn transform( + &self, + input: Vec, + registry: &CredentialsRegistry, + ) -> Result> { + match self { + Self::Partition(t) => t.transform(input, registry).await, + Self::Chunk(t) => t.transform(input, registry).await, + Self::Embedding(t) => t.transform(input, registry).await, + Self::Enrich(t) => t.transform(input, registry).await, + Self::Extract(t) => t.transform(input, registry).await, + Self::Derive(t) => t.transform(input, registry).await, + } + } } diff --git a/crates/nvisy-runtime/src/graph/transform/partition.rs b/crates/nvisy-runtime/src/graph/transform/partition.rs index 02ed0e9..8f6df97 100644 --- a/crates/nvisy-runtime/src/graph/transform/partition.rs +++ b/crates/nvisy-runtime/src/graph/transform/partition.rs @@ -1,10 +1,15 @@ -//! Partition transformer configuration. +//! Partition transformer. +use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -/// Configuration for partitioning documents into elements. +use super::Transform; +use crate::error::Result; +use crate::provider::CredentialsRegistry; + +/// Partition transformer for partitioning documents into elements. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct PartitionConfig { +pub struct Partition { /// Partitioning strategy. pub strategy: PartitionStrategy, @@ -17,6 +22,18 @@ pub struct PartitionConfig { pub discard_unsupported: bool, } +impl Transform for Partition { + async fn transform( + &self, + input: Vec, + _registry: &CredentialsRegistry, + ) -> Result> { + // TODO: Implement document partitioning based on strategy + // For now, pass through unchanged + Ok(input) + } +} + /// Partitioning strategy for document element extraction. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/workflow/definition.rs b/crates/nvisy-runtime/src/graph/workflow/definition.rs deleted file mode 100644 index 150e37e..0000000 --- a/crates/nvisy-runtime/src/graph/workflow/definition.rs +++ /dev/null @@ -1,80 +0,0 @@ -//! Serializable workflow definition. - -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; - -use super::edge::Edge; -use super::metadata::WorkflowMetadata; -use super::node::{NodeData, NodeId}; -use crate::error::{WorkflowError, WorkflowResult}; -use crate::graph::WorkflowGraph; - -/// Serializable workflow definition. -/// -/// This is the JSON-friendly representation of a workflow graph. -/// Use [`WorkflowGraph::to_definition`] and [`WorkflowGraph::from_definition`] -/// to convert between the two representations. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct WorkflowDefinition { - /// Nodes in the workflow, keyed by their ID. - pub nodes: HashMap, - /// Edges connecting nodes. - pub edges: Vec, - /// Workflow metadata. - #[serde(default)] - pub metadata: WorkflowMetadata, -} - -impl WorkflowDefinition { - /// Creates a new empty workflow definition. - pub fn new() -> Self { - Self { - nodes: HashMap::new(), - edges: Vec::new(), - metadata: WorkflowMetadata::default(), - } - } - - /// Creates a workflow definition with metadata. - pub fn with_metadata(metadata: WorkflowMetadata) -> Self { - Self { - nodes: HashMap::new(), - edges: Vec::new(), - metadata, - } - } - - /// Converts this definition into a workflow graph. - /// - /// Returns an error if any edge references a non-existent node. 
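Referring back to the `Transform` trait introduced in `transform/mod.rs` above: it is batch oriented, so an implementation only maps one `Vec<AnyDataValue>` to another and may consult the credentials registry for provider access. Below is a rough crate-internal sketch; the `Limit` transformer is hypothetical and not part of this patch, and actually routing data through it would also require adding a corresponding `Transformer` variant.

```rust
use nvisy_dal::AnyDataValue;
use serde::{Deserialize, Serialize};

use crate::error::Result;
use crate::graph::transform::Transform;
use crate::provider::CredentialsRegistry;

/// Hypothetical transformer that keeps at most `max_items` values per batch.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Limit {
    /// Maximum number of items to forward downstream.
    pub max_items: usize,
}

impl Transform for Limit {
    async fn transform(
        &self,
        input: Vec<AnyDataValue>,
        _registry: &CredentialsRegistry,
    ) -> Result<Vec<AnyDataValue>> {
        // Purely structural transform: no provider calls, so the registry is unused.
        Ok(input.into_iter().take(self.max_items).collect())
    }
}
```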
- pub fn into_graph(self) -> WorkflowResult { - WorkflowGraph::from_definition(self) - } -} - -impl Default for WorkflowDefinition { - fn default() -> Self { - Self::new() - } -} - -impl TryFrom for WorkflowGraph { - type Error = WorkflowError; - - fn try_from(definition: WorkflowDefinition) -> Result { - Self::from_definition(definition) - } -} - -impl From<&WorkflowGraph> for WorkflowDefinition { - fn from(graph: &WorkflowGraph) -> Self { - graph.to_definition() - } -} - -impl From for WorkflowDefinition { - fn from(graph: WorkflowGraph) -> Self { - graph.to_definition() - } -} diff --git a/crates/nvisy-runtime/src/graph/workflow/mod.rs b/crates/nvisy-runtime/src/graph/workflow/mod.rs deleted file mode 100644 index 3c7cb3c..0000000 --- a/crates/nvisy-runtime/src/graph/workflow/mod.rs +++ /dev/null @@ -1,18 +0,0 @@ -//! Workflow graph types. -//! -//! This module provides: -//! - [`WorkflowGraph`]: Runtime graph representation using petgraph -//! - [`WorkflowDefinition`]: Serializable JSON-friendly definition -//! - [`WorkflowMetadata`]: Workflow metadata (name, description, version, etc.) -//! - [`Node`], [`NodeId`], [`NodeData`]: Node types and identifiers -//! - [`Edge`], [`EdgeData`]: Edge types - -mod definition; -mod edge; -mod metadata; -mod node; - -pub use definition::WorkflowDefinition; -pub use edge::{Edge, EdgeData}; -pub use metadata::WorkflowMetadata; -pub use node::{Node, NodeCommon, NodeData, NodeId}; diff --git a/crates/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs index 1edc6fe..6d6ce21 100644 --- a/crates/nvisy-runtime/src/lib.rs +++ b/crates/nvisy-runtime/src/lib.rs @@ -7,7 +7,7 @@ mod error; pub mod graph; pub mod provider; -pub use error::{WorkflowError, WorkflowResult}; +pub use error::{Error, Result}; /// Tracing target for runtime operations. pub const TRACING_TARGET: &str = "nvisy_runtime"; diff --git a/crates/nvisy-runtime/src/provider/ai.rs b/crates/nvisy-runtime/src/provider/ai.rs index bd2d08a..cee8ffe 100644 --- a/crates/nvisy-runtime/src/provider/ai.rs +++ b/crates/nvisy-runtime/src/provider/ai.rs @@ -12,7 +12,7 @@ use super::backend::{ IntoProvider, OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams, PerplexityCompletionParams, PerplexityCredentials, }; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// Completion provider parameters. 
#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] @@ -54,18 +54,19 @@ impl CompletionProviderParams { } } +#[async_trait::async_trait] impl IntoProvider for CompletionProviderParams { type Credentials = ProviderCredentials; type Output = CompletionProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { match (self, credentials) { - (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c), - (Self::Anthropic(p), ProviderCredentials::Anthropic(c)) => p.into_provider(c), - (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c), - (Self::Gemini(p), ProviderCredentials::Gemini(c)) => p.into_provider(c), - (Self::Perplexity(p), ProviderCredentials::Perplexity(c)) => p.into_provider(c), - (params, creds) => Err(WorkflowError::Internal(format!( + (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c).await, + (Self::Anthropic(p), ProviderCredentials::Anthropic(c)) => p.into_provider(c).await, + (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c).await, + (Self::Gemini(p), ProviderCredentials::Gemini(c)) => p.into_provider(c).await, + (Self::Perplexity(p), ProviderCredentials::Perplexity(c)) => p.into_provider(c).await, + (params, creds) => Err(Error::Internal(format!( "credentials type mismatch: expected '{}', got '{}'", params.kind(), creds.kind() @@ -115,16 +116,17 @@ impl EmbeddingProviderParams { } } +#[async_trait::async_trait] impl IntoProvider for EmbeddingProviderParams { type Credentials = ProviderCredentials; type Output = EmbeddingProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { match (self, credentials) { - (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c), - (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c), - (Self::Gemini(p), ProviderCredentials::Gemini(c)) => p.into_provider(c), - (params, creds) => Err(WorkflowError::Internal(format!( + (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c).await, + (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c).await, + (Self::Gemini(p), ProviderCredentials::Gemini(c)) => p.into_provider(c).await, + (params, creds) => Err(Error::Internal(format!( "credentials type mismatch: expected '{}', got '{}'", params.kind(), creds.kind() diff --git a/crates/nvisy-runtime/src/provider/backend/anthropic.rs b/crates/nvisy-runtime/src/provider/backend/anthropic.rs index f8b5b65..663c04a 100644 --- a/crates/nvisy-runtime/src/provider/backend/anthropic.rs +++ b/crates/nvisy-runtime/src/provider/backend/anthropic.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// Anthropic credentials. 
#[derive(Debug, Clone, Serialize, Deserialize)] @@ -33,16 +33,16 @@ impl AnthropicCompletionParams { } } +#[async_trait::async_trait] impl IntoProvider for AnthropicCompletionParams { type Credentials = AnthropicCredentials; type Output = CompletionProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let rig_creds = nvisy_rig::provider::CompletionCredentials::Anthropic { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::Anthropic(self.model); - CompletionProvider::new(&rig_creds, &model) - .map_err(|e| WorkflowError::Internal(e.to_string())) + CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/azblob.rs b/crates/nvisy-runtime/src/provider/backend/azblob.rs index 9b8a94b..19d2636 100644 --- a/crates/nvisy-runtime/src/provider/backend/azblob.rs +++ b/crates/nvisy-runtime/src/provider/backend/azblob.rs @@ -1,11 +1,11 @@ //! Azure Blob Storage provider. -use nvisy_dal::provider::AzblobConfig; +use nvisy_dal::provider::{AzblobConfig, AzblobProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// Azure Blob Storage credentials. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -32,11 +32,12 @@ pub struct AzblobParams { pub prefix: Option, } +#[async_trait::async_trait] impl IntoProvider for AzblobParams { type Credentials = AzblobCredentials; - type Output = AzblobConfig; + type Output = AzblobProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let mut config = AzblobConfig::new(credentials.account_name, self.container); if let Some(account_key) = credentials.account_key { @@ -49,6 +50,6 @@ impl IntoProvider for AzblobParams { config = config.with_prefix(prefix); } - Ok(config) + AzblobProvider::new(&config).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/cohere.rs b/crates/nvisy-runtime/src/provider/backend/cohere.rs index c358525..85a0c09 100644 --- a/crates/nvisy-runtime/src/provider/backend/cohere.rs +++ b/crates/nvisy-runtime/src/provider/backend/cohere.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// Cohere credentials. 
#[derive(Debug, Clone, Serialize, Deserialize)] @@ -35,17 +35,17 @@ impl CohereCompletionParams { } } +#[async_trait::async_trait] impl IntoProvider for CohereCompletionParams { type Credentials = CohereCredentials; type Output = CompletionProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let rig_creds = nvisy_rig::provider::CompletionCredentials::Cohere { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::Cohere(self.model); - CompletionProvider::new(&rig_creds, &model) - .map_err(|e| WorkflowError::Internal(e.to_string())) + CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) } } @@ -68,16 +68,16 @@ impl CohereEmbeddingParams { } } +#[async_trait::async_trait] impl IntoProvider for CohereEmbeddingParams { type Credentials = CohereCredentials; type Output = EmbeddingProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let rig_creds = nvisy_rig::provider::EmbeddingCredentials::Cohere { api_key: credentials.api_key, }; let model = nvisy_rig::provider::EmbeddingModel::Cohere(self.model); - EmbeddingProvider::new(&rig_creds, &model) - .map_err(|e| WorkflowError::Internal(e.to_string())) + EmbeddingProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/gcs.rs b/crates/nvisy-runtime/src/provider/backend/gcs.rs index cec5247..b83b45e 100644 --- a/crates/nvisy-runtime/src/provider/backend/gcs.rs +++ b/crates/nvisy-runtime/src/provider/backend/gcs.rs @@ -1,11 +1,11 @@ //! Google Cloud Storage provider. -use nvisy_dal::provider::GcsConfig; +use nvisy_dal::provider::{GcsConfig, GcsProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// Google Cloud Storage credentials. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -26,17 +26,18 @@ pub struct GcsParams { pub prefix: Option, } +#[async_trait::async_trait] impl IntoProvider for GcsParams { type Credentials = GcsCredentials; - type Output = GcsConfig; + type Output = GcsProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let mut config = GcsConfig::new(self.bucket).with_credentials(credentials.credentials_json); if let Some(prefix) = self.prefix { config = config.with_prefix(prefix); } - Ok(config) + GcsProvider::new(&config).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/gemini.rs b/crates/nvisy-runtime/src/provider/backend/gemini.rs index 9c98778..8c38450 100644 --- a/crates/nvisy-runtime/src/provider/backend/gemini.rs +++ b/crates/nvisy-runtime/src/provider/backend/gemini.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// Gemini credentials. 
#[derive(Debug, Clone, Serialize, Deserialize)] @@ -35,17 +35,17 @@ impl GeminiCompletionParams { } } +#[async_trait::async_trait] impl IntoProvider for GeminiCompletionParams { type Credentials = GeminiCredentials; type Output = CompletionProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let rig_creds = nvisy_rig::provider::CompletionCredentials::Gemini { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::Gemini(self.model); - CompletionProvider::new(&rig_creds, &model) - .map_err(|e| WorkflowError::Internal(e.to_string())) + CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) } } @@ -68,16 +68,16 @@ impl GeminiEmbeddingParams { } } +#[async_trait::async_trait] impl IntoProvider for GeminiEmbeddingParams { type Credentials = GeminiCredentials; type Output = EmbeddingProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let rig_creds = nvisy_rig::provider::EmbeddingCredentials::Gemini { api_key: credentials.api_key, }; let model = nvisy_rig::provider::EmbeddingModel::Gemini(self.model); - EmbeddingProvider::new(&rig_creds, &model) - .map_err(|e| WorkflowError::Internal(e.to_string())) + EmbeddingProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/milvus.rs b/crates/nvisy-runtime/src/provider/backend/milvus.rs index 37e094a..9893339 100644 --- a/crates/nvisy-runtime/src/provider/backend/milvus.rs +++ b/crates/nvisy-runtime/src/provider/backend/milvus.rs @@ -1,11 +1,11 @@ //! Milvus vector database provider. -use nvisy_dal::provider::MilvusConfig; +use nvisy_dal::provider::{MilvusConfig, MilvusProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// Default Milvus port. fn default_milvus_port() -> u16 { @@ -43,11 +43,12 @@ pub struct MilvusParams { pub dimensions: Option, } +#[async_trait::async_trait] impl IntoProvider for MilvusParams { type Credentials = MilvusCredentials; - type Output = MilvusConfig; + type Output = MilvusProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let mut config = MilvusConfig::new(credentials.host) .with_port(credentials.port) .with_collection(self.collection); @@ -62,6 +63,8 @@ impl IntoProvider for MilvusParams { config = config.with_dimensions(dimensions); } - Ok(config) + MilvusProvider::new(&config) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/mod.rs b/crates/nvisy-runtime/src/provider/backend/mod.rs index 627804a..c755dec 100644 --- a/crates/nvisy-runtime/src/provider/backend/mod.rs +++ b/crates/nvisy-runtime/src/provider/backend/mod.rs @@ -3,26 +3,26 @@ //! Each provider file contains credentials and params for a specific backend: //! //! ## Storage backends -//! - [`s3`]: Amazon S3 -//! - [`gcs`]: Google Cloud Storage -//! - [`azblob`]: Azure Blob Storage -//! - [`postgres`]: PostgreSQL -//! - [`mysql`]: MySQL +//! - `s3` - Amazon S3 +//! - `gcs` - Google Cloud Storage +//! - `azblob` - Azure Blob Storage +//! - `postgres` - PostgreSQL +//! - `mysql` - MySQL //! //! ## Vector databases -//! 
- [`qdrant`]: Qdrant vector database -//! - [`pinecone`]: Pinecone vector database -//! - [`milvus`]: Milvus vector database -//! - [`pgvector`]: pgvector (PostgreSQL extension) +//! - `qdrant` - Qdrant vector database +//! - `pinecone` - Pinecone vector database +//! - `milvus` - Milvus vector database +//! - `pgvector` - pgvector (PostgreSQL extension) //! //! ## AI providers -//! - [`openai`]: OpenAI (completion + embedding) -//! - [`anthropic`]: Anthropic (completion only) -//! - [`cohere`]: Cohere (completion + embedding) -//! - [`gemini`]: Google Gemini (completion + embedding) -//! - [`perplexity`]: Perplexity (completion only) +//! - `openai` - OpenAI (completion + embedding) +//! - `anthropic` - Anthropic (completion only) +//! - `cohere` - Cohere (completion + embedding) +//! - `gemini` - Google Gemini (completion + embedding) +//! - `perplexity` - Perplexity (completion only) -use crate::error::WorkflowResult; +use crate::error::Result; // Storage backends mod azblob; @@ -64,13 +64,14 @@ pub use gemini::{GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParam pub use openai::{OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams}; pub use perplexity::{PerplexityCompletionParams, PerplexityCredentials}; -/// Trait for provider parameters that can be combined with credentials to create a provider/config. +/// Trait for provider parameters that can be combined with credentials to create a provider. +#[async_trait::async_trait] pub trait IntoProvider { /// The credentials type required by this provider. - type Credentials; - /// The output type (provider instance or config). + type Credentials: Send; + /// The output type (provider instance). type Output; - /// Combines params with credentials to create the output. - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult; + /// Combines params with credentials to create the provider. + async fn into_provider(self, credentials: Self::Credentials) -> Result; } diff --git a/crates/nvisy-runtime/src/provider/backend/mysql.rs b/crates/nvisy-runtime/src/provider/backend/mysql.rs index e76777f..c2686f5 100644 --- a/crates/nvisy-runtime/src/provider/backend/mysql.rs +++ b/crates/nvisy-runtime/src/provider/backend/mysql.rs @@ -1,11 +1,11 @@ //! MySQL provider. -use nvisy_dal::provider::MysqlConfig; +use nvisy_dal::provider::{MysqlConfig, MysqlProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// MySQL credentials. 
#[derive(Debug, Clone, Serialize, Deserialize)] @@ -26,17 +26,18 @@ pub struct MysqlParams { pub database: Option, } +#[async_trait::async_trait] impl IntoProvider for MysqlParams { type Credentials = MysqlCredentials; - type Output = MysqlConfig; + type Output = MysqlProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let mut config = MysqlConfig::new(credentials.connection_string).with_table(self.table); if let Some(database) = self.database { config = config.with_database(database); } - Ok(config) + MysqlProvider::new(&config).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/openai.rs b/crates/nvisy-runtime/src/provider/backend/openai.rs index fd1fcf6..36dc484 100644 --- a/crates/nvisy-runtime/src/provider/backend/openai.rs +++ b/crates/nvisy-runtime/src/provider/backend/openai.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// OpenAI credentials. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -35,17 +35,17 @@ impl OpenAiCompletionParams { } } +#[async_trait::async_trait] impl IntoProvider for OpenAiCompletionParams { type Credentials = OpenAiCredentials; type Output = CompletionProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let rig_creds = nvisy_rig::provider::CompletionCredentials::OpenAi { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::OpenAi(self.model); - CompletionProvider::new(&rig_creds, &model) - .map_err(|e| WorkflowError::Internal(e.to_string())) + CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) } } @@ -68,16 +68,16 @@ impl OpenAiEmbeddingParams { } } +#[async_trait::async_trait] impl IntoProvider for OpenAiEmbeddingParams { type Credentials = OpenAiCredentials; type Output = EmbeddingProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let rig_creds = nvisy_rig::provider::EmbeddingCredentials::OpenAi { api_key: credentials.api_key, }; let model = nvisy_rig::provider::EmbeddingModel::OpenAi(self.model); - EmbeddingProvider::new(&rig_creds, &model) - .map_err(|e| WorkflowError::Internal(e.to_string())) + EmbeddingProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/perplexity.rs b/crates/nvisy-runtime/src/provider/backend/perplexity.rs index 763f2d6..03106a1 100644 --- a/crates/nvisy-runtime/src/provider/backend/perplexity.rs +++ b/crates/nvisy-runtime/src/provider/backend/perplexity.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// Perplexity credentials. 
#[derive(Debug, Clone, Serialize, Deserialize)] @@ -33,16 +33,16 @@ impl PerplexityCompletionParams { } } +#[async_trait::async_trait] impl IntoProvider for PerplexityCompletionParams { type Credentials = PerplexityCredentials; type Output = CompletionProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let rig_creds = nvisy_rig::provider::CompletionCredentials::Perplexity { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::Perplexity(self.model); - CompletionProvider::new(&rig_creds, &model) - .map_err(|e| WorkflowError::Internal(e.to_string())) + CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/pgvector.rs b/crates/nvisy-runtime/src/provider/backend/pgvector.rs index eefdcd3..8bce688 100644 --- a/crates/nvisy-runtime/src/provider/backend/pgvector.rs +++ b/crates/nvisy-runtime/src/provider/backend/pgvector.rs @@ -1,11 +1,11 @@ //! pgvector (PostgreSQL extension) provider. -use nvisy_dal::provider::PgVectorConfig; +use nvisy_dal::provider::{PgVectorConfig, PgVectorProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// pgvector credentials. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -25,11 +25,16 @@ pub struct PgVectorParams { pub dimensions: usize, } +#[async_trait::async_trait] impl IntoProvider for PgVectorParams { type Credentials = PgVectorCredentials; - type Output = PgVectorConfig; + type Output = PgVectorProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { - Ok(PgVectorConfig::new(credentials.connection_url, self.dimensions).with_table(self.table)) + async fn into_provider(self, credentials: Self::Credentials) -> Result { + let config = + PgVectorConfig::new(credentials.connection_url, self.dimensions).with_table(self.table); + PgVectorProvider::new(&config) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/pinecone.rs b/crates/nvisy-runtime/src/provider/backend/pinecone.rs index 6858a44..f9ab4dd 100644 --- a/crates/nvisy-runtime/src/provider/backend/pinecone.rs +++ b/crates/nvisy-runtime/src/provider/backend/pinecone.rs @@ -1,11 +1,11 @@ //! Pinecone vector database provider. -use nvisy_dal::provider::PineconeConfig; +use nvisy_dal::provider::{PineconeConfig, PineconeProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// Pinecone credentials. 
#[derive(Debug, Clone, Serialize, Deserialize)] @@ -31,11 +31,12 @@ pub struct PineconeParams { pub dimensions: Option, } +#[async_trait::async_trait] impl IntoProvider for PineconeParams { type Credentials = PineconeCredentials; - type Output = PineconeConfig; + type Output = PineconeProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let mut config = PineconeConfig::new(credentials.api_key, credentials.environment, self.index); @@ -46,6 +47,8 @@ impl IntoProvider for PineconeParams { config = config.with_dimensions(dimensions); } - Ok(config) + PineconeProvider::new(&config) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/postgres.rs b/crates/nvisy-runtime/src/provider/backend/postgres.rs index 537a976..22f6290 100644 --- a/crates/nvisy-runtime/src/provider/backend/postgres.rs +++ b/crates/nvisy-runtime/src/provider/backend/postgres.rs @@ -1,11 +1,11 @@ //! PostgreSQL provider. -use nvisy_dal::provider::PostgresConfig; +use nvisy_dal::provider::{PostgresConfig, PostgresProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// PostgreSQL credentials. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -26,17 +26,18 @@ pub struct PostgresParams { pub schema: Option, } +#[async_trait::async_trait] impl IntoProvider for PostgresParams { type Credentials = PostgresCredentials; - type Output = PostgresConfig; + type Output = PostgresProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let mut config = PostgresConfig::new(credentials.connection_string).with_table(self.table); if let Some(schema) = self.schema { config = config.with_schema(schema); } - Ok(config) + PostgresProvider::new(&config).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/qdrant.rs b/crates/nvisy-runtime/src/provider/backend/qdrant.rs index 0c48d0a..7e83784 100644 --- a/crates/nvisy-runtime/src/provider/backend/qdrant.rs +++ b/crates/nvisy-runtime/src/provider/backend/qdrant.rs @@ -1,11 +1,11 @@ //! Qdrant vector database provider. -use nvisy_dal::provider::QdrantConfig; +use nvisy_dal::provider::{QdrantConfig, QdrantProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// Qdrant credentials. 
#[derive(Debug, Clone, Serialize, Deserialize)] @@ -29,11 +29,12 @@ pub struct QdrantParams { pub dimensions: Option, } +#[async_trait::async_trait] impl IntoProvider for QdrantParams { type Credentials = QdrantCredentials; - type Output = QdrantConfig; + type Output = QdrantProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let mut config = QdrantConfig::new(credentials.url).with_collection(self.collection); if let Some(api_key) = credentials.api_key { @@ -43,6 +44,8 @@ impl IntoProvider for QdrantParams { config = config.with_dimensions(dimensions); } - Ok(config) + QdrantProvider::new(&config) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/s3.rs b/crates/nvisy-runtime/src/provider/backend/s3.rs index 2931018..765d880 100644 --- a/crates/nvisy-runtime/src/provider/backend/s3.rs +++ b/crates/nvisy-runtime/src/provider/backend/s3.rs @@ -1,11 +1,11 @@ //! Amazon S3 provider. -use nvisy_dal::provider::S3Config; +use nvisy_dal::provider::{S3Config, S3Provider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::IntoProvider; -use crate::error::WorkflowResult; +use crate::error::{Error, Result}; /// Amazon S3 credentials. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -33,11 +33,12 @@ pub struct S3Params { pub prefix: Option, } +#[async_trait::async_trait] impl IntoProvider for S3Params { type Credentials = S3Credentials; - type Output = S3Config; + type Output = S3Provider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { let mut config = S3Config::new(self.bucket, credentials.region) .with_credentials(credentials.access_key_id, credentials.secret_access_key); @@ -48,6 +49,6 @@ impl IntoProvider for S3Params { config = config.with_prefix(prefix); } - Ok(config) + S3Provider::new(&config).map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/inputs.rs b/crates/nvisy-runtime/src/provider/inputs.rs index c6889e9..edb2e52 100644 --- a/crates/nvisy-runtime/src/provider/inputs.rs +++ b/crates/nvisy-runtime/src/provider/inputs.rs @@ -3,8 +3,7 @@ use derive_more::From; use nvisy_dal::core::Context; use nvisy_dal::provider::{ - AzblobConfig, AzblobProvider, GcsConfig, GcsProvider, MysqlConfig, MysqlProvider, - PostgresConfig, PostgresProvider, S3Config, S3Provider, + AzblobProvider, GcsProvider, MysqlProvider, PostgresProvider, S3Provider, }; use nvisy_dal::{AnyDataValue, DataTypeId}; use serde::{Deserialize, Serialize}; @@ -14,7 +13,7 @@ use super::ProviderCredentials; use super::backend::{ AzblobParams, GcsParams, IntoProvider, MysqlParams, PostgresParams, S3Params, }; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// Input provider parameters (storage backends only, no vector DBs). 
#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] @@ -64,28 +63,29 @@ impl InputProviderParams { } } +#[async_trait::async_trait] impl IntoProvider for InputProviderParams { type Credentials = ProviderCredentials; - type Output = InputProviderConfig; + type Output = InputProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { match (self, credentials) { (Self::S3(p), ProviderCredentials::S3(c)) => { - Ok(InputProviderConfig::S3(p.into_provider(c)?)) + Ok(InputProvider::S3(p.into_provider(c).await?)) } (Self::Gcs(p), ProviderCredentials::Gcs(c)) => { - Ok(InputProviderConfig::Gcs(p.into_provider(c)?)) + Ok(InputProvider::Gcs(p.into_provider(c).await?)) } (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { - Ok(InputProviderConfig::Azblob(p.into_provider(c)?)) + Ok(InputProvider::Azblob(p.into_provider(c).await?)) } (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { - Ok(InputProviderConfig::Postgres(p.into_provider(c)?)) + Ok(InputProvider::Postgres(p.into_provider(c).await?)) } (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { - Ok(InputProviderConfig::Mysql(p.into_provider(c)?)) + Ok(InputProvider::Mysql(p.into_provider(c).await?)) } - (params, creds) => Err(WorkflowError::Internal(format!( + (params, creds) => Err(Error::Internal(format!( "credentials type mismatch: expected '{}', got '{}'", params.kind(), creds.kind() @@ -94,39 +94,6 @@ impl IntoProvider for InputProviderParams { } } -/// Resolved input provider config (params + credentials combined). -#[derive(Debug, Clone)] -pub enum InputProviderConfig { - S3(S3Config), - Gcs(GcsConfig), - Azblob(AzblobConfig), - Postgres(PostgresConfig), - Mysql(MysqlConfig), -} - -impl InputProviderConfig { - /// Creates an input provider from this config. - pub fn into_provider(self) -> WorkflowResult { - match self { - Self::S3(config) => S3Provider::new(&config) - .map(InputProvider::S3) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Gcs(config) => GcsProvider::new(&config) - .map(InputProvider::Gcs) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Azblob(config) => AzblobProvider::new(&config) - .map(InputProvider::Azblob) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Postgres(config) => PostgresProvider::new(&config) - .map(InputProvider::Postgres) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Mysql(config) => MysqlProvider::new(&config) - .map(InputProvider::Mysql) - .map_err(|e| WorkflowError::Internal(e.to_string())), - } - } -} - /// Input provider instance (created from config). #[derive(Debug, Clone)] pub enum InputProvider { @@ -146,8 +113,24 @@ impl InputProvider { } } + /// Reads data from the provider as a stream. + /// + /// Returns a boxed stream of type-erased values that can be processed incrementally. + pub async fn read_stream( + &self, + ctx: &Context, + ) -> Result>> { + match self { + Self::S3(p) => read_stream!(p, ctx, Blob), + Self::Gcs(p) => read_stream!(p, ctx, Blob), + Self::Azblob(p) => read_stream!(p, ctx, Blob), + Self::Postgres(p) => read_stream!(p, ctx, Record), + Self::Mysql(p) => read_stream!(p, ctx, Record), + } + } + /// Reads data from the provider, returning type-erased values. 
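A hedged sketch of consuming the new `read_stream` path; the per-item error type is truncated in the signature above, so the example only assumes it is displayable and converts it the same way the macros do.

```rust
use futures::StreamExt;

// Hypothetical consumer: counts items pulled from the boxed stream.
async fn count_items(provider: &InputProvider, ctx: &Context) -> Result<usize> {
    let mut stream = provider.read_stream(ctx).await?;
    let mut count = 0usize;
    while let Some(item) = stream.next().await {
        // Propagate per-item errors as internal errors, mirroring read_stream!.
        let _value = item.map_err(|e| Error::Internal(e.to_string()))?;
        count += 1;
    }
    Ok(count)
}
```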
- pub async fn read(&self, ctx: &Context) -> WorkflowResult> { + pub async fn read(&self, ctx: &Context) -> Result> { match self { Self::S3(p) => read_data!(p, ctx, Blob), Self::Gcs(p) => read_data!(p, ctx, Blob), @@ -158,6 +141,24 @@ impl InputProvider { } } +/// Helper macro to read data from a provider as a boxed stream of AnyDataValue. +macro_rules! read_stream { + ($provider:expr, $ctx:expr, $variant:ident) => {{ + use futures::StreamExt; + use nvisy_dal::core::DataInput; + + let stream = $provider + .read($ctx) + .await + .map_err(|e| Error::Internal(e.to_string()))?; + + let mapped = stream.map(|result| result.map(AnyDataValue::$variant)); + Ok(Box::pin(mapped) as futures::stream::BoxStream<'static, _>) + }}; +} + +use read_stream; + /// Helper macro to read data from a provider and convert to AnyDataValue. macro_rules! read_data { ($provider:expr, $ctx:expr, $variant:ident) => {{ @@ -168,14 +169,14 @@ macro_rules! read_data { let stream = $provider .read($ctx) .await - .map_err(|e| WorkflowError::Internal(e.to_string()))?; + .map_err(|e| Error::Internal(e.to_string()))?; let items: Vec<$variant> = stream .collect::>() .await .into_iter() - .collect::, _>>() - .map_err(|e| WorkflowError::Internal(e.to_string()))?; + .collect::, _>>() + .map_err(|e| Error::Internal(e.to_string()))?; Ok(items.into_iter().map(AnyDataValue::$variant).collect()) }}; diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs index c8aaefa..9fe8276 100644 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/mod.rs @@ -10,10 +10,10 @@ //! # Module Structure //! //! - [`backend`]: Individual provider implementations (credentials + params) -//! - [`inputs`]: Input provider types and read operations -//! - [`outputs`]: Output provider types and write operations -//! - [`ai`]: AI provider types (completion + embedding) -//! - [`registry`]: Credentials registry for workflow execution +//! - `inputs`: Input provider types and read operations +//! - `outputs`: Output provider types and write operations +//! - `ai`: AI provider types (completion + embedding) +//! - `registry`: Credentials registry for workflow execution mod ai; pub mod backend; @@ -43,8 +43,8 @@ pub use backend::{ }; use derive_more::From; -pub use inputs::{InputProvider, InputProviderConfig, InputProviderParams}; -pub use outputs::{OutputProvider, OutputProviderConfig, OutputProviderParams}; +pub use inputs::{InputProvider, InputProviderParams}; +pub use outputs::{OutputProvider, OutputProviderParams}; pub use registry::CredentialsRegistry; use serde::{Deserialize, Serialize}; diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs index a21f81f..30cbd19 100644 --- a/crates/nvisy-runtime/src/provider/outputs.rs +++ b/crates/nvisy-runtime/src/provider/outputs.rs @@ -1,22 +1,29 @@ //! Output provider types and implementations. 
+use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context as TaskContext, Poll}; + use derive_more::From; +use futures::Sink; use nvisy_dal::core::Context; use nvisy_dal::provider::{ - AzblobConfig, AzblobProvider, GcsConfig, GcsProvider, MilvusConfig, MilvusProvider, - MysqlConfig, MysqlProvider, PgVectorConfig, PgVectorProvider, PineconeConfig, PineconeProvider, - PostgresConfig, PostgresProvider, QdrantConfig, QdrantProvider, S3Config, S3Provider, + AzblobProvider, GcsProvider, MilvusProvider, MysqlProvider, PgVectorProvider, PineconeProvider, + PostgresProvider, QdrantProvider, S3Provider, }; use nvisy_dal::{AnyDataValue, DataTypeId}; use serde::{Deserialize, Serialize}; +use tokio::sync::Mutex; use uuid::Uuid; +use crate::graph::compiled::DataSink; + use super::ProviderCredentials; use super::backend::{ AzblobParams, GcsParams, IntoProvider, MilvusParams, MysqlParams, PgVectorParams, PineconeParams, PostgresParams, QdrantParams, S3Params, }; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// Output provider parameters (storage backends + vector DBs). #[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] @@ -85,40 +92,41 @@ impl OutputProviderParams { } } +#[async_trait::async_trait] impl IntoProvider for OutputProviderParams { type Credentials = ProviderCredentials; - type Output = OutputProviderConfig; + type Output = OutputProvider; - fn into_provider(self, credentials: Self::Credentials) -> WorkflowResult { + async fn into_provider(self, credentials: Self::Credentials) -> Result { match (self, credentials) { (Self::S3(p), ProviderCredentials::S3(c)) => { - Ok(OutputProviderConfig::S3(p.into_provider(c)?)) + Ok(OutputProvider::S3(p.into_provider(c).await?)) } (Self::Gcs(p), ProviderCredentials::Gcs(c)) => { - Ok(OutputProviderConfig::Gcs(p.into_provider(c)?)) + Ok(OutputProvider::Gcs(p.into_provider(c).await?)) } (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { - Ok(OutputProviderConfig::Azblob(p.into_provider(c)?)) + Ok(OutputProvider::Azblob(p.into_provider(c).await?)) } (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { - Ok(OutputProviderConfig::Postgres(p.into_provider(c)?)) + Ok(OutputProvider::Postgres(p.into_provider(c).await?)) } (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { - Ok(OutputProviderConfig::Mysql(p.into_provider(c)?)) + Ok(OutputProvider::Mysql(p.into_provider(c).await?)) } (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => { - Ok(OutputProviderConfig::Qdrant(p.into_provider(c)?)) + Ok(OutputProvider::Qdrant(p.into_provider(c).await?)) } (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => { - Ok(OutputProviderConfig::Pinecone(p.into_provider(c)?)) + Ok(OutputProvider::Pinecone(p.into_provider(c).await?)) } (Self::Milvus(p), ProviderCredentials::Milvus(c)) => { - Ok(OutputProviderConfig::Milvus(p.into_provider(c)?)) + Ok(OutputProvider::Milvus(p.into_provider(c).await?)) } (Self::PgVector(p), ProviderCredentials::PgVector(c)) => { - Ok(OutputProviderConfig::PgVector(p.into_provider(c)?)) + Ok(OutputProvider::PgVector(p.into_provider(c).await?)) } - (params, creds) => Err(WorkflowError::Internal(format!( + (params, creds) => Err(Error::Internal(format!( "credentials type mismatch: expected '{}', got '{}'", params.kind(), creds.kind() @@ -127,59 +135,6 @@ impl IntoProvider for OutputProviderParams { } } -/// Resolved output provider config (params + credentials combined). 
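The fallback arm shared by both param enums rejects mismatched pairs. A hypothetical probe (types and imports assumed to be in scope; `qdrant_params` and `s3_creds` are placeholders):

```rust
// Hypothetical mismatch check: S3 credentials handed to Qdrant params produce
// Error::Internal("credentials type mismatch: expected 'qdrant', got 's3'")-style output.
async fn demo_mismatch(qdrant_params: QdrantParams, s3_creds: S3Credentials) {
    let result = OutputProviderParams::Qdrant(qdrant_params)
        .into_provider(ProviderCredentials::S3(s3_creds))
        .await;
    assert!(result.is_err());
}
```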
-#[derive(Debug, Clone)] -pub enum OutputProviderConfig { - S3(S3Config), - Gcs(GcsConfig), - Azblob(AzblobConfig), - Postgres(PostgresConfig), - Mysql(MysqlConfig), - Qdrant(QdrantConfig), - Pinecone(PineconeConfig), - Milvus(MilvusConfig), - PgVector(PgVectorConfig), -} - -impl OutputProviderConfig { - /// Creates an output provider from this config. - pub async fn into_provider(self) -> WorkflowResult { - match self { - Self::S3(config) => S3Provider::new(&config) - .map(OutputProvider::S3) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Gcs(config) => GcsProvider::new(&config) - .map(OutputProvider::Gcs) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Azblob(config) => AzblobProvider::new(&config) - .map(OutputProvider::Azblob) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Postgres(config) => PostgresProvider::new(&config) - .map(OutputProvider::Postgres) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Mysql(config) => MysqlProvider::new(&config) - .map(OutputProvider::Mysql) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Qdrant(config) => QdrantProvider::new(&config) - .await - .map(OutputProvider::Qdrant) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Pinecone(config) => PineconeProvider::new(&config) - .await - .map(OutputProvider::Pinecone) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::Milvus(config) => MilvusProvider::new(&config) - .await - .map(OutputProvider::Milvus) - .map_err(|e| WorkflowError::Internal(e.to_string())), - Self::PgVector(config) => PgVectorProvider::new(&config) - .await - .map(OutputProvider::PgVector) - .map_err(|e| WorkflowError::Internal(e.to_string())), - } - } -} - /// Output provider instance (created from config). #[derive(Debug)] pub enum OutputProvider { @@ -206,8 +161,16 @@ impl OutputProvider { } } + /// Creates a sink for streaming writes to the provider. + /// + /// The sink buffers items and writes them on flush/close. + pub async fn write_sink(self, ctx: &Context) -> Result { + let sink = ProviderSink::new(self, ctx.clone()); + Ok(Box::pin(sink)) + } + /// Writes data to the provider, accepting type-erased values. - pub async fn write(&self, ctx: &Context, data: Vec) -> WorkflowResult<()> { + pub async fn write(&self, ctx: &Context, data: Vec) -> Result<()> { match self { Self::S3(p) => write_data!(p, ctx, data, Blob, into_blob), Self::Gcs(p) => write_data!(p, ctx, data, Blob, into_blob), @@ -222,6 +185,94 @@ impl OutputProvider { } } +/// A sink that buffers items and writes them to an output provider. 
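A hedged usage sketch for the new streaming write path introduced above: obtain the boxed sink from `write_sink`, feed values into it, and close it to flush the buffered items. `values` is a placeholder batch.

```rust
use futures::SinkExt;

// Hedged sketch: feed buffers items; close flushes them to the backend.
async fn stream_out(
    provider: OutputProvider,
    ctx: &Context,
    values: Vec<AnyDataValue>,
) -> Result<()> {
    let mut sink = provider.write_sink(ctx).await?;
    for value in values {
        sink.feed(value).await?; // buffers without flushing
    }
    sink.close().await?; // triggers the buffered write
    Ok(())
}
```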
+struct ProviderSink { + provider: Arc, + ctx: Context, + buffer: Arc>>, + flush_future: Option> + Send>>>, +} + +impl ProviderSink { + fn new(provider: OutputProvider, ctx: Context) -> Self { + Self { + provider: Arc::new(provider), + ctx, + buffer: Arc::new(Mutex::new(Vec::new())), + flush_future: None, + } + } +} + +impl Sink for ProviderSink { + type Error = Error; + + fn poll_ready( + self: Pin<&mut Self>, + _cx: &mut TaskContext<'_>, + ) -> Poll> { + Poll::Ready(Ok(())) + } + + fn start_send( + self: Pin<&mut Self>, + item: AnyDataValue, + ) -> std::result::Result<(), Self::Error> { + let buffer = self.buffer.clone(); + // Use blocking lock since we're in a sync context + if let Ok(mut guard) = buffer.try_lock() { + guard.push(item); + Ok(()) + } else { + Err(Error::Internal("buffer lock contention".into())) + } + } + + fn poll_flush( + mut self: Pin<&mut Self>, + cx: &mut TaskContext<'_>, + ) -> Poll> { + // If we have an in-progress flush, poll it + if let Some(ref mut future) = self.flush_future { + return match future.as_mut().poll(cx) { + Poll::Ready(result) => { + self.flush_future = None; + Poll::Ready(result) + } + Poll::Pending => Poll::Pending, + }; + } + + // Take items from buffer and start write + let buffer = self.buffer.clone(); + let provider = self.provider.clone(); + let ctx = self.ctx.clone(); + + let future = Box::pin(async move { + let items = { + let mut guard = buffer.lock().await; + std::mem::take(&mut *guard) + }; + + if items.is_empty() { + return Ok(()); + } + + provider.write(&ctx, items).await + }); + + self.flush_future = Some(future); + self.poll_flush(cx) + } + + fn poll_close( + self: Pin<&mut Self>, + cx: &mut TaskContext<'_>, + ) -> Poll> { + self.poll_flush(cx) + } +} + /// Helper macro to write data to a provider from AnyDataValue. macro_rules! write_data { ($provider:expr, $ctx:expr, $data:expr, $type:ident, $converter:ident) => {{ @@ -233,7 +284,7 @@ macro_rules! write_data { $provider .write($ctx, items) .await - .map_err(|e| WorkflowError::Internal(e.to_string())) + .map_err(|e| Error::Internal(e.to_string())) }}; } diff --git a/crates/nvisy-runtime/src/provider/registry.rs b/crates/nvisy-runtime/src/provider/registry.rs index 7567781..6eb3dbd 100644 --- a/crates/nvisy-runtime/src/provider/registry.rs +++ b/crates/nvisy-runtime/src/provider/registry.rs @@ -5,7 +5,7 @@ use std::collections::HashMap; use uuid::Uuid; use super::ProviderCredentials; -use crate::error::{WorkflowError, WorkflowResult}; +use crate::error::{Error, Result}; /// In-memory credentials registry. /// @@ -19,27 +19,28 @@ impl CredentialsRegistry { /// Creates a new registry from a JSON value. /// /// Expects a JSON object with UUID keys and credential objects as values. - pub fn new(value: serde_json::Value) -> WorkflowResult { + pub fn new(value: serde_json::Value) -> Result { let map: HashMap = - serde_json::from_value(value).map_err(WorkflowError::CredentialsRegistry)?; + serde_json::from_value(value).map_err(Error::CredentialsRegistry)?; Ok(Self { credentials: map }) } /// Retrieves credentials by ID. - pub fn get(&self, credentials_id: Uuid) -> WorkflowResult<&ProviderCredentials> { + pub fn get(&self, credentials_id: Uuid) -> Result<&ProviderCredentials> { self.credentials .get(&credentials_id) - .ok_or(WorkflowError::CredentialsNotFound(credentials_id)) + .ok_or(Error::CredentialsNotFound(credentials_id)) } /// Inserts credentials with a new UUID v4. /// /// Generates a unique UUID that doesn't conflict with existing entries. 
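A hypothetical round-trip over the registry API shown above: `insert` generates a fresh UUID key, `get` resolves it, and unknown ids surface as `CredentialsNotFound`.

```rust
use uuid::Uuid;

// Hypothetical registry round-trip; imports for CredentialsRegistry,
// ProviderCredentials, Error, and Result are assumed to be in scope.
fn registry_roundtrip(
    mut registry: CredentialsRegistry,
    creds: ProviderCredentials,
) -> Result<()> {
    let id = registry.insert(creds);
    let _stored = registry.get(id)?;
    assert!(matches!(
        registry.get(Uuid::new_v4()),
        Err(Error::CredentialsNotFound(_))
    ));
    Ok(())
}
```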
pub fn insert(&mut self, credentials: ProviderCredentials) -> Uuid { + use std::collections::hash_map::Entry; loop { let id = Uuid::new_v4(); - if !self.credentials.contains_key(&id) { - self.credentials.insert(id, credentials); + if let Entry::Vacant(entry) = self.credentials.entry(id) { + entry.insert(credentials); return id; } } From 7f4d8fac1c7a35608d3637b164128f0691d77328 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 22 Jan 2026 13:48:05 +0100 Subject: [PATCH 17/28] refactor(runtime): reorganize graph module structure - Add Position field to Node definitions for visual editor support - Create compiled/input folder with stream.rs for InputStream/DataStream - Create compiled/output folder with stream.rs for OutputStream/DataSink - Split compiled/transform.rs into separate processor files - Add Process trait to compiled/transform for runtime processing - Move graph/transform into graph/definition/transform - Remove Transform trait from definitions (now pure data types) - Simplify SwitchDef to single condition with match/else ports - Add derive_builder to Edge, Node, and WorkflowMetadata - Use Jiff Timestamp for FileDateCondition Structure: compiled/{input,output,transform}/ - runtime types with Process trait definition/transform/ - serializable transform definitions --- crates/nvisy-runtime/src/engine/executor.rs | 2 +- .../nvisy-runtime/src/graph/compiled/graph.rs | 12 +- .../graph/compiled/{input.rs => input/mod.rs} | 4 +- .../src/graph/compiled/{ => input}/stream.rs | 98 +---- .../nvisy-runtime/src/graph/compiled/mod.rs | 8 +- .../compiled/{output.rs => output/mod.rs} | 4 +- .../src/graph/compiled/output/stream.rs | 101 +++++ .../nvisy-runtime/src/graph/compiled/route.rs | 398 +++++++++++++----- .../src/graph/compiled/transform.rs | 362 ---------------- .../src/graph/compiled/transform/chunk.rs | 66 +++ .../src/graph/compiled/transform/derive.rs | 57 +++ .../src/graph/compiled/transform/embedding.rs | 47 +++ .../src/graph/compiled/transform/enrich.rs | 58 +++ .../src/graph/compiled/transform/extract.rs | 59 +++ .../src/graph/compiled/transform/mod.rs | 75 ++++ .../src/graph/compiled/transform/partition.rs | 56 +++ .../src/graph/definition/edge.rs | 77 ++-- .../src/graph/definition/metadata.rs | 47 ++- .../nvisy-runtime/src/graph/definition/mod.rs | 14 +- .../src/graph/definition/node.rs | 65 ++- .../src/graph/definition/output.rs | 6 +- .../src/graph/definition/route.rs | 272 +++++++----- .../src/graph/definition/transform.rs | 7 - .../graph/{ => definition}/transform/chunk.rs | 21 +- .../{ => definition}/transform/derive.rs | 21 +- .../graph/definition/transform/embedding.rs | 17 + .../{ => definition}/transform/enrich.rs | 21 +- .../{ => definition}/transform/extract.rs | 21 +- .../src/graph/definition/transform/mod.rs | 43 ++ .../{ => definition}/transform/partition.rs | 21 +- .../src/graph/definition/workflow.rs | 3 +- crates/nvisy-runtime/src/graph/mod.rs | 19 +- .../src/graph/transform/embedding.rs | 32 -- .../nvisy-runtime/src/graph/transform/mod.rs | 80 ---- 34 files changed, 1217 insertions(+), 977 deletions(-) rename crates/nvisy-runtime/src/graph/compiled/{input.rs => input/mod.rs} (94%) rename crates/nvisy-runtime/src/graph/compiled/{ => input}/stream.rs (55%) rename crates/nvisy-runtime/src/graph/compiled/{output.rs => output/mod.rs} (94%) create mode 100644 crates/nvisy-runtime/src/graph/compiled/output/stream.rs delete mode 100644 crates/nvisy-runtime/src/graph/compiled/transform.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/transform/chunk.rs 
create mode 100644 crates/nvisy-runtime/src/graph/compiled/transform/derive.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/transform/embedding.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/transform/enrich.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/transform/extract.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/transform/mod.rs create mode 100644 crates/nvisy-runtime/src/graph/compiled/transform/partition.rs delete mode 100644 crates/nvisy-runtime/src/graph/definition/transform.rs rename crates/nvisy-runtime/src/graph/{ => definition}/transform/chunk.rs (74%) rename crates/nvisy-runtime/src/graph/{ => definition}/transform/derive.rs (58%) create mode 100644 crates/nvisy-runtime/src/graph/definition/transform/embedding.rs rename crates/nvisy-runtime/src/graph/{ => definition}/transform/enrich.rs (73%) rename crates/nvisy-runtime/src/graph/{ => definition}/transform/extract.rs (81%) create mode 100644 crates/nvisy-runtime/src/graph/definition/transform/mod.rs rename crates/nvisy-runtime/src/graph/{ => definition}/transform/partition.rs (62%) delete mode 100644 crates/nvisy-runtime/src/graph/transform/embedding.rs delete mode 100644 crates/nvisy-runtime/src/graph/transform/mod.rs diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index dedb0d8..66421f8 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -9,7 +9,7 @@ use super::EngineConfig; use super::context::ExecutionContext; use crate::error::{Error, Result}; use crate::graph::NodeId; -use crate::graph::compiled::{CompiledGraph, CompiledNode, InputStream, OutputStream}; +use crate::graph::compiled::{CompiledGraph, CompiledNode, InputStream, OutputStream, Process}; use crate::provider::CredentialsRegistry; /// Tracing target for engine operations. diff --git a/crates/nvisy-runtime/src/graph/compiled/graph.rs b/crates/nvisy-runtime/src/graph/compiled/graph.rs index 56ce789..3b6e25e 100644 --- a/crates/nvisy-runtime/src/graph/compiled/graph.rs +++ b/crates/nvisy-runtime/src/graph/compiled/graph.rs @@ -9,7 +9,9 @@ use super::input::CompiledInput; use super::node::CompiledNode; use super::output::CompiledOutput; use super::route::CompiledSwitch; -use crate::graph::definition::{EdgeData, NodeId, WorkflowMetadata}; +use crate::graph::definition::{ + ContentTypeCategory, ContentTypeCondition, EdgeData, NodeId, SwitchCondition, WorkflowMetadata, +}; /// A compiled workflow graph ready for execution. /// @@ -157,7 +159,13 @@ impl CompiledGraph { if let Some(node) = self.graph.node_weight_mut(idx) { // Use mem::replace with a placeholder to take ownership // This is safe because we won't access the graph again - let placeholder = CompiledNode::Switch(CompiledSwitch::new(vec![], None)); + let placeholder = CompiledNode::Switch(CompiledSwitch::new( + SwitchCondition::ContentType(ContentTypeCondition { + category: ContentTypeCategory::Other, + }), + String::new(), + String::new(), + )); let owned = std::mem::replace(node, placeholder); nodes.insert(*id, owned); } diff --git a/crates/nvisy-runtime/src/graph/compiled/input.rs b/crates/nvisy-runtime/src/graph/compiled/input/mod.rs similarity index 94% rename from crates/nvisy-runtime/src/graph/compiled/input.rs rename to crates/nvisy-runtime/src/graph/compiled/input/mod.rs index e008f22..dba1e79 100644 --- a/crates/nvisy-runtime/src/graph/compiled/input.rs +++ b/crates/nvisy-runtime/src/graph/compiled/input/mod.rs @@ -1,6 +1,8 @@ //! 
Compiled input node types. -use super::stream::InputStream; +mod stream; + +pub use stream::{DataStream, InputStream}; /// Compiled input node - ready to stream data. /// diff --git a/crates/nvisy-runtime/src/graph/compiled/stream.rs b/crates/nvisy-runtime/src/graph/compiled/input/stream.rs similarity index 55% rename from crates/nvisy-runtime/src/graph/compiled/stream.rs rename to crates/nvisy-runtime/src/graph/compiled/input/stream.rs index 9a5163b..5a0d667 100644 --- a/crates/nvisy-runtime/src/graph/compiled/stream.rs +++ b/crates/nvisy-runtime/src/graph/compiled/input/stream.rs @@ -1,20 +1,17 @@ -//! Stream types for compiled workflow data flow. +//! Input stream types for compiled workflow data flow. use std::pin::Pin; use std::task::{Context, Poll}; use futures::stream::BoxStream; -use futures::{Sink, SinkExt, Stream, StreamExt}; +use futures::{Stream, StreamExt}; use nvisy_dal::AnyDataValue; -use crate::error::{Error, Result}; +use crate::error::Result; /// A boxed stream of workflow data values. pub type DataStream = BoxStream<'static, Result>; -/// A boxed sink for workflow data values. -pub type DataSink = Pin + Send + 'static>>; - /// Input stream for reading data in a workflow. /// /// Wraps a boxed stream and provides metadata about the source. @@ -125,92 +122,3 @@ impl std::fmt::Debug for InputStream { .finish_non_exhaustive() } } - -/// Output stream for writing data in a workflow. -/// -/// Wraps a boxed sink and tracks write statistics. -pub struct OutputStream { - /// The underlying data sink. - sink: DataSink, - /// Optional buffer size for batching. - buffer_size: Option, - /// Number of items written so far. - items_written: usize, -} - -impl OutputStream { - /// Creates a new output stream. - pub fn new(sink: DataSink) -> Self { - Self { - sink, - buffer_size: None, - items_written: 0, - } - } - - /// Creates an output stream with buffering for batched writes. - pub fn with_buffer(sink: DataSink, buffer_size: usize) -> Self { - Self { - sink: Box::pin(sink.buffer(buffer_size)), - buffer_size: Some(buffer_size), - items_written: 0, - } - } - - /// Returns the buffer size, if set. - pub fn buffer_size(&self) -> Option { - self.buffer_size - } - - /// Returns the number of items written so far. - pub fn items_written(&self) -> usize { - self.items_written - } - - /// Consumes the stream and returns the inner boxed sink. 
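A small hypothetical helper showing how the re-exported `DataStream` alias can be built from an in-memory batch (handy in tests); nothing here is defined by the patch itself.

```rust
use futures::stream;

// Hypothetical test helper: turn an owned batch into a DataStream.
fn in_memory_stream(values: Vec<AnyDataValue>) -> DataStream {
    Box::pin(stream::iter(values.into_iter().map(Ok::<AnyDataValue, Error>)))
}
```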
- pub fn into_inner(self) -> DataSink { - self.sink - } -} - -impl Sink for OutputStream { - type Error = Error; - - fn poll_ready( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - self.sink.as_mut().poll_ready(cx) - } - - fn start_send( - mut self: Pin<&mut Self>, - item: AnyDataValue, - ) -> std::result::Result<(), Self::Error> { - self.items_written += 1; - self.sink.as_mut().start_send(item) - } - - fn poll_flush( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - self.sink.as_mut().poll_flush(cx) - } - - fn poll_close( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - self.sink.as_mut().poll_close(cx) - } -} - -impl std::fmt::Debug for OutputStream { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("OutputStream") - .field("buffer_size", &self.buffer_size) - .field("items_written", &self.items_written) - .finish_non_exhaustive() - } -} diff --git a/crates/nvisy-runtime/src/graph/compiled/mod.rs b/crates/nvisy-runtime/src/graph/compiled/mod.rs index c3d65c1..e789018 100644 --- a/crates/nvisy-runtime/src/graph/compiled/mod.rs +++ b/crates/nvisy-runtime/src/graph/compiled/mod.rs @@ -14,16 +14,14 @@ mod input; mod node; mod output; mod route; -mod stream; mod transform; pub use graph::CompiledGraph; -pub use input::CompiledInput; +pub use input::{CompiledInput, DataStream, InputStream}; pub use node::CompiledNode; -pub use output::CompiledOutput; +pub use output::{CompiledOutput, DataSink, OutputStream}; pub use route::CompiledSwitch; -pub use stream::{DataSink, DataStream, InputStream, OutputStream}; pub use transform::{ ChunkProcessor, CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, - ExtractProcessor, PartitionProcessor, + ExtractProcessor, PartitionProcessor, Process, }; diff --git a/crates/nvisy-runtime/src/graph/compiled/output.rs b/crates/nvisy-runtime/src/graph/compiled/output/mod.rs similarity index 94% rename from crates/nvisy-runtime/src/graph/compiled/output.rs rename to crates/nvisy-runtime/src/graph/compiled/output/mod.rs index a4e1682..3282bc4 100644 --- a/crates/nvisy-runtime/src/graph/compiled/output.rs +++ b/crates/nvisy-runtime/src/graph/compiled/output/mod.rs @@ -1,6 +1,8 @@ //! Compiled output node types. -use super::stream::OutputStream; +mod stream; + +pub use stream::{DataSink, OutputStream}; /// Compiled output node - ready to receive data. /// diff --git a/crates/nvisy-runtime/src/graph/compiled/output/stream.rs b/crates/nvisy-runtime/src/graph/compiled/output/stream.rs new file mode 100644 index 0000000..50873da --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/output/stream.rs @@ -0,0 +1,101 @@ +//! Output stream types for compiled workflow data flow. + +use std::pin::Pin; +use std::task::{Context, Poll}; + +use futures::{Sink, SinkExt}; +use nvisy_dal::AnyDataValue; + +use crate::error::Error; + +/// A boxed sink for workflow data values. +pub type DataSink = Pin + Send + 'static>>; + +/// Output stream for writing data in a workflow. +/// +/// Wraps a boxed sink and tracks write statistics. +pub struct OutputStream { + /// The underlying data sink. + sink: DataSink, + /// Optional buffer size for batching. + buffer_size: Option, + /// Number of items written so far. + items_written: usize, +} + +impl OutputStream { + /// Creates a new output stream. + pub fn new(sink: DataSink) -> Self { + Self { + sink, + buffer_size: None, + items_written: 0, + } + } + + /// Creates an output stream with buffering for batched writes. 
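A hedged sketch of using the relocated `OutputStream` wrapper: wrap a `DataSink`, send one value, and read back the write counter. The sink is assumed to come from a provider's `write_sink`.

```rust
use futures::SinkExt;

// Hedged sketch: OutputStream counts items as they pass through start_send.
async fn send_one(sink: DataSink, value: AnyDataValue) -> Result<usize> {
    let mut out = OutputStream::new(sink);
    out.send(value).await?;
    Ok(out.items_written())
}
```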
+ pub fn with_buffer(sink: DataSink, buffer_size: usize) -> Self { + Self { + sink: Box::pin(sink.buffer(buffer_size)), + buffer_size: Some(buffer_size), + items_written: 0, + } + } + + /// Returns the buffer size, if set. + pub fn buffer_size(&self) -> Option { + self.buffer_size + } + + /// Returns the number of items written so far. + pub fn items_written(&self) -> usize { + self.items_written + } + + /// Consumes the stream and returns the inner boxed sink. + pub fn into_inner(self) -> DataSink { + self.sink + } +} + +impl Sink for OutputStream { + type Error = Error; + + fn poll_ready( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + self.sink.as_mut().poll_ready(cx) + } + + fn start_send( + mut self: Pin<&mut Self>, + item: AnyDataValue, + ) -> std::result::Result<(), Self::Error> { + self.items_written += 1; + self.sink.as_mut().start_send(item) + } + + fn poll_flush( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + self.sink.as_mut().poll_flush(cx) + } + + fn poll_close( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + self.sink.as_mut().poll_close(cx) + } +} + +impl std::fmt::Debug for OutputStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OutputStream") + .field("buffer_size", &self.buffer_size) + .field("items_written", &self.items_written) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/route.rs b/crates/nvisy-runtime/src/graph/compiled/route.rs index 890a1f0..5103667 100644 --- a/crates/nvisy-runtime/src/graph/compiled/route.rs +++ b/crates/nvisy-runtime/src/graph/compiled/route.rs @@ -1,160 +1,291 @@ //! Compiled routing node types. +use jiff::Timestamp; use nvisy_dal::AnyDataValue; -use crate::graph::definition::{ContentTypeCategory, SwitchBranch, SwitchCondition}; +use crate::graph::definition::{ContentTypeCategory, PatternMatchType, SwitchCondition, SwitchDef}; /// Compiled switch node - ready to route data. /// -/// Evaluates conditions against input data and determines -/// which branch to route the data to. +/// Evaluates a condition against input data and determines +/// which output port to route the data to. #[derive(Debug, Clone)] pub struct CompiledSwitch { - /// Branches with conditions and targets. - branches: Vec, - /// Default target if no condition matches. - default: Option, + /// The condition to evaluate. + condition: SwitchCondition, + /// Output port for data matching the condition. + match_port: String, + /// Output port for data not matching the condition. + else_port: String, } impl CompiledSwitch { - /// Creates a new compiled switch from branches and default target. - pub fn new(branches: Vec, default: Option) -> Self { - Self { branches, default } - } - - /// Returns the branches. - pub fn branches(&self) -> &[SwitchBranch] { - &self.branches + /// Creates a new compiled switch. + pub fn new(condition: SwitchCondition, match_port: String, else_port: String) -> Self { + Self { + condition, + match_port, + else_port, + } } - /// Returns the default target. - pub fn default(&self) -> Option<&str> { - self.default.as_deref() + /// Returns all output port names. + pub fn output_ports(&self) -> impl Iterator { + [self.match_port.as_str(), self.else_port.as_str()].into_iter() } - /// Evaluates the switch conditions against input data. + /// Evaluates the switch condition against input data. /// - /// Returns the target slot name for routing, or None if no match - /// and no default is configured. 
- pub fn evaluate(&self, data: &AnyDataValue) -> Option<&str> { - for branch in &self.branches { - if self.evaluate_condition(&branch.condition, data) { - return Some(&branch.target); - } + /// Returns the appropriate output port name based on whether + /// the condition matches. + pub fn evaluate(&self, data: &AnyDataValue) -> &str { + if self.evaluate_condition(data) { + &self.match_port + } else { + &self.else_port } - self.default.as_deref() } - /// Evaluates a single condition against the data. - fn evaluate_condition(&self, condition: &SwitchCondition, data: &AnyDataValue) -> bool { - match condition { - SwitchCondition::Always => true, - SwitchCondition::ContentType { category } => { - // Check if data matches the content type category + /// Evaluates the condition against the data. + fn evaluate_condition(&self, data: &AnyDataValue) -> bool { + match &self.condition { + SwitchCondition::ContentType(c) => self.match_content_type(data, c.category), + + SwitchCondition::FileExtension(c) => { match data { - AnyDataValue::Blob(blob) => { - let mime = blob - .content_type - .as_deref() - .unwrap_or("application/octet-stream"); - match category { - ContentTypeCategory::Image => mime.starts_with("image/"), - ContentTypeCategory::Document => { - mime == "application/pdf" - || mime.starts_with("application/vnd.") - || mime == "application/msword" - } - ContentTypeCategory::Text => { - mime.starts_with("text/") || mime == "application/json" - } - ContentTypeCategory::Audio => mime.starts_with("audio/"), - ContentTypeCategory::Video => mime.starts_with("video/"), - ContentTypeCategory::Spreadsheet => { - mime == "application/vnd.ms-excel" - || mime.contains("spreadsheet") - || mime == "text/csv" - } - ContentTypeCategory::Presentation => { - mime == "application/vnd.ms-powerpoint" - || mime.contains("presentation") - } - ContentTypeCategory::Archive => { - mime == "application/zip" - || mime == "application/x-tar" - || mime == "application/gzip" - } - ContentTypeCategory::Code => { - mime.starts_with("text/x-") - || mime == "application/javascript" - || mime == "application/typescript" - } - } - } + AnyDataValue::Blob(blob) => blob.path.rsplit('.').next().is_some_and(|ext| { + c.extensions.iter().any(|e| ext.eq_ignore_ascii_case(e)) + }), _ => false, } } - SwitchCondition::FileSizeAbove { threshold_bytes } => match data { - AnyDataValue::Blob(blob) => blob.data.len() as u64 > *threshold_bytes, - _ => false, - }, - SwitchCondition::FileSizeBelow { threshold_bytes } => match data { - AnyDataValue::Blob(blob) => (blob.data.len() as u64) < *threshold_bytes, + + SwitchCondition::FileSize(c) => match data { + AnyDataValue::Blob(blob) => { + let size = blob.data.len() as u64; + let above_min = c.min_bytes.is_none_or(|min| size >= min); + let below_max = c.max_bytes.is_none_or(|max| size <= max); + above_min && below_max + } _ => false, }, - SwitchCondition::HasMetadata { key } => { - // Check if the data has metadata with the given key - match data { - AnyDataValue::Blob(blob) => blob.metadata.contains_key(key), - AnyDataValue::Record(record) => record.columns.contains_key(key), - _ => false, + + SwitchCondition::PageCount(c) => { + let page_count = self.get_metadata_u32(data, "page_count"); + match page_count { + Some(count) => { + let above_min = c.min_pages.is_none_or(|min| count >= min); + let below_max = c.max_pages.is_none_or(|max| count <= max); + above_min && below_max + } + None => false, } } - SwitchCondition::MetadataEquals { key, value } => { - // Check if metadata key equals value - 
match data { - AnyDataValue::Blob(blob) => { - blob.metadata.get(key).map(|v| v == value).unwrap_or(false) + + SwitchCondition::Duration(c) => { + let duration_secs = self.get_metadata_i64(data, "duration_seconds"); + match duration_secs { + Some(secs) => { + let above_min = c.min_seconds.is_none_or(|min| secs >= min); + let below_max = c.max_seconds.is_none_or(|max| secs <= max); + above_min && below_max } + None => false, + } + } + + SwitchCondition::Language(c) => { + let detected_lang = self.get_metadata_string(data, "language"); + let confidence = self.get_metadata_f32(data, "language_confidence"); + match (detected_lang, confidence) { + (Some(lang), Some(conf)) => { + lang.eq_ignore_ascii_case(&c.code) && conf >= c.min_confidence + } + (Some(lang), None) => lang.eq_ignore_ascii_case(&c.code), _ => false, } } - // TODO: Implement remaining conditions - SwitchCondition::PageCountAbove { .. } => false, - SwitchCondition::DurationAbove { .. } => false, - SwitchCondition::Language { .. } => false, - SwitchCondition::DateNewerThan { .. } => false, - SwitchCondition::FileNameMatches { pattern } => match data { + + SwitchCondition::FileDate(c) => { + let timestamp = match c.field { + crate::graph::definition::DateField::Created => { + self.get_metadata_timestamp(data, "created_at") + } + crate::graph::definition::DateField::Modified => { + self.get_metadata_timestamp(data, "modified_at") + } + }; + match timestamp { + Some(ts) => { + let after_ok = c.after.is_none_or(|after| ts >= after); + let before_ok = c.before.is_none_or(|before| ts <= before); + after_ok && before_ok + } + None => false, + } + } + + SwitchCondition::FileName(c) => match data { AnyDataValue::Blob(blob) => { - // Simple glob-style matching for common patterns - glob_match(pattern, &blob.path) + let filename = blob.path.rsplit('/').next().unwrap_or(&blob.path); + match c.match_type { + PatternMatchType::Glob => glob_match(&c.pattern, filename), + PatternMatchType::Regex => { + // Fall back to glob matching for now + glob_match(&c.pattern, filename) + } + PatternMatchType::Exact => filename == c.pattern, + PatternMatchType::Contains => { + filename.to_lowercase().contains(&c.pattern.to_lowercase()) + } + } } _ => false, }, - SwitchCondition::FileExtension { extension } => match data { - AnyDataValue::Blob(blob) => blob - .path - .rsplit('.') - .next() - .map(|ext| ext.eq_ignore_ascii_case(extension)) - .unwrap_or(false), - _ => false, - }, } } + + /// Matches content type category against data. 
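A hedged example for the size-based routing added above, in the same style as the tests later in this patch. `FileSizeCondition`'s field names are inferred from the evaluation code, and `CompiledSwitch`, `AnyDataValue`, and `Blob::new` are assumed to be in scope as in those tests.

```rust
#[test]
fn route_by_file_size_sketch() {
    use crate::graph::definition::{FileSizeCondition, SwitchCondition};

    // Route blobs of at least 1 KiB to "large"; everything else to "small".
    let switch = CompiledSwitch::new(
        SwitchCondition::FileSize(FileSizeCondition {
            min_bytes: Some(1024),
            max_bytes: None,
        }),
        "large".into(),
        "small".into(),
    );

    // A 16-byte blob falls below the minimum, so it takes the else port.
    let tiny = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("tiny.bin", vec![0u8; 16]));
    assert_eq!(switch.evaluate(&tiny), "small");
}
```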
+ fn match_content_type(&self, data: &AnyDataValue, category: ContentTypeCategory) -> bool { + match data { + AnyDataValue::Blob(blob) => { + let mime = blob + .content_type + .as_deref() + .unwrap_or("application/octet-stream"); + match category { + ContentTypeCategory::Image => mime.starts_with("image/"), + ContentTypeCategory::Document => { + mime == "application/pdf" + || mime.starts_with("application/vnd.") + || mime == "application/msword" + } + ContentTypeCategory::Text => { + mime.starts_with("text/") || mime == "application/json" + } + ContentTypeCategory::Audio => mime.starts_with("audio/"), + ContentTypeCategory::Video => mime.starts_with("video/"), + ContentTypeCategory::Spreadsheet => { + mime == "application/vnd.ms-excel" + || mime.contains("spreadsheet") + || mime == "text/csv" + } + ContentTypeCategory::Presentation => { + mime == "application/vnd.ms-powerpoint" || mime.contains("presentation") + } + ContentTypeCategory::Archive => { + mime == "application/zip" + || mime == "application/x-tar" + || mime == "application/gzip" + || mime == "application/x-rar-compressed" + || mime == "application/x-7z-compressed" + } + ContentTypeCategory::Code => { + mime.starts_with("text/x-") + || mime == "application/javascript" + || mime == "application/typescript" + || mime == "application/x-python" + } + ContentTypeCategory::Other => true, + } + } + _ => false, + } + } + + /// Gets a string metadata value from JSON. + fn get_metadata_string(&self, data: &AnyDataValue, key: &str) -> Option { + match data { + AnyDataValue::Blob(blob) => blob.metadata.get(key).and_then(json_to_string), + AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_string), + _ => None, + } + } + + /// Gets a u32 metadata value. + fn get_metadata_u32(&self, data: &AnyDataValue, key: &str) -> Option { + match data { + AnyDataValue::Blob(blob) => blob + .metadata + .get(key) + .and_then(json_to_u64) + .map(|v| v as u32), + AnyDataValue::Record(record) => record + .columns + .get(key) + .and_then(json_to_u64) + .map(|v| v as u32), + _ => None, + } + } + + /// Gets an i64 metadata value. + fn get_metadata_i64(&self, data: &AnyDataValue, key: &str) -> Option { + match data { + AnyDataValue::Blob(blob) => blob.metadata.get(key).and_then(json_to_i64), + AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_i64), + _ => None, + } + } + + /// Gets an f32 metadata value. + fn get_metadata_f32(&self, data: &AnyDataValue, key: &str) -> Option { + self.get_metadata_f64(data, key).map(|v| v as f32) + } + + /// Gets an f64 metadata value. + fn get_metadata_f64(&self, data: &AnyDataValue, key: &str) -> Option { + match data { + AnyDataValue::Blob(blob) => blob.metadata.get(key).and_then(json_to_f64), + AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_f64), + _ => None, + } + } + + /// Gets a timestamp metadata value. + fn get_metadata_timestamp(&self, data: &AnyDataValue, key: &str) -> Option { + let s = self.get_metadata_string(data, key)?; + s.parse().ok() + } } -impl From for CompiledSwitch { - fn from(def: crate::graph::definition::SwitchDef) -> Self { - Self::new(def.branches, def.default) +impl From for CompiledSwitch { + fn from(def: SwitchDef) -> Self { + Self::new(def.condition, def.match_port, def.else_port) } } +/// Converts a JSON value to a string. 
+fn json_to_string(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::String(s) => Some(s.clone()), + serde_json::Value::Number(n) => Some(n.to_string()), + serde_json::Value::Bool(b) => Some(b.to_string()), + _ => None, + } +} + +/// Converts a JSON value to u64. +fn json_to_u64(value: &serde_json::Value) -> Option { + value.as_u64().or_else(|| value.as_f64().map(|f| f as u64)) +} + +/// Converts a JSON value to i64. +fn json_to_i64(value: &serde_json::Value) -> Option { + value.as_i64().or_else(|| value.as_f64().map(|f| f as i64)) +} + +/// Converts a JSON value to f64. +fn json_to_f64(value: &serde_json::Value) -> Option { + value.as_f64() +} + /// Simple glob-style pattern matching. /// /// Supports: -/// - `*` matches any sequence of characters (except path separators) +/// - `*` matches any sequence of characters /// - `?` matches any single character -/// - Literal matching for other characters +/// - Literal matching for other characters (case-insensitive) fn glob_match(pattern: &str, text: &str) -> bool { let mut pattern_chars = pattern.chars().peekable(); let mut text_chars = text.chars().peekable(); @@ -186,7 +317,7 @@ fn glob_match(pattern: &str, text: &str) -> bool { } } c => { - // Literal match (case-insensitive for file matching) + // Literal match (case-insensitive) match text_chars.next() { Some(t) if c.eq_ignore_ascii_case(&t) => {} _ => return false, @@ -198,3 +329,48 @@ fn glob_match(pattern: &str, text: &str) -> bool { // Pattern is exhausted, text should also be exhausted text_chars.peek().is_none() } + +#[cfg(test)] +mod tests { + use super::*; + use crate::graph::definition::{ContentTypeCondition, FileExtensionCondition}; + + #[test] + fn test_evaluate_file_extension() { + let switch = CompiledSwitch::new( + SwitchCondition::FileExtension(FileExtensionCondition { + extensions: vec!["pdf".into(), "docx".into()], + }), + "documents".into(), + "other".into(), + ); + + let pdf = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("report.pdf", vec![])); + let txt = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("notes.txt", vec![])); + + assert_eq!(switch.evaluate(&pdf), "documents"); + assert_eq!(switch.evaluate(&txt), "other"); + } + + #[test] + fn test_evaluate_content_type() { + let switch = CompiledSwitch::new( + SwitchCondition::ContentType(ContentTypeCondition { + category: ContentTypeCategory::Image, + }), + "images".into(), + "other".into(), + ); + + let mut blob = nvisy_dal::datatype::Blob::new("photo.jpg", vec![]); + blob.content_type = Some("image/jpeg".into()); + let image = AnyDataValue::Blob(blob); + + let mut blob = nvisy_dal::datatype::Blob::new("doc.pdf", vec![]); + blob.content_type = Some("application/pdf".into()); + let pdf = AnyDataValue::Blob(blob); + + assert_eq!(switch.evaluate(&image), "images"); + assert_eq!(switch.evaluate(&pdf), "other"); + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/transform.rs b/crates/nvisy-runtime/src/graph/compiled/transform.rs deleted file mode 100644 index fb78b3e..0000000 --- a/crates/nvisy-runtime/src/graph/compiled/transform.rs +++ /dev/null @@ -1,362 +0,0 @@ -//! Compiled transform node types. -//! -//! Processors are the runtime representation of transform nodes. Each processor -//! encapsulates the logic and dependencies needed to execute a specific transform. 
- -use nvisy_dal::AnyDataValue; -use nvisy_rig::agent::Agents; -use nvisy_rig::provider::EmbeddingProvider; - -use crate::error::Result; -use crate::graph::transform::{ - ChunkStrategy, DeriveTask, EnrichTask, ExtractTask, PartitionStrategy, -}; - -/// Compiled transform node - ready to process data. -/// -/// Each variant wraps a dedicated processor that encapsulates -/// the transform logic and any required external dependencies. -#[derive(Debug)] -pub enum CompiledTransform { - /// Partition documents into elements. - Partition(PartitionProcessor), - /// Chunk content into smaller pieces. - Chunk(ChunkProcessor), - /// Generate vector embeddings. - Embedding(EmbeddingProcessor), - /// Enrich elements with metadata/descriptions. - Enrich(EnrichProcessor), - /// Extract structured data or convert formats. - Extract(ExtractProcessor), - /// Generate new content from input. - Derive(DeriveProcessor), -} - -impl CompiledTransform { - /// Processes input data through the transform. - pub async fn process(&self, input: Vec) -> Result> { - match self { - Self::Partition(p) => p.process(input).await, - Self::Chunk(p) => p.process(input).await, - Self::Embedding(p) => p.process(input).await, - Self::Enrich(p) => p.process(input).await, - Self::Extract(p) => p.process(input).await, - Self::Derive(p) => p.process(input).await, - } - } -} - -// ============================================================================ -// Partition Processor -// ============================================================================ - -/// Processor for partitioning documents into elements. -#[derive(Debug)] -pub struct PartitionProcessor { - /// Partitioning strategy to use. - strategy: PartitionStrategy, - /// Whether to include page break markers. - include_page_breaks: bool, - /// Whether to discard unsupported element types. - discard_unsupported: bool, -} - -impl PartitionProcessor { - /// Creates a new partition processor. - pub fn new( - strategy: PartitionStrategy, - include_page_breaks: bool, - discard_unsupported: bool, - ) -> Self { - Self { - strategy, - include_page_breaks, - discard_unsupported, - } - } - - /// Returns the partitioning strategy. - pub fn strategy(&self) -> PartitionStrategy { - self.strategy - } - - /// Returns whether page breaks are included. - pub fn include_page_breaks(&self) -> bool { - self.include_page_breaks - } - - /// Returns whether unsupported types are discarded. - pub fn discard_unsupported(&self) -> bool { - self.discard_unsupported - } - - /// Processes input data through the partition transform. - pub async fn process(&self, input: Vec) -> Result> { - // TODO: Implement document partitioning based on strategy - // For now, pass through unchanged - Ok(input) - } -} - -// ============================================================================ -// Chunk Processor -// ============================================================================ - -/// Processor for chunking content into smaller pieces. -pub struct ChunkProcessor { - /// Chunking strategy to use. - strategy: ChunkStrategy, - /// Whether to use LLM-powered contextual chunking. - contextual_chunking: bool, - /// Agents for contextual chunking (if enabled). - agents: Option, -} - -impl ChunkProcessor { - /// Creates a new chunk processor without contextual chunking. - pub fn new(strategy: ChunkStrategy) -> Self { - Self { - strategy, - contextual_chunking: false, - agents: None, - } - } - - /// Creates a new chunk processor with contextual chunking enabled. 
- pub fn with_contextual_chunking(strategy: ChunkStrategy, agents: Agents) -> Self { - Self { - strategy, - contextual_chunking: true, - agents: Some(agents), - } - } - - /// Returns the chunking strategy. - pub fn strategy(&self) -> &ChunkStrategy { - &self.strategy - } - - /// Returns whether contextual chunking is enabled. - pub fn contextual_chunking(&self) -> bool { - self.contextual_chunking - } - - /// Processes input data through the chunk transform. - pub async fn process(&self, input: Vec) -> Result> { - // TODO: Implement chunking based on strategy - // If contextual_chunking is enabled, use agents for context generation - Ok(input) - } -} - -impl std::fmt::Debug for ChunkProcessor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ChunkProcessor") - .field("strategy", &self.strategy) - .field("contextual_chunking", &self.contextual_chunking) - .field("has_agents", &self.agents.is_some()) - .finish() - } -} - -// ============================================================================ -// Embedding Processor -// ============================================================================ - -/// Processor for generating vector embeddings. -pub struct EmbeddingProcessor { - /// The embedding provider for generating embeddings. - provider: EmbeddingProvider, - /// Whether to L2-normalize output embeddings. - normalize: bool, -} - -impl EmbeddingProcessor { - /// Creates a new embedding processor. - pub fn new(provider: EmbeddingProvider, normalize: bool) -> Self { - Self { - provider, - normalize, - } - } - - /// Returns whether normalization is enabled. - pub fn normalize(&self) -> bool { - self.normalize - } - - /// Processes input data through the embedding transform. - pub async fn process(&self, input: Vec) -> Result> { - // TODO: Implement embedding generation using provider - // For now, pass through unchanged - let _ = &self.provider; // Suppress unused warning - Ok(input) - } -} - -impl std::fmt::Debug for EmbeddingProcessor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("EmbeddingProcessor") - .field("normalize", &self.normalize) - .finish_non_exhaustive() - } -} - -// ============================================================================ -// Enrich Processor -// ============================================================================ - -/// Processor for enriching elements with metadata/descriptions. -pub struct EnrichProcessor { - /// Agents for enrichment tasks. - agents: Agents, - /// The enrichment task to perform. - task: EnrichTask, - /// Optional prompt override. - override_prompt: Option, -} - -impl EnrichProcessor { - /// Creates a new enrich processor. - pub fn new(agents: Agents, task: EnrichTask, override_prompt: Option) -> Self { - Self { - agents, - task, - override_prompt, - } - } - - /// Returns the enrichment task. - pub fn task(&self) -> &EnrichTask { - &self.task - } - - /// Returns the prompt override, if any. - pub fn override_prompt(&self) -> Option<&str> { - self.override_prompt.as_deref() - } - - /// Processes input data through the enrich transform. 
- pub async fn process(&self, input: Vec) -> Result> { - // TODO: Implement enrichment using agents - // Use self.agents.vision_agent for image tasks - // Use self.agents.table_agent for table tasks - let _ = &self.agents; // Suppress unused warning - Ok(input) - } -} - -impl std::fmt::Debug for EnrichProcessor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("EnrichProcessor") - .field("task", &self.task) - .field("override_prompt", &self.override_prompt) - .finish_non_exhaustive() - } -} - -// ============================================================================ -// Extract Processor -// ============================================================================ - -/// Processor for extracting structured data or converting formats. -pub struct ExtractProcessor { - /// Agents for extraction tasks. - agents: Agents, - /// The extraction task to perform. - task: ExtractTask, - /// Optional prompt override. - override_prompt: Option, -} - -impl ExtractProcessor { - /// Creates a new extract processor. - pub fn new(agents: Agents, task: ExtractTask, override_prompt: Option) -> Self { - Self { - agents, - task, - override_prompt, - } - } - - /// Returns the extraction task. - pub fn task(&self) -> &ExtractTask { - &self.task - } - - /// Returns the prompt override, if any. - pub fn override_prompt(&self) -> Option<&str> { - self.override_prompt.as_deref() - } - - /// Processes input data through the extract transform. - pub async fn process(&self, input: Vec) -> Result> { - // TODO: Implement extraction using agents - // Use self.agents.text_analysis_agent for NER, keywords, classification, sentiment - // Use self.agents.table_agent for table conversion - // Use self.agents.structured_output_agent for JSON conversion - let _ = &self.agents; // Suppress unused warning - Ok(input) - } -} - -impl std::fmt::Debug for ExtractProcessor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ExtractProcessor") - .field("task", &self.task) - .field("override_prompt", &self.override_prompt) - .finish_non_exhaustive() - } -} - -// ============================================================================ -// Derive Processor -// ============================================================================ - -/// Processor for generating new content from input. -pub struct DeriveProcessor { - /// Agents for derivation tasks. - agents: Agents, - /// The derivation task to perform. - task: DeriveTask, - /// Optional prompt override. - override_prompt: Option, -} - -impl DeriveProcessor { - /// Creates a new derive processor. - pub fn new(agents: Agents, task: DeriveTask, override_prompt: Option) -> Self { - Self { - agents, - task, - override_prompt, - } - } - - /// Returns the derivation task. - pub fn task(&self) -> DeriveTask { - self.task - } - - /// Returns the prompt override, if any. - pub fn override_prompt(&self) -> Option<&str> { - self.override_prompt.as_deref() - } - - /// Processes input data through the derive transform. 
- pub async fn process(&self, input: Vec) -> Result> { - // TODO: Implement derivation using agents - // Use self.agents.text_generation_agent for summarization and title generation - let _ = &self.agents; // Suppress unused warning - Ok(input) - } -} - -impl std::fmt::Debug for DeriveProcessor { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("DeriveProcessor") - .field("task", &self.task) - .field("override_prompt", &self.override_prompt) - .finish_non_exhaustive() - } -} diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/chunk.rs b/crates/nvisy-runtime/src/graph/compiled/transform/chunk.rs new file mode 100644 index 0000000..6945193 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/transform/chunk.rs @@ -0,0 +1,66 @@ +//! Chunk processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::Agents; + +use super::Process; +use crate::error::Result; +use crate::graph::definition::ChunkStrategy; + +/// Processor for chunking content into smaller pieces. +pub struct ChunkProcessor { + /// Chunking strategy to use. + strategy: ChunkStrategy, + /// Whether to use LLM-powered contextual chunking. + contextual_chunking: bool, + /// Agents for contextual chunking (if enabled). + agents: Option, +} + +impl ChunkProcessor { + /// Creates a new chunk processor without contextual chunking. + pub fn new(strategy: ChunkStrategy) -> Self { + Self { + strategy, + contextual_chunking: false, + agents: None, + } + } + + /// Creates a new chunk processor with contextual chunking enabled. + pub fn with_contextual_chunking(strategy: ChunkStrategy, agents: Agents) -> Self { + Self { + strategy, + contextual_chunking: true, + agents: Some(agents), + } + } + + /// Returns the chunking strategy. + pub fn strategy(&self) -> &ChunkStrategy { + &self.strategy + } + + /// Returns whether contextual chunking is enabled. + pub fn contextual_chunking(&self) -> bool { + self.contextual_chunking + } +} + +impl Process for ChunkProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement chunking based on strategy + // If contextual_chunking is enabled, use agents for context generation + Ok(input) + } +} + +impl std::fmt::Debug for ChunkProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ChunkProcessor") + .field("strategy", &self.strategy) + .field("contextual_chunking", &self.contextual_chunking) + .field("has_agents", &self.agents.is_some()) + .finish() + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/derive.rs b/crates/nvisy-runtime/src/graph/compiled/transform/derive.rs new file mode 100644 index 0000000..40be67a --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/transform/derive.rs @@ -0,0 +1,57 @@ +//! Derive processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::Agents; + +use super::Process; +use crate::error::Result; +use crate::graph::definition::DeriveTask; + +/// Processor for generating new content from input. +pub struct DeriveProcessor { + /// Agents for derivation tasks. + agents: Agents, + /// The derivation task to perform. + task: DeriveTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl DeriveProcessor { + /// Creates a new derive processor. + pub fn new(agents: Agents, task: DeriveTask, override_prompt: Option) -> Self { + Self { + agents, + task, + override_prompt, + } + } + + /// Returns the derivation task. 
+ pub fn task(&self) -> DeriveTask { + self.task + } + + /// Returns the prompt override, if any. + pub fn override_prompt(&self) -> Option<&str> { + self.override_prompt.as_deref() + } +} + +impl Process for DeriveProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement derivation using agents + // Use self.agents.text_generation_agent for summarization and title generation + let _ = &self.agents; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for DeriveProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DeriveProcessor") + .field("task", &self.task) + .field("override_prompt", &self.override_prompt) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/embedding.rs b/crates/nvisy-runtime/src/graph/compiled/transform/embedding.rs new file mode 100644 index 0000000..078e7e4 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/transform/embedding.rs @@ -0,0 +1,47 @@ +//! Embedding processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::provider::EmbeddingProvider; + +use super::Process; +use crate::error::Result; + +/// Processor for generating vector embeddings. +pub struct EmbeddingProcessor { + /// The embedding provider for generating embeddings. + provider: EmbeddingProvider, + /// Whether to L2-normalize output embeddings. + normalize: bool, +} + +impl EmbeddingProcessor { + /// Creates a new embedding processor. + pub fn new(provider: EmbeddingProvider, normalize: bool) -> Self { + Self { + provider, + normalize, + } + } + + /// Returns whether normalization is enabled. + pub fn normalize(&self) -> bool { + self.normalize + } +} + +impl Process for EmbeddingProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement embedding generation using provider + // For now, pass through unchanged + let _ = &self.provider; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for EmbeddingProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EmbeddingProcessor") + .field("normalize", &self.normalize) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/enrich.rs b/crates/nvisy-runtime/src/graph/compiled/transform/enrich.rs new file mode 100644 index 0000000..9e18b44 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/transform/enrich.rs @@ -0,0 +1,58 @@ +//! Enrich processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::Agents; + +use super::Process; +use crate::error::Result; +use crate::graph::definition::EnrichTask; + +/// Processor for enriching elements with metadata/descriptions. +pub struct EnrichProcessor { + /// Agents for enrichment tasks. + agents: Agents, + /// The enrichment task to perform. + task: EnrichTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl EnrichProcessor { + /// Creates a new enrich processor. + pub fn new(agents: Agents, task: EnrichTask, override_prompt: Option) -> Self { + Self { + agents, + task, + override_prompt, + } + } + + /// Returns the enrichment task. + pub fn task(&self) -> &EnrichTask { + &self.task + } + + /// Returns the prompt override, if any. 
+ pub fn override_prompt(&self) -> Option<&str> { + self.override_prompt.as_deref() + } +} + +impl Process for EnrichProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement enrichment using agents + // Use self.agents.vision_agent for image tasks + // Use self.agents.table_agent for table tasks + let _ = &self.agents; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for EnrichProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EnrichProcessor") + .field("task", &self.task) + .field("override_prompt", &self.override_prompt) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/extract.rs b/crates/nvisy-runtime/src/graph/compiled/transform/extract.rs new file mode 100644 index 0000000..fabc26b --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/transform/extract.rs @@ -0,0 +1,59 @@ +//! Extract processor. + +use nvisy_dal::AnyDataValue; +use nvisy_rig::agent::Agents; + +use super::Process; +use crate::error::Result; +use crate::graph::definition::ExtractTask; + +/// Processor for extracting structured data or converting formats. +pub struct ExtractProcessor { + /// Agents for extraction tasks. + agents: Agents, + /// The extraction task to perform. + task: ExtractTask, + /// Optional prompt override. + override_prompt: Option, +} + +impl ExtractProcessor { + /// Creates a new extract processor. + pub fn new(agents: Agents, task: ExtractTask, override_prompt: Option) -> Self { + Self { + agents, + task, + override_prompt, + } + } + + /// Returns the extraction task. + pub fn task(&self) -> &ExtractTask { + &self.task + } + + /// Returns the prompt override, if any. + pub fn override_prompt(&self) -> Option<&str> { + self.override_prompt.as_deref() + } +} + +impl Process for ExtractProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement extraction using agents + // Use self.agents.text_analysis_agent for NER, keywords, classification, sentiment + // Use self.agents.table_agent for table conversion + // Use self.agents.structured_output_agent for JSON conversion + let _ = &self.agents; // Suppress unused warning + Ok(input) + } +} + +impl std::fmt::Debug for ExtractProcessor { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ExtractProcessor") + .field("task", &self.task) + .field("override_prompt", &self.override_prompt) + .finish_non_exhaustive() + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/mod.rs b/crates/nvisy-runtime/src/graph/compiled/transform/mod.rs new file mode 100644 index 0000000..9cf77d3 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/transform/mod.rs @@ -0,0 +1,75 @@ +//! Compiled transform node types. +//! +//! Processors are the runtime representation of transform nodes. Each processor +//! encapsulates the logic and dependencies needed to execute a specific transform. + +mod chunk; +mod derive; +mod embedding; +mod enrich; +mod extract; +mod partition; + +use std::future::Future; + +pub use chunk::ChunkProcessor; +pub use derive::DeriveProcessor; +pub use embedding::EmbeddingProcessor; +pub use enrich::EnrichProcessor; +pub use extract::ExtractProcessor; +use nvisy_dal::AnyDataValue; +pub use partition::PartitionProcessor; + +use crate::error::Result; + +/// Trait for processing data in a workflow pipeline. +/// +/// Processors are the compiled form of transforms. They take input data items +/// and produce output data items. 
A single input can produce multiple outputs +/// (e.g., chunking splits one document into many chunks). +pub trait Process: Send + Sync { + /// Processes input data items into output data items. + /// + /// # Arguments + /// * `input` - The input data items to process + /// + /// # Returns + /// A vector of processed data items (may be more or fewer than input) + fn process( + &self, + input: Vec, + ) -> impl Future>> + Send; +} + +/// Compiled transform node - ready to process data. +/// +/// Each variant wraps a dedicated processor that encapsulates +/// the transform logic and any required external dependencies. +#[derive(Debug)] +pub enum CompiledTransform { + /// Partition documents into elements. + Partition(PartitionProcessor), + /// Chunk content into smaller pieces. + Chunk(ChunkProcessor), + /// Generate vector embeddings. + Embedding(EmbeddingProcessor), + /// Enrich elements with metadata/descriptions. + Enrich(EnrichProcessor), + /// Extract structured data or convert formats. + Extract(ExtractProcessor), + /// Generate new content from input. + Derive(DeriveProcessor), +} + +impl Process for CompiledTransform { + async fn process(&self, input: Vec) -> Result> { + match self { + Self::Partition(p) => p.process(input).await, + Self::Chunk(p) => p.process(input).await, + Self::Embedding(p) => p.process(input).await, + Self::Enrich(p) => p.process(input).await, + Self::Extract(p) => p.process(input).await, + Self::Derive(p) => p.process(input).await, + } + } +} diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/partition.rs b/crates/nvisy-runtime/src/graph/compiled/transform/partition.rs new file mode 100644 index 0000000..798c344 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/compiled/transform/partition.rs @@ -0,0 +1,56 @@ +//! Partition processor. + +use nvisy_dal::AnyDataValue; + +use super::Process; +use crate::error::Result; +use crate::graph::definition::PartitionStrategy; + +/// Processor for partitioning documents into elements. +#[derive(Debug)] +pub struct PartitionProcessor { + /// Partitioning strategy to use. + strategy: PartitionStrategy, + /// Whether to include page break markers. + include_page_breaks: bool, + /// Whether to discard unsupported element types. + discard_unsupported: bool, +} + +impl PartitionProcessor { + /// Creates a new partition processor. + pub fn new( + strategy: PartitionStrategy, + include_page_breaks: bool, + discard_unsupported: bool, + ) -> Self { + Self { + strategy, + include_page_breaks, + discard_unsupported, + } + } + + /// Returns the partitioning strategy. + pub fn strategy(&self) -> PartitionStrategy { + self.strategy + } + + /// Returns whether page breaks are included. + pub fn include_page_breaks(&self) -> bool { + self.include_page_breaks + } + + /// Returns whether unsupported types are discarded. + pub fn discard_unsupported(&self) -> bool { + self.discard_unsupported + } +} + +impl Process for PartitionProcessor { + async fn process(&self, input: Vec) -> Result> { + // TODO: Implement document partitioning based on strategy + // For now, pass through unchanged + Ok(input) + } +} diff --git a/crates/nvisy-runtime/src/graph/definition/edge.rs b/crates/nvisy-runtime/src/graph/definition/edge.rs index 6e5638a..10a7f99 100644 --- a/crates/nvisy-runtime/src/graph/definition/edge.rs +++ b/crates/nvisy-runtime/src/graph/definition/edge.rs @@ -1,11 +1,18 @@ //! Edge types for connecting nodes in a workflow graph. 
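The `Process` trait above returns `impl Future<...> + Send` rather than declaring `async fn` in the trait, which keeps the node futures `Send` for a multi-threaded executor, while implementors can still write plain `async fn`. Below is a minimal, self-contained sketch of that pattern; `DataValue`, the `Result` alias, and the `SplitWords` processor are stand-ins invented for the example, not the crate's `AnyDataValue` or error types.

```rust
use std::future::Future;

// Stand-ins so the sketch compiles on its own.
type DataValue = String;
type Result<T> = std::result::Result<T, Box<dyn std::error::Error + Send + Sync>>;

pub trait Process: Send + Sync {
    // Return-position `impl Trait` with an explicit `+ Send` bound, mirroring
    // the trait in the diff above.
    fn process(
        &self,
        input: Vec<DataValue>,
    ) -> impl Future<Output = Result<Vec<DataValue>>> + Send;
}

/// A toy processor that fans one item out into several, the way chunking
/// turns a single document into many chunks.
struct SplitWords;

impl Process for SplitWords {
    // A plain `async fn` in the impl satisfies the `impl Future + Send` signature.
    async fn process(&self, input: Vec<DataValue>) -> Result<Vec<DataValue>> {
        Ok(input
            .into_iter()
            .flat_map(|doc| {
                doc.split_whitespace()
                    .map(str::to_owned)
                    .collect::<Vec<_>>()
            })
            .collect())
    }
}
```

Driving it from any async runtime, `SplitWords.process(vec!["alpha beta".into()]).await` would yield two items, which is the "may be more or fewer than input" behaviour the trait documents.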
+use derive_builder::Builder; use serde::{Deserialize, Serialize}; use super::NodeId; /// An edge connecting two nodes in the workflow graph. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, Builder)] +#[builder( + name = "EdgeBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] pub struct Edge { /// Source node ID. pub from: NodeId, @@ -13,12 +20,26 @@ pub struct Edge { pub to: NodeId, /// Optional port/slot name on the source node. #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub from_port: Option, /// Optional port/slot name on the target node. #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub to_port: Option, } +impl EdgeBuilder { + fn validate(&self) -> Result<(), String> { + if self.from.is_none() { + return Err("from is required".into()); + } + if self.to.is_none() { + return Err("to is required".into()); + } + Ok(()) + } +} + impl Edge { /// Creates a new edge between two nodes. pub fn new(from: NodeId, to: NodeId) -> Self { @@ -30,41 +51,43 @@ impl Edge { } } - /// Creates an edge with port specifications. - pub fn with_ports( - from: NodeId, - from_port: impl Into, - to: NodeId, - to_port: impl Into, - ) -> Self { - Self { - from, - to, - from_port: Some(from_port.into()), - to_port: Some(to_port.into()), - } - } - - /// Sets the source port. - pub fn from_port(mut self, port: impl Into) -> Self { - self.from_port = Some(port.into()); - self - } - - /// Sets the target port. - pub fn to_port(mut self, port: impl Into) -> Self { - self.to_port = Some(port.into()); - self + /// Returns a builder for creating an edge. + pub fn builder() -> EdgeBuilder { + EdgeBuilder::default() } } /// Edge data stored in the compiled graph. -#[derive(Debug, Clone, PartialEq, Eq, Hash, Default, Serialize, Deserialize)] +#[derive( + Debug, + Clone, + PartialEq, + Eq, + Hash, + Default, + Serialize, + Deserialize, + Builder +)] +#[builder( + name = "EdgeDataBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with") +)] pub struct EdgeData { /// Optional port/slot name on the source node. #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub from_port: Option, /// Optional port/slot name on the target node. #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub to_port: Option, } + +impl EdgeData { + /// Returns a builder for creating edge data. + pub fn builder() -> EdgeDataBuilder { + EdgeDataBuilder::default() + } +} diff --git a/crates/nvisy-runtime/src/graph/definition/metadata.rs b/crates/nvisy-runtime/src/graph/definition/metadata.rs index 102e49b..7e705cb 100644 --- a/crates/nvisy-runtime/src/graph/definition/metadata.rs +++ b/crates/nvisy-runtime/src/graph/definition/metadata.rs @@ -1,59 +1,60 @@ //! Workflow metadata. +use derive_builder::Builder; use jiff::Timestamp; use semver::Version; use serde::{Deserialize, Serialize}; /// Workflow metadata. -#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, Builder)] +#[builder( + name = "WorkflowMetadataBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] pub struct WorkflowMetadata { /// Workflow name (optional). #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub name: Option, /// Workflow description. 
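A quick sketch of the generated `EdgeBuilder` in use: `pattern = "owned"` makes each setter consume and return the builder, `setter(into)` accepts anything convertible into the field type, `strip_option` drops the `Some(...)` wrapping for the port fields, and the `validate` hook turns a missing endpoint into an error from `build()`. This assumes `NodeId` can be built from a `Uuid` (as its `From`/`Into` derives suggest); the IDs are placeholders.

```rust
use uuid::Uuid;

fn wire_nodes() {
    let from = NodeId::from(Uuid::from_u128(1));
    let to = NodeId::from(Uuid::from_u128(2));

    // Fully specified edge, with the optional source port set directly.
    let edge = Edge::builder()
        .with_from(from)
        .with_to(to)
        .with_from_port("chunks")
        .build()
        .expect("both endpoints are set");
    assert_eq!(edge.from_port.as_deref(), Some("chunks"));

    // Missing `to`: the validate hook rejects the builder instead of panicking later.
    let incomplete = Edge::builder()
        .with_from(NodeId::from(Uuid::from_u128(3)))
        .build();
    assert!(incomplete.is_err());
}
```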
#[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub description: Option, /// Workflow version (semver, optional). #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub version: Option, /// Tags for organization. #[serde(default, skip_serializing_if = "Vec::is_empty")] + #[builder(default)] pub tags: Vec, /// Creation timestamp. #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub created_at: Option, /// Last update timestamp. #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub updated_at: Option, } +impl WorkflowMetadataBuilder { + fn validate(&self) -> Result<(), String> { + // All fields are optional, so validation always succeeds + Ok(()) + } +} + impl WorkflowMetadata { /// Creates a new empty metadata. pub fn new() -> Self { Self::default() } - /// Sets the workflow name. - pub fn with_name(mut self, name: impl Into) -> Self { - self.name = Some(name.into()); - self - } - - /// Sets the workflow description. - pub fn with_description(mut self, description: impl Into) -> Self { - self.description = Some(description.into()); - self - } - - /// Sets the workflow version. - pub fn with_version(mut self, version: Version) -> Self { - self.version = Some(version); - self - } - - /// Adds tags. - pub fn with_tags(mut self, tags: impl IntoIterator>) -> Self { - self.tags = tags.into_iter().map(Into::into).collect(); - self + /// Returns a builder for creating workflow metadata. + pub fn builder() -> WorkflowMetadataBuilder { + WorkflowMetadataBuilder::default() } } diff --git a/crates/nvisy-runtime/src/graph/definition/mod.rs b/crates/nvisy-runtime/src/graph/definition/mod.rs index 108431e..f7f6497 100644 --- a/crates/nvisy-runtime/src/graph/definition/mod.rs +++ b/crates/nvisy-runtime/src/graph/definition/mod.rs @@ -21,10 +21,16 @@ mod workflow; pub use edge::{Edge, EdgeData}; pub use input::{InputDef, InputProvider, InputSource}; pub use metadata::WorkflowMetadata; -pub use node::{Node, NodeCommon, NodeDef, NodeId}; -pub use output::{OutputDef, OutputProviderDef, OutputTarget}; +pub use node::{Node, NodeCommon, NodeDef, NodeId, Position}; +pub use output::{OutputDef, OutputProvider, OutputTarget}; pub use route::{ - CacheSlot, ContentTypeCategory, DateField, SwitchBranch, SwitchCondition, SwitchDef, + CacheSlot, ContentTypeCategory, ContentTypeCondition, DateField, DurationCondition, + FileDateCondition, FileExtensionCondition, FileNameCondition, FileSizeCondition, + LanguageCondition, PageCountCondition, PatternMatchType, SwitchCondition, SwitchDef, +}; +pub use transform::{ + AnalyzeTask, Chunk, ChunkStrategy, ConvertTask, Derive, DeriveTask, Embedding, Enrich, + EnrichTask, Extract, ExtractTask, ImageEnrichTask, Partition, PartitionStrategy, + TableConvertTask, TableEnrichTask, TextConvertTask, Transformer, }; -pub use transform::{Chunk, Derive, Embedding, Enrich, Extract, Partition, Transform, Transformer}; pub use workflow::{ValidationError, WorkflowDefinition}; diff --git a/crates/nvisy-runtime/src/graph/definition/node.rs b/crates/nvisy-runtime/src/graph/definition/node.rs index 2e13537..d38559d 100644 --- a/crates/nvisy-runtime/src/graph/definition/node.rs +++ b/crates/nvisy-runtime/src/graph/definition/node.rs @@ -2,6 +2,7 @@ use std::str::FromStr; +use derive_builder::Builder; use derive_more::{Debug, Display, From, Into}; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -65,40 +66,78 @@ impl AsRef for NodeId { } } +/// Position of a node in the visual editor. 
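The metadata builder follows the same conventions; because every field is `#[builder(default)]` and the validate hook always succeeds, `build()` cannot fail here. A small usage sketch with illustrative values:

```rust
use semver::Version;

fn describe_workflow() -> WorkflowMetadata {
    WorkflowMetadata::builder()
        .with_name("invoice-ingest")
        .with_description("Partition, chunk, and embed uploaded invoices")
        .with_version(Version::new(1, 2, 0))
        .with_tags(vec!["prod".to_string(), "invoices".to_string()])
        .build()
        .expect("all fields are optional, so build() always succeeds")
}
```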
+#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)] +pub struct Position { + /// X coordinate. + pub x: f32, + /// Y coordinate. + pub y: f32, +} + +impl Position { + /// Creates a new position. + pub fn new(x: f32, y: f32) -> Self { + Self { x, y } + } +} + /// A generic node wrapper that adds optional name and description to any inner type. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct NodeCommon { +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)] +#[builder( + name = "NodeCommonBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with"), + build_fn(validate = "Self::validate") +)] +pub struct NodeCommon +where + T: Clone, +{ /// Display name of the node. #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub name: Option, /// Description of what this node does. #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] pub description: Option, + /// Position in the visual editor. + #[serde(default, skip_serializing_if = "is_default_position")] + #[builder(default)] + pub position: Position, /// Inner node configuration. #[serde(flatten)] pub inner: T, } -impl NodeCommon { +fn is_default_position(pos: &Position) -> bool { + pos.x == 0.0 && pos.y == 0.0 +} + +impl NodeCommonBuilder { + fn validate(&self) -> Result<(), String> { + if self.inner.is_none() { + return Err("inner is required".into()); + } + Ok(()) + } +} + +impl NodeCommon { /// Creates a new node with the given inner value. pub fn new(inner: T) -> Self { Self { name: None, description: None, + position: Position::default(), inner, } } - /// Sets the display name. - pub fn with_name(mut self, name: impl Into) -> Self { - self.name = Some(name.into()); - self - } - - /// Sets the description. - pub fn with_description(mut self, description: impl Into) -> Self { - self.description = Some(description.into()); - self + /// Returns a builder for creating a node. + pub fn builder() -> NodeCommonBuilder { + NodeCommonBuilder::default() } } diff --git a/crates/nvisy-runtime/src/graph/definition/output.rs b/crates/nvisy-runtime/src/graph/definition/output.rs index 0396713..18fa4af 100644 --- a/crates/nvisy-runtime/src/graph/definition/output.rs +++ b/crates/nvisy-runtime/src/graph/definition/output.rs @@ -8,7 +8,7 @@ use super::route::CacheSlot; /// Output provider definition for workflow nodes. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OutputProviderDef { +pub struct OutputProvider { /// Provider parameters (contains credentials_id). pub provider: OutputProviderParams, } @@ -18,7 +18,7 @@ pub struct OutputProviderDef { #[serde(tag = "type", rename_all = "snake_case")] pub enum OutputTarget { /// Write to external provider (S3, Qdrant, etc.). - Provider(OutputProviderDef), + Provider(OutputProvider), /// Write to named cache slot (resolved at compile time). Cache(CacheSlot), } @@ -34,7 +34,7 @@ impl OutputDef { /// Creates a new output definition from a provider. pub fn from_provider(provider: OutputProviderParams) -> Self { Self { - target: OutputTarget::Provider(OutputProviderDef { provider }), + target: OutputTarget::Provider(OutputProvider { provider }), } } diff --git a/crates/nvisy-runtime/src/graph/definition/route.rs b/crates/nvisy-runtime/src/graph/definition/route.rs index 46c193d..f7e2215 100644 --- a/crates/nvisy-runtime/src/graph/definition/route.rs +++ b/crates/nvisy-runtime/src/graph/definition/route.rs @@ -3,7 +3,11 @@ //! 
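`NodeCommon` works the same way, with `inner` as the only required field and the editor `position` defaulting to the origin (and skipped during serialization while it stays there). A sketch with a made-up payload type; in the real graph `inner` would be one of the node definition types:

```rust
use serde::{Deserialize, Serialize};

// Stand-in payload invented for the example; not a crate type.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
struct Label {
    text: String,
}

fn labelled_node() -> NodeCommon<Label> {
    NodeCommon::<Label>::builder()
        .with_name("Chunk invoices")
        .with_position(Position::new(240.0, 96.0))
        .with_inner(Label { text: "chunk".into() })
        .build()
        .expect("`inner` is set, so validation passes")
}
```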
This module provides types for controlling data flow in workflows: //! - [`CacheSlot`]: Named connection point for linking workflow branches //! - [`SwitchDef`]: Conditional routing based on data properties +//! +//! Switch conditions follow the same pattern as transforms - each condition +//! type is a separate struct, and `SwitchCondition` is an enum combining them. +use jiff::Timestamp; use serde::{Deserialize, Serialize}; /// A cache slot reference for in-memory data passing. @@ -36,124 +40,133 @@ impl CacheSlot { } } -/// A switch node definition that routes data to different branches based on conditions. +/// A switch node definition that routes data to different output ports based on conditions. +/// +/// Switch nodes evaluate a condition against incoming data and route it +/// to the appropriate output port. Edges then connect each port to downstream nodes. /// -/// Switch nodes evaluate conditions against incoming data and route it -/// to the appropriate output branch. Each branch has a condition and a -/// target cache slot or output. +/// Each switch has exactly one condition type, similar to how transforms work. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct SwitchDef { - /// Branches to evaluate in order. - pub branches: Vec, - /// Default branch if no conditions match. - #[serde(skip_serializing_if = "Option::is_none")] - pub default: Option, + /// The condition to evaluate. + pub condition: SwitchCondition, + /// Output port for data matching the condition. + pub match_port: String, + /// Output port for data not matching the condition. + pub else_port: String, } impl SwitchDef { - /// Creates a new switch definition with the given branches. - pub fn new(branches: Vec) -> Self { - Self { - branches, - default: None, - } - } - - /// Sets the default target for unmatched data. - pub fn with_default(mut self, target: impl Into) -> Self { - self.default = Some(target.into()); - self + /// Returns all output port names defined by this switch. + pub fn output_ports(&self) -> impl Iterator { + [self.match_port.as_str(), self.else_port.as_str()].into_iter() } } -/// A single branch in a switch node. +/// Switch condition enum - each variant is a distinct condition type. +/// +/// Similar to `Transformer`, each condition is a separate struct with its +/// own configuration, wrapped in this enum. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SwitchBranch { - /// Condition to evaluate. - pub condition: SwitchCondition, - /// Target cache slot name to route matching data. - pub target: String, +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum SwitchCondition { + /// Match by content type category. + ContentType(ContentTypeCondition), + /// Match by file extension. + FileExtension(FileExtensionCondition), + /// Match when file size is within range. + FileSize(FileSizeCondition), + /// Match when page count is within range. + PageCount(PageCountCondition), + /// Match when duration is within range (for audio/video). + Duration(DurationCondition), + /// Match by detected content language. + Language(LanguageCondition), + /// Match when file date is within range. + FileDate(FileDateCondition), + /// Match by filename pattern. + FileName(FileNameCondition), } -impl SwitchBranch { - /// Creates a new branch with the given condition and target. - pub fn new(condition: SwitchCondition, target: impl Into) -> Self { - Self { - condition, - target: target.into(), - } - } +/// Condition that matches by content type category. 
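As a sketch of how these pieces compose, the snippet below defines a size-based switch and the two edges that carry its `match_port` / `else_port` outputs to downstream nodes. The node IDs, port names, and the 25 MiB threshold are placeholders, the byte fields are assumed to be `Option<u64>`, and `NodeId` is assumed to be `Clone` (it wraps a `Uuid`).

```rust
fn route_by_size(switch: NodeId, ocr: NodeId, fast_path: NodeId) -> (SwitchDef, Vec<Edge>) {
    let def = SwitchDef {
        condition: SwitchCondition::FileSize(FileSizeCondition {
            min_bytes: Some(25 * 1024 * 1024),
            max_bytes: None,
        }),
        match_port: "large".into(),
        else_port: "small".into(),
    };

    let edges = vec![
        // Files at or above the threshold take the OCR-heavy branch...
        Edge {
            from: switch.clone(),
            to: ocr,
            from_port: Some(def.match_port.clone()),
            to_port: None,
        },
        // ...everything else goes down the fast path.
        Edge {
            from: switch,
            to: fast_path,
            from_port: Some(def.else_port.clone()),
            to_port: None,
        },
    ];

    (def, edges)
}
```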
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ContentTypeCondition { + /// Content type category to match. + pub category: ContentTypeCategory, +} + +/// Condition that matches by file extension. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FileExtensionCondition { + /// Extensions to match (without dot, e.g., "pdf", "docx"). + pub extensions: Vec, +} + +/// Condition that matches when file size is within range. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FileSizeCondition { + /// Minimum size in bytes (inclusive). + #[serde(skip_serializing_if = "Option::is_none")] + pub min_bytes: Option, + /// Maximum size in bytes (inclusive). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_bytes: Option, +} + +/// Condition that matches when page count is within range. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct PageCountCondition { + /// Minimum page count (inclusive). + #[serde(skip_serializing_if = "Option::is_none")] + pub min_pages: Option, + /// Maximum page count (inclusive). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_pages: Option, +} + +/// Condition that matches when duration is within range (for audio/video). +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct DurationCondition { + /// Minimum duration in seconds (inclusive). + #[serde(skip_serializing_if = "Option::is_none")] + pub min_seconds: Option, + /// Maximum duration in seconds (inclusive). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_seconds: Option, } -/// Condition for switch branch evaluation. +/// Condition that matches by detected content language. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum SwitchCondition { - /// Always matches (catch-all). - Always, - /// Match by content type category. - ContentType { - /// Content type category to match. - category: ContentTypeCategory, - }, - /// Match when file size exceeds threshold. - FileSizeAbove { - /// Size threshold in bytes. - threshold_bytes: u64, - }, - /// Match when file size is below threshold. - FileSizeBelow { - /// Size threshold in bytes. - threshold_bytes: u64, - }, - /// Match when page count exceeds threshold. - PageCountAbove { - /// Page count threshold. - threshold_pages: u32, - }, - /// Match when duration exceeds threshold (for audio/video). - DurationAbove { - /// Duration threshold in seconds. - threshold_seconds: u64, - }, - /// Match by detected content language. - Language { - /// Language code to match (e.g., "en", "es", "fr"). - language_code: String, - /// Minimum confidence threshold (0.0 to 1.0). - #[serde(default = "default_confidence")] - min_confidence: f32, - }, - /// Match when file date is newer than threshold. - DateNewerThan { - /// Which date field to use. - #[serde(default)] - date_field: DateField, - /// Threshold as ISO 8601 datetime or relative duration (e.g., "7d", "30d", "1y"). - threshold: String, - }, - /// Match by filename regex pattern. - FileNameMatches { - /// Regex pattern to match against filename. - pattern: String, - }, - /// Match by file extension. - FileExtension { - /// Extension to match (without dot, e.g., "pdf", "docx"). - extension: String, - }, - /// Match when metadata key exists. - HasMetadata { - /// Metadata key to check for. - key: String, - }, - /// Match when metadata key equals value. - MetadataEquals { - /// Metadata key to check. 
- key: String, - /// Value to match. - value: String, - }, +pub struct LanguageCondition { + /// Language code to match (e.g., "en", "es", "fr"). + pub code: String, + /// Minimum confidence threshold (0.0 to 1.0). + #[serde(default = "default_confidence")] + pub min_confidence: f32, +} + +/// Condition that matches when file date is within range. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FileDateCondition { + /// Which date field to check. + #[serde(default)] + pub field: DateField, + /// Earliest date (inclusive). + #[serde(skip_serializing_if = "Option::is_none")] + pub after: Option, + /// Latest date (inclusive). + #[serde(skip_serializing_if = "Option::is_none")] + pub before: Option, +} + +/// Condition that matches by filename pattern. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FileNameCondition { + /// Pattern to match against filename. + pub pattern: String, + /// Pattern type. + #[serde(default)] + pub match_type: PatternMatchType, } /// Content type categories for routing. @@ -178,9 +191,11 @@ pub enum ContentTypeCategory { Presentation, /// Code/source files. Code, + /// Other/unknown content type. + Other, } -/// Date field to use for routing. +/// Date field to use for date-based routing. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum DateField { @@ -191,6 +206,55 @@ pub enum DateField { Modified, } +/// Pattern matching type for filename conditions. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum PatternMatchType { + /// Glob pattern (e.g., "*.pdf", "report_*"). + #[default] + Glob, + /// Regular expression pattern. + Regex, + /// Exact string match. + Exact, + /// Case-insensitive contains. + Contains, +} + fn default_confidence() -> f32 { 0.8 } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_switch_def_output_ports() { + let switch = SwitchDef { + condition: SwitchCondition::ContentType(ContentTypeCondition { + category: ContentTypeCategory::Image, + }), + match_port: "images".into(), + else_port: "other".into(), + }; + + let ports: Vec<_> = switch.output_ports().collect(); + assert_eq!(ports, vec!["images", "other"]); + } + + #[test] + fn test_serialization() { + let switch = SwitchDef { + condition: SwitchCondition::FileExtension(FileExtensionCondition { + extensions: vec!["pdf".into(), "docx".into()], + }), + match_port: "documents".into(), + else_port: "other".into(), + }; + + let json = serde_json::to_string_pretty(&switch).unwrap(); + let deserialized: SwitchDef = serde_json::from_str(&json).unwrap(); + assert_eq!(switch, deserialized); + } +} diff --git a/crates/nvisy-runtime/src/graph/definition/transform.rs b/crates/nvisy-runtime/src/graph/definition/transform.rs deleted file mode 100644 index 9c8faaf..0000000 --- a/crates/nvisy-runtime/src/graph/definition/transform.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Transform node definition types. -//! -//! This module re-exports the transform types from the transform module. 
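One thing the round-trip test above doesn't show is the actual wire format the `kind` tag produces: serde's internal tagging places the inner condition struct's fields next to the tag itself. A sketch of that shape as an extra test, assuming `serde_json` is available as a dev-dependency:

```rust
#[test]
fn switch_def_wire_format() {
    let switch = SwitchDef {
        condition: SwitchCondition::FileExtension(FileExtensionCondition {
            extensions: vec!["pdf".into(), "docx".into()],
        }),
        match_port: "documents".into(),
        else_port: "other".into(),
    };

    let value = serde_json::to_value(&switch).unwrap();
    assert_eq!(
        value,
        serde_json::json!({
            "condition": {
                "kind": "file_extension",
                "extensions": ["pdf", "docx"]
            },
            "match_port": "documents",
            "else_port": "other"
        })
    );
}
```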
- -pub use crate::graph::transform::{ - Chunk, Derive, Embedding, Enrich, Extract, Partition, Transform, Transformer, -}; diff --git a/crates/nvisy-runtime/src/graph/transform/chunk.rs b/crates/nvisy-runtime/src/graph/definition/transform/chunk.rs similarity index 74% rename from crates/nvisy-runtime/src/graph/transform/chunk.rs rename to crates/nvisy-runtime/src/graph/definition/transform/chunk.rs index eda56d3..b139ec1 100644 --- a/crates/nvisy-runtime/src/graph/transform/chunk.rs +++ b/crates/nvisy-runtime/src/graph/definition/transform/chunk.rs @@ -1,13 +1,8 @@ -//! Chunk transformer. +//! Chunk transform definition. -use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use super::Transform; -use crate::error::Result; -use crate::provider::CredentialsRegistry; - -/// Chunking transformer for splitting content into smaller pieces. +/// Chunking transform for splitting content into smaller pieces. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Chunk { /// Chunking strategy. @@ -22,18 +17,6 @@ pub struct Chunk { pub contextual_chunking: bool, } -impl Transform for Chunk { - async fn transform( - &self, - input: Vec, - _registry: &CredentialsRegistry, - ) -> Result> { - // TODO: Implement chunking based on strategy - // For now, pass through unchanged - Ok(input) - } -} - /// Chunking strategy. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "chunk_strategy", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/transform/derive.rs b/crates/nvisy-runtime/src/graph/definition/transform/derive.rs similarity index 58% rename from crates/nvisy-runtime/src/graph/transform/derive.rs rename to crates/nvisy-runtime/src/graph/definition/transform/derive.rs index 34060f1..558acd1 100644 --- a/crates/nvisy-runtime/src/graph/transform/derive.rs +++ b/crates/nvisy-runtime/src/graph/definition/transform/derive.rs @@ -1,13 +1,10 @@ -//! Derive transformer - generate new content from input. +//! Derive transform definition. -use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use super::Transform; -use crate::error::Result; -use crate::provider::{CompletionProviderParams, CredentialsRegistry}; +use crate::provider::CompletionProviderParams; -/// Derive transformer for generating new content from input. +/// Derive transform for generating new content from input. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Derive { /// Completion provider parameters (includes credentials_id and model). @@ -22,18 +19,6 @@ pub struct Derive { pub override_prompt: Option, } -impl Transform for Derive { - async fn transform( - &self, - input: Vec, - _registry: &CredentialsRegistry, - ) -> Result> { - // TODO: Implement derivation using completion provider - // For now, pass through unchanged - Ok(input) - } -} - /// Tasks for generating new content from input. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/definition/transform/embedding.rs b/crates/nvisy-runtime/src/graph/definition/transform/embedding.rs new file mode 100644 index 0000000..296a93e --- /dev/null +++ b/crates/nvisy-runtime/src/graph/definition/transform/embedding.rs @@ -0,0 +1,17 @@ +//! Embedding transform definition. + +use serde::{Deserialize, Serialize}; + +use crate::provider::EmbeddingProviderParams; + +/// Embedding transform for generating vector embeddings. 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Embedding { + /// Embedding provider parameters (includes credentials_id and model). + #[serde(flatten)] + pub provider: EmbeddingProviderParams, + + /// Whether to L2-normalize the output embeddings. + #[serde(default)] + pub normalize: bool, +} diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/definition/transform/enrich.rs similarity index 73% rename from crates/nvisy-runtime/src/graph/transform/enrich.rs rename to crates/nvisy-runtime/src/graph/definition/transform/enrich.rs index 14e8783..d7552a1 100644 --- a/crates/nvisy-runtime/src/graph/transform/enrich.rs +++ b/crates/nvisy-runtime/src/graph/definition/transform/enrich.rs @@ -1,13 +1,10 @@ -//! Enrich transformer - add metadata/descriptions to elements. +//! Enrich transform definition. -use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use super::Transform; -use crate::error::Result; -use crate::provider::{CompletionProviderParams, CredentialsRegistry}; +use crate::provider::CompletionProviderParams; -/// Enrich transformer for adding metadata/descriptions to elements. +/// Enrich transform for adding metadata/descriptions to elements. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Enrich { /// Completion provider parameters (includes credentials_id and model). @@ -23,18 +20,6 @@ pub struct Enrich { pub override_prompt: Option, } -impl Transform for Enrich { - async fn transform( - &self, - input: Vec, - _registry: &CredentialsRegistry, - ) -> Result> { - // TODO: Implement enrichment using completion provider - // For now, pass through unchanged - Ok(input) - } -} - /// Tasks for adding metadata/descriptions to elements. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "input_type", content = "task", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/transform/extract.rs b/crates/nvisy-runtime/src/graph/definition/transform/extract.rs similarity index 81% rename from crates/nvisy-runtime/src/graph/transform/extract.rs rename to crates/nvisy-runtime/src/graph/definition/transform/extract.rs index f5c5c76..2afb5ed 100644 --- a/crates/nvisy-runtime/src/graph/transform/extract.rs +++ b/crates/nvisy-runtime/src/graph/definition/transform/extract.rs @@ -1,13 +1,10 @@ -//! Extract transformer - extract structured data or convert formats. +//! Extract transform definition. -use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use super::Transform; -use crate::error::Result; -use crate::provider::{CompletionProviderParams, CredentialsRegistry}; +use crate::provider::CompletionProviderParams; -/// Extract transformer for extracting structured data or converting formats. +/// Extract transform for extracting structured data or converting formats. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Extract { /// Completion provider parameters (includes credentials_id and model). @@ -23,18 +20,6 @@ pub struct Extract { pub override_prompt: Option, } -impl Transform for Extract { - async fn transform( - &self, - input: Vec, - _registry: &CredentialsRegistry, - ) -> Result> { - // TODO: Implement extraction using completion provider - // For now, pass through unchanged - Ok(input) - } -} - /// Tasks for extracting structured data or converting formats. 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "task_type", content = "task", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/definition/transform/mod.rs b/crates/nvisy-runtime/src/graph/definition/transform/mod.rs new file mode 100644 index 0000000..68f8170 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/definition/transform/mod.rs @@ -0,0 +1,43 @@ +//! Transform definition types. +//! +//! This module contains serializable definitions for transform nodes. +//! Each transform type defines the configuration needed to perform +//! a specific data transformation in a workflow. + +mod chunk; +mod derive; +mod embedding; +mod enrich; +mod extract; +mod partition; + +pub use chunk::{Chunk, ChunkStrategy}; +pub use derive::{Derive, DeriveTask}; +pub use embedding::Embedding; +pub use enrich::{Enrich, EnrichTask, ImageEnrichTask, TableEnrichTask}; +pub use extract::{ + AnalyzeTask, ConvertTask, Extract, ExtractTask, TableConvertTask, TextConvertTask, +}; +pub use partition::{Partition, PartitionStrategy}; +use serde::{Deserialize, Serialize}; + +/// Transformer node variant. +/// +/// Each variant represents a different type of data transformation +/// that can be performed in a workflow pipeline. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum Transformer { + /// Partition documents into elements. + Partition(Partition), + /// Chunk content into smaller pieces. + Chunk(Chunk), + /// Generate vector embeddings. + Embedding(Embedding), + /// Enrich elements with metadata/descriptions. + Enrich(Enrich), + /// Extract structured data or convert formats. + Extract(Extract), + /// Generate new content from input. + Derive(Derive), +} diff --git a/crates/nvisy-runtime/src/graph/transform/partition.rs b/crates/nvisy-runtime/src/graph/definition/transform/partition.rs similarity index 62% rename from crates/nvisy-runtime/src/graph/transform/partition.rs rename to crates/nvisy-runtime/src/graph/definition/transform/partition.rs index 8f6df97..c4f0c2f 100644 --- a/crates/nvisy-runtime/src/graph/transform/partition.rs +++ b/crates/nvisy-runtime/src/graph/definition/transform/partition.rs @@ -1,13 +1,8 @@ -//! Partition transformer. +//! Partition transform definition. -use nvisy_dal::AnyDataValue; use serde::{Deserialize, Serialize}; -use super::Transform; -use crate::error::Result; -use crate::provider::CredentialsRegistry; - -/// Partition transformer for partitioning documents into elements. +/// Partition transform for partitioning documents into elements. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct Partition { /// Partitioning strategy. @@ -22,18 +17,6 @@ pub struct Partition { pub discard_unsupported: bool, } -impl Transform for Partition { - async fn transform( - &self, - input: Vec, - _registry: &CredentialsRegistry, - ) -> Result> { - // TODO: Implement document partitioning based on strategy - // For now, pass through unchanged - Ok(input) - } -} - /// Partitioning strategy for document element extraction. 
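The new `Transformer` enum uses the same `kind`-tagged layout, so a serialized node carries a `"kind"` discriminant with the wrapped transform's fields flattened beside it. A self-contained sketch of that design follows; the `Chunk` and `Partition` structs here are simplified stand-ins with invented fields, not the crate's definitions:

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Chunk {
    max_characters: usize,
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Partition {
    include_page_breaks: bool,
}

// Each variant wraps its own config struct, exactly like the enum above.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum Transformer {
    Chunk(Chunk),
    Partition(Partition),
}

fn main() {
    let node: Transformer =
        serde_json::from_str(r#"{ "kind": "chunk", "max_characters": 2048 }"#).unwrap();
    assert_eq!(node, Transformer::Chunk(Chunk { max_characters: 2048 }));
}
```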
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/graph/definition/workflow.rs b/crates/nvisy-runtime/src/graph/definition/workflow.rs index 5e98ef3..2892f30 100644 --- a/crates/nvisy-runtime/src/graph/definition/workflow.rs +++ b/crates/nvisy-runtime/src/graph/definition/workflow.rs @@ -147,9 +147,8 @@ pub enum ValidationError { mod tests { use super::*; use crate::graph::definition::{ - CacheSlot, InputDef, InputSource, OutputDef, OutputTarget, Transformer, + CacheSlot, InputDef, InputSource, OutputDef, OutputTarget, Partition, Transformer, }; - use crate::graph::transform::Partition; use uuid::Uuid; /// Creates a deterministic NodeId for testing. diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index 78a3fb9..dfd4629 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -8,6 +8,7 @@ //! - [`definition::NodeDef`]: Node definition enum (Input, Transform, Output, Switch) //! - [`definition::InputDef`], [`definition::OutputDef`]: I/O node definitions //! - [`definition::CacheSlot`]: Named cache slot for inter-node data passing +//! - [`definition::Transformer`]: Enum of all transform definition types //! //! ## Compiled Types //! Runtime-optimized types in [`compiled`]: @@ -15,11 +16,7 @@ //! - [`compiled::CompiledNode`]: Compiled node enum (Input, Output, Transform, Switch) //! - [`compiled::CompiledInput`], [`compiled::CompiledOutput`]: Compiled I/O nodes //! - [`compiled::CompiledTransform`]: Compiled transform with processor structs -//! -//! ## Transform Types -//! Transform definitions in [`transform`]: -//! - [`transform::Transformer`]: Enum of all transform types -//! - [`transform::Transform`]: Trait for data transformation +//! - [`compiled::Process`]: Trait for processing data in processors //! //! ## Compiler //! The [`compiler`] module compiles definitions into executable graphs. @@ -27,14 +24,12 @@ pub mod compiled; pub mod compiler; pub mod definition; -pub mod transform; // Re-export commonly used types from definition module pub use definition::{ - CacheSlot, Edge, EdgeData, InputDef, InputProvider, InputSource, Node, NodeCommon, NodeDef, - NodeId, OutputDef, OutputProviderDef, OutputTarget, SwitchBranch, SwitchCondition, SwitchDef, - ValidationError, WorkflowDefinition, WorkflowMetadata, + CacheSlot, ContentTypeCategory, ContentTypeCondition, DateField, DurationCondition, Edge, + EdgeData, FileDateCondition, FileExtensionCondition, FileNameCondition, FileSizeCondition, + InputDef, InputProvider, InputSource, LanguageCondition, Node, NodeCommon, NodeDef, NodeId, + OutputDef, OutputProvider, OutputTarget, PageCountCondition, PatternMatchType, Position, + SwitchCondition, SwitchDef, Transformer, ValidationError, WorkflowDefinition, WorkflowMetadata, }; - -// Re-export transform types -pub use transform::Transformer; diff --git a/crates/nvisy-runtime/src/graph/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs deleted file mode 100644 index 25d747b..0000000 --- a/crates/nvisy-runtime/src/graph/transform/embedding.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! Embedding transformer. - -use nvisy_dal::AnyDataValue; -use serde::{Deserialize, Serialize}; - -use super::Transform; -use crate::error::Result; -use crate::provider::{CredentialsRegistry, EmbeddingProviderParams}; - -/// Embedding transformer for generating vector embeddings. 
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct Embedding { - /// Embedding provider parameters (includes credentials_id and model). - #[serde(flatten)] - pub provider: EmbeddingProviderParams, - - /// Whether to L2-normalize the output embeddings. - #[serde(default)] - pub normalize: bool, -} - -impl Transform for Embedding { - async fn transform( - &self, - input: Vec, - _registry: &CredentialsRegistry, - ) -> Result> { - // TODO: Implement embedding generation using provider - // For now, pass through unchanged - Ok(input) - } -} diff --git a/crates/nvisy-runtime/src/graph/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs deleted file mode 100644 index e2001d7..0000000 --- a/crates/nvisy-runtime/src/graph/transform/mod.rs +++ /dev/null @@ -1,80 +0,0 @@ -//! Transformer node types for processing and transforming data. - -mod chunk; -mod derive; -mod embedding; -mod enrich; -mod extract; -mod partition; - -use std::future::Future; - -pub use chunk::{Chunk, ChunkStrategy}; -pub use derive::{Derive, DeriveTask}; -pub use embedding::Embedding; -pub use enrich::{Enrich, EnrichTask, ImageEnrichTask, TableEnrichTask}; -pub use extract::{ - AnalyzeTask, ConvertTask, Extract, ExtractTask, TableConvertTask, TextConvertTask, -}; -use nvisy_dal::AnyDataValue; -pub use partition::{Partition, PartitionStrategy}; -use serde::{Deserialize, Serialize}; - -use crate::error::Result; -use crate::provider::CredentialsRegistry; - -/// Trait for transforming data in a workflow pipeline. -/// -/// Transforms take input data items and produce output data items. -/// A single input can produce multiple outputs (e.g., chunking splits one document -/// into many chunks, or embedding generates one vector per chunk). -pub trait Transform { - /// Transforms input data items into output data items. - /// - /// # Arguments - /// * `input` - The input data items to transform - /// * `registry` - Credentials registry for accessing external services - /// - /// # Returns - /// A vector of transformed data items (may be more or fewer than input) - fn transform( - &self, - input: Vec, - registry: &CredentialsRegistry, - ) -> impl Future>> + Send; -} - -/// Transformer node variant. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub enum Transformer { - /// Partition documents into elements. - Partition(Partition), - /// Chunk content into smaller pieces. - Chunk(Chunk), - /// Generate vector embeddings. - Embedding(Embedding), - /// Enrich elements with metadata/descriptions. - Enrich(Enrich), - /// Extract structured data or convert formats. - Extract(Extract), - /// Generate new content from input. 
- Derive(Derive), -} - -impl Transform for Transformer { - async fn transform( - &self, - input: Vec, - registry: &CredentialsRegistry, - ) -> Result> { - match self { - Self::Partition(t) => t.transform(input, registry).await, - Self::Chunk(t) => t.transform(input, registry).await, - Self::Embedding(t) => t.transform(input, registry).await, - Self::Enrich(t) => t.transform(input, registry).await, - Self::Extract(t) => t.transform(input, registry).await, - Self::Derive(t) => t.transform(input, registry).await, - } - } -} From 6e63670a52cf9d1182bf914f6190be7cd84c300b Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 22 Jan 2026 13:59:08 +0100 Subject: [PATCH 18/28] refactor(runtime): reorganize module structure with definition/graph separation - Move graph/definition to src/definition (top-level module) - Move graph/compiled to src/graph (renamed from compiled) - Move compiler.rs into engine folder (private module) - Engine.execute() now accepts WorkflowDefinition directly - Rename graph/graph.rs to compiled.rs to avoid module inception warning - Update all imports across nvisy-runtime and nvisy-server --- .../src/agent/tools/context_store.rs | 6 +-- .../src/agent/tools/document_fetch.rs | 6 +-- .../src/agent/tools/image_analysis.rs | 6 +-- .../nvisy-rig/src/agent/tools/json_schema.rs | 9 ++-- .../src/agent/tools/metadata_query.rs | 6 +-- .../nvisy-rig/src/agent/tools/scratchpad.rs | 6 +-- .../src/agent/tools/vector_search.rs | 6 +-- crates/nvisy-rig/src/agent/tools/web_fetch.rs | 9 ++-- .../nvisy-rig/src/provider/embedding/mod.rs | 2 +- .../src/{graph => }/definition/edge.rs | 0 .../src/{graph => }/definition/input.rs | 3 +- .../src/{graph => }/definition/metadata.rs | 0 .../src/{graph => }/definition/mod.rs | 0 .../src/{graph => }/definition/node.rs | 0 .../src/{graph => }/definition/output.rs | 3 +- .../src/{graph => }/definition/route.rs | 0 .../{graph => }/definition/transform/chunk.rs | 0 .../definition/transform/derive.rs | 0 .../definition/transform/embedding.rs | 0 .../definition/transform/enrich.rs | 0 .../definition/transform/extract.rs | 0 .../{graph => }/definition/transform/mod.rs | 0 .../definition/transform/partition.rs | 0 .../src/{graph => }/definition/workflow.rs | 5 +- .../src/{graph => engine}/compiler.rs | 30 ++++------- crates/nvisy-runtime/src/engine/executor.rs | 29 ++++++++--- crates/nvisy-runtime/src/engine/mod.rs | 1 + crates/nvisy-runtime/src/error.rs | 2 +- .../graph/{compiled/graph.rs => compiled.rs} | 2 +- .../nvisy-runtime/src/graph/compiled/mod.rs | 27 ---------- .../src/graph/{compiled => }/input/mod.rs | 0 .../src/graph/{compiled => }/input/stream.rs | 0 crates/nvisy-runtime/src/graph/mod.rs | 52 ++++++++----------- .../src/graph/{compiled => }/node.rs | 0 .../src/graph/{compiled => }/output/mod.rs | 0 .../src/graph/{compiled => }/output/stream.rs | 0 .../src/graph/{compiled => }/route.rs | 14 +++-- .../graph/{compiled => }/transform/chunk.rs | 2 +- .../graph/{compiled => }/transform/derive.rs | 2 +- .../{compiled => }/transform/embedding.rs | 0 .../graph/{compiled => }/transform/enrich.rs | 2 +- .../graph/{compiled => }/transform/extract.rs | 2 +- .../src/graph/{compiled => }/transform/mod.rs | 0 .../{compiled => }/transform/partition.rs | 2 +- crates/nvisy-runtime/src/lib.rs | 1 + .../nvisy-runtime/src/provider/backend/mod.rs | 20 ++++--- crates/nvisy-runtime/src/provider/mod.rs | 27 ++++------ crates/nvisy-runtime/src/provider/outputs.rs | 3 +- .../src/handler/request/pipelines.rs | 2 +- .../src/handler/response/pipelines.rs | 2 +- 50 files 
changed, 127 insertions(+), 162 deletions(-) rename crates/nvisy-runtime/src/{graph => }/definition/edge.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/input.rs (99%) rename crates/nvisy-runtime/src/{graph => }/definition/metadata.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/mod.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/node.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/output.rs (99%) rename crates/nvisy-runtime/src/{graph => }/definition/route.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/transform/chunk.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/transform/derive.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/transform/embedding.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/transform/enrich.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/transform/extract.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/transform/mod.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/transform/partition.rs (100%) rename crates/nvisy-runtime/src/{graph => }/definition/workflow.rs (99%) rename crates/nvisy-runtime/src/{graph => engine}/compiler.rs (96%) rename crates/nvisy-runtime/src/graph/{compiled/graph.rs => compiled.rs} (99%) delete mode 100644 crates/nvisy-runtime/src/graph/compiled/mod.rs rename crates/nvisy-runtime/src/graph/{compiled => }/input/mod.rs (100%) rename crates/nvisy-runtime/src/graph/{compiled => }/input/stream.rs (100%) rename crates/nvisy-runtime/src/graph/{compiled => }/node.rs (100%) rename crates/nvisy-runtime/src/graph/{compiled => }/output/mod.rs (100%) rename crates/nvisy-runtime/src/graph/{compiled => }/output/stream.rs (100%) rename crates/nvisy-runtime/src/graph/{compiled => }/route.rs (96%) rename crates/nvisy-runtime/src/graph/{compiled => }/transform/chunk.rs (97%) rename crates/nvisy-runtime/src/graph/{compiled => }/transform/derive.rs (97%) rename crates/nvisy-runtime/src/graph/{compiled => }/transform/embedding.rs (100%) rename crates/nvisy-runtime/src/graph/{compiled => }/transform/enrich.rs (97%) rename crates/nvisy-runtime/src/graph/{compiled => }/transform/extract.rs (97%) rename crates/nvisy-runtime/src/graph/{compiled => }/transform/mod.rs (100%) rename crates/nvisy-runtime/src/graph/{compiled => }/transform/partition.rs (96%) diff --git a/crates/nvisy-rig/src/agent/tools/context_store.rs b/crates/nvisy-rig/src/agent/tools/context_store.rs index 2d516b0..05e4376 100644 --- a/crates/nvisy-rig/src/agent/tools/context_store.rs +++ b/crates/nvisy-rig/src/agent/tools/context_store.rs @@ -94,12 +94,12 @@ impl ContextStoreTool { } impl Tool for ContextStoreTool { - const NAME: &'static str = "context_store"; - - type Error = ContextStoreError; type Args = ContextStoreArgs; + type Error = ContextStoreError; type Output = ContextStoreResult; + const NAME: &'static str = "context_store"; + async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), diff --git a/crates/nvisy-rig/src/agent/tools/document_fetch.rs b/crates/nvisy-rig/src/agent/tools/document_fetch.rs index 7f2b73b..7205761 100644 --- a/crates/nvisy-rig/src/agent/tools/document_fetch.rs +++ b/crates/nvisy-rig/src/agent/tools/document_fetch.rs @@ -74,12 +74,12 @@ impl DocumentFetchTool { } impl Tool for DocumentFetchTool { - const NAME: &'static str = "document_fetch"; - - type Error = DocumentFetchError; type Args = DocumentFetchArgs; + type Error = 
DocumentFetchError; type Output = Vec; + const NAME: &'static str = "document_fetch"; + async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), diff --git a/crates/nvisy-rig/src/agent/tools/image_analysis.rs b/crates/nvisy-rig/src/agent/tools/image_analysis.rs index 37be018..702f2fd 100644 --- a/crates/nvisy-rig/src/agent/tools/image_analysis.rs +++ b/crates/nvisy-rig/src/agent/tools/image_analysis.rs @@ -84,12 +84,12 @@ impl ImageAnalysisTool { } impl Tool for ImageAnalysisTool { - const NAME: &'static str = "image_analysis"; - - type Error = ImageAnalysisError; type Args = ImageAnalysisArgs; + type Error = ImageAnalysisError; type Output = ImageAnalysisResult; + const NAME: &'static str = "image_analysis"; + async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), diff --git a/crates/nvisy-rig/src/agent/tools/json_schema.rs b/crates/nvisy-rig/src/agent/tools/json_schema.rs index 66198e5..51caaeb 100644 --- a/crates/nvisy-rig/src/agent/tools/json_schema.rs +++ b/crates/nvisy-rig/src/agent/tools/json_schema.rs @@ -232,12 +232,12 @@ impl Default for JsonSchemaTool { } impl Tool for JsonSchemaTool { - const NAME: &'static str = "json_schema"; - - type Error = JsonSchemaError; type Args = JsonSchemaArgs; + type Error = JsonSchemaError; type Output = JsonSchemaResult; + const NAME: &'static str = "json_schema"; + async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), @@ -270,9 +270,10 @@ impl Tool for JsonSchemaTool { #[cfg(test)] mod tests { - use super::*; use serde_json::json; + use super::*; + #[tokio::test] async fn test_valid_object() { let tool = JsonSchemaTool::new(); diff --git a/crates/nvisy-rig/src/agent/tools/metadata_query.rs b/crates/nvisy-rig/src/agent/tools/metadata_query.rs index 50d85bf..395e435 100644 --- a/crates/nvisy-rig/src/agent/tools/metadata_query.rs +++ b/crates/nvisy-rig/src/agent/tools/metadata_query.rs @@ -119,12 +119,12 @@ impl MetadataQueryTool { } impl Tool for MetadataQueryTool { - const NAME: &'static str = "metadata_query"; - - type Error = MetadataQueryError; type Args = MetadataQueryArgs; + type Error = MetadataQueryError; type Output = Vec; + const NAME: &'static str = "metadata_query"; + async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), diff --git a/crates/nvisy-rig/src/agent/tools/scratchpad.rs b/crates/nvisy-rig/src/agent/tools/scratchpad.rs index 9afa781..8d9ca86 100644 --- a/crates/nvisy-rig/src/agent/tools/scratchpad.rs +++ b/crates/nvisy-rig/src/agent/tools/scratchpad.rs @@ -166,12 +166,12 @@ impl ScratchpadTool { } impl Tool for ScratchpadTool { - const NAME: &'static str = "scratchpad"; - - type Error = ScratchpadError; type Args = ScratchpadArgs; + type Error = ScratchpadError; type Output = ScratchpadResult; + const NAME: &'static str = "scratchpad"; + async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), diff --git a/crates/nvisy-rig/src/agent/tools/vector_search.rs b/crates/nvisy-rig/src/agent/tools/vector_search.rs index b165a5f..f2f8c74 100644 --- a/crates/nvisy-rig/src/agent/tools/vector_search.rs +++ b/crates/nvisy-rig/src/agent/tools/vector_search.rs @@ -81,12 +81,12 @@ impl VectorSearchTool { } impl Tool for VectorSearchTool { - const NAME: &'static str = "vector_search"; - - type Error = VectorSearchError; type Args = VectorSearchArgs; + type Error = 
VectorSearchError; type Output = Vec; + const NAME: &'static str = "vector_search"; + async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), diff --git a/crates/nvisy-rig/src/agent/tools/web_fetch.rs b/crates/nvisy-rig/src/agent/tools/web_fetch.rs index b20985f..e58ca0c 100644 --- a/crates/nvisy-rig/src/agent/tools/web_fetch.rs +++ b/crates/nvisy-rig/src/agent/tools/web_fetch.rs @@ -1,10 +1,11 @@ //! Web fetch tool for retrieving content from URLs. +use std::sync::Arc; + use async_trait::async_trait; use rig::completion::ToolDefinition; use rig::tool::Tool; use serde::{Deserialize, Serialize}; -use std::sync::Arc; /// Error type for web fetch operations. #[derive(Debug, thiserror::Error)] @@ -196,12 +197,12 @@ impl WebFetchTool { } impl Tool for WebFetchTool { - const NAME: &'static str = "web_fetch"; - - type Error = WebFetchError; type Args = WebFetchArgs; + type Error = WebFetchError; type Output = WebFetchResult; + const NAME: &'static str = "web_fetch"; + async fn definition(&self, _prompt: String) -> ToolDefinition { ToolDefinition { name: Self::NAME.to_string(), diff --git a/crates/nvisy-rig/src/provider/embedding/mod.rs b/crates/nvisy-rig/src/provider/embedding/mod.rs index 6399aca..4f970a8 100644 --- a/crates/nvisy-rig/src/provider/embedding/mod.rs +++ b/crates/nvisy-rig/src/provider/embedding/mod.rs @@ -9,4 +9,4 @@ pub use credentials::EmbeddingCredentials; #[cfg(feature = "ollama")] pub use model::OllamaEmbeddingModel; pub use model::{CohereEmbeddingModel, EmbeddingModel, GeminiEmbeddingModel, OpenAiEmbeddingModel}; -pub use provider::{EmbeddingProvider}; +pub use provider::EmbeddingProvider; diff --git a/crates/nvisy-runtime/src/graph/definition/edge.rs b/crates/nvisy-runtime/src/definition/edge.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/edge.rs rename to crates/nvisy-runtime/src/definition/edge.rs diff --git a/crates/nvisy-runtime/src/graph/definition/input.rs b/crates/nvisy-runtime/src/definition/input.rs similarity index 99% rename from crates/nvisy-runtime/src/graph/definition/input.rs rename to crates/nvisy-runtime/src/definition/input.rs index efcf8d3..3f23adb 100644 --- a/crates/nvisy-runtime/src/graph/definition/input.rs +++ b/crates/nvisy-runtime/src/definition/input.rs @@ -2,9 +2,8 @@ use serde::{Deserialize, Serialize}; -use crate::provider::InputProviderParams; - use super::route::CacheSlot; +use crate::provider::InputProviderParams; /// Input provider definition for workflow nodes. 
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] diff --git a/crates/nvisy-runtime/src/graph/definition/metadata.rs b/crates/nvisy-runtime/src/definition/metadata.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/metadata.rs rename to crates/nvisy-runtime/src/definition/metadata.rs diff --git a/crates/nvisy-runtime/src/graph/definition/mod.rs b/crates/nvisy-runtime/src/definition/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/mod.rs rename to crates/nvisy-runtime/src/definition/mod.rs diff --git a/crates/nvisy-runtime/src/graph/definition/node.rs b/crates/nvisy-runtime/src/definition/node.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/node.rs rename to crates/nvisy-runtime/src/definition/node.rs diff --git a/crates/nvisy-runtime/src/graph/definition/output.rs b/crates/nvisy-runtime/src/definition/output.rs similarity index 99% rename from crates/nvisy-runtime/src/graph/definition/output.rs rename to crates/nvisy-runtime/src/definition/output.rs index 18fa4af..53b2df4 100644 --- a/crates/nvisy-runtime/src/graph/definition/output.rs +++ b/crates/nvisy-runtime/src/definition/output.rs @@ -2,9 +2,8 @@ use serde::{Deserialize, Serialize}; -use crate::provider::OutputProviderParams; - use super::route::CacheSlot; +use crate::provider::OutputProviderParams; /// Output provider definition for workflow nodes. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] diff --git a/crates/nvisy-runtime/src/graph/definition/route.rs b/crates/nvisy-runtime/src/definition/route.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/route.rs rename to crates/nvisy-runtime/src/definition/route.rs diff --git a/crates/nvisy-runtime/src/graph/definition/transform/chunk.rs b/crates/nvisy-runtime/src/definition/transform/chunk.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/transform/chunk.rs rename to crates/nvisy-runtime/src/definition/transform/chunk.rs diff --git a/crates/nvisy-runtime/src/graph/definition/transform/derive.rs b/crates/nvisy-runtime/src/definition/transform/derive.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/transform/derive.rs rename to crates/nvisy-runtime/src/definition/transform/derive.rs diff --git a/crates/nvisy-runtime/src/graph/definition/transform/embedding.rs b/crates/nvisy-runtime/src/definition/transform/embedding.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/transform/embedding.rs rename to crates/nvisy-runtime/src/definition/transform/embedding.rs diff --git a/crates/nvisy-runtime/src/graph/definition/transform/enrich.rs b/crates/nvisy-runtime/src/definition/transform/enrich.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/transform/enrich.rs rename to crates/nvisy-runtime/src/definition/transform/enrich.rs diff --git a/crates/nvisy-runtime/src/graph/definition/transform/extract.rs b/crates/nvisy-runtime/src/definition/transform/extract.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/transform/extract.rs rename to crates/nvisy-runtime/src/definition/transform/extract.rs diff --git a/crates/nvisy-runtime/src/graph/definition/transform/mod.rs b/crates/nvisy-runtime/src/definition/transform/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/transform/mod.rs rename to crates/nvisy-runtime/src/definition/transform/mod.rs diff --git 
a/crates/nvisy-runtime/src/graph/definition/transform/partition.rs b/crates/nvisy-runtime/src/definition/transform/partition.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/definition/transform/partition.rs rename to crates/nvisy-runtime/src/definition/transform/partition.rs diff --git a/crates/nvisy-runtime/src/graph/definition/workflow.rs b/crates/nvisy-runtime/src/definition/workflow.rs similarity index 99% rename from crates/nvisy-runtime/src/graph/definition/workflow.rs rename to crates/nvisy-runtime/src/definition/workflow.rs index 2892f30..ba07e20 100644 --- a/crates/nvisy-runtime/src/graph/definition/workflow.rs +++ b/crates/nvisy-runtime/src/definition/workflow.rs @@ -145,11 +145,12 @@ pub enum ValidationError { #[cfg(test)] mod tests { + use uuid::Uuid; + use super::*; - use crate::graph::definition::{ + use crate::definition::{ CacheSlot, InputDef, InputSource, OutputDef, OutputTarget, Partition, Transformer, }; - use uuid::Uuid; /// Creates a deterministic NodeId for testing. fn test_node_id(n: u128) -> NodeId { diff --git a/crates/nvisy-runtime/src/graph/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs similarity index 96% rename from crates/nvisy-runtime/src/graph/compiler.rs rename to crates/nvisy-runtime/src/engine/compiler.rs index 1341faf..be1eafa 100644 --- a/crates/nvisy-runtime/src/graph/compiler.rs +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -17,18 +17,17 @@ use nvisy_rig::agent::Agents; use nvisy_rig::provider::CompletionProvider; use petgraph::graph::{DiGraph, NodeIndex}; +use crate::definition::{EdgeData, InputSource, NodeDef, NodeId, OutputTarget, WorkflowDefinition}; use crate::error::{Error, Result}; -use crate::provider::{ - CompletionProviderParams, CredentialsRegistry, EmbeddingProviderParams, InputProviderParams, - IntoProvider, OutputProviderParams, -}; - -use super::compiled::{ +use crate::graph::{ ChunkProcessor, CompiledGraph, CompiledInput, CompiledNode, CompiledOutput, CompiledSwitch, CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, ExtractProcessor, InputStream, OutputStream, PartitionProcessor, }; -use super::definition::{EdgeData, InputSource, NodeDef, NodeId, OutputTarget, WorkflowDefinition}; +use crate::provider::{ + CompletionProviderParams, CredentialsRegistry, EmbeddingProviderParams, InputProviderParams, + IntoProvider, OutputProviderParams, +}; /// Workflow compiler that transforms definitions into executable graphs. pub struct WorkflowCompiler<'a> { @@ -200,7 +199,7 @@ impl<'a> WorkflowCompiler<'a> { /// Creates an input stream from an input definition. async fn create_input_stream( &self, - input: &super::definition::InputDef, + input: &crate::definition::InputDef, ) -> Result { match &input.source { InputSource::Provider(provider_def) => { @@ -239,7 +238,7 @@ impl<'a> WorkflowCompiler<'a> { /// Creates an output stream from an output definition. async fn create_output_stream( &self, - output: &super::definition::OutputDef, + output: &crate::definition::OutputDef, ) -> Result { match &output.target { OutputTarget::Provider(provider_def) => { @@ -273,9 +272,9 @@ impl<'a> WorkflowCompiler<'a> { /// Creates a processor from a transformer definition. 
async fn create_processor( &self, - transformer: &super::definition::Transformer, + transformer: &crate::definition::Transformer, ) -> Result { - use super::definition::Transformer; + use crate::definition::Transformer; match transformer { Transformer::Partition(p) => Ok(CompiledTransform::Partition(PartitionProcessor::new( @@ -402,12 +401,3 @@ struct ResolvedEdge { struct ResolvedDefinition { edges: Vec, } - -/// Convenience function to compile a workflow definition. -pub async fn compile( - def: WorkflowDefinition, - registry: &CredentialsRegistry, - ctx: Context, -) -> Result { - WorkflowCompiler::new(registry, ctx).compile(def).await -} diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index 66421f8..9d12a75 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -3,13 +3,15 @@ use std::sync::Arc; use futures::{SinkExt, StreamExt}; +use nvisy_dal::core::Context; use tokio::sync::Semaphore; use super::EngineConfig; +use super::compiler::WorkflowCompiler; use super::context::ExecutionContext; +use crate::definition::{NodeId, WorkflowDefinition}; use crate::error::{Error, Result}; -use crate::graph::NodeId; -use crate::graph::compiled::{CompiledGraph, CompiledNode, InputStream, OutputStream, Process}; +use crate::graph::{CompiledGraph, CompiledNode, InputStream, OutputStream, Process}; use crate::provider::CredentialsRegistry; /// Tracing target for engine operations. @@ -50,15 +52,30 @@ impl Engine { &self.config } - /// Executes a pre-compiled workflow graph. - /// - /// The graph should be compiled using [`crate::graph::compiler::compile`] - /// before execution. + /// Executes a workflow definition. /// + /// The definition is compiled into an executable graph and then executed. /// Execution is pipe-based: items are read from inputs one at a time, /// flow through all transformers, and are written to outputs before /// the next item is processed. pub async fn execute( + &self, + definition: WorkflowDefinition, + credentials: CredentialsRegistry, + ctx: Context, + ) -> Result { + // Compile the definition into an executable graph + let compiler = WorkflowCompiler::new(&credentials, ctx); + let graph = compiler.compile(definition).await?; + + self.execute_graph(graph, credentials).await + } + + /// Executes a pre-compiled workflow graph. + /// + /// Use [`Self::execute`] to compile and execute a workflow definition in one step. + /// This method is useful when you want to reuse a compiled graph multiple times. + pub async fn execute_graph( &self, mut graph: CompiledGraph, credentials: CredentialsRegistry, diff --git a/crates/nvisy-runtime/src/engine/mod.rs b/crates/nvisy-runtime/src/engine/mod.rs index e07449a..62a8509 100644 --- a/crates/nvisy-runtime/src/engine/mod.rs +++ b/crates/nvisy-runtime/src/engine/mod.rs @@ -5,6 +5,7 @@ //! - [`EngineConfig`]: Configuration options //! - [`ExecutionContext`]: Runtime context for workflow execution +mod compiler; mod config; mod context; mod executor; diff --git a/crates/nvisy-runtime/src/error.rs b/crates/nvisy-runtime/src/error.rs index 45a0427..6d92249 100644 --- a/crates/nvisy-runtime/src/error.rs +++ b/crates/nvisy-runtime/src/error.rs @@ -3,7 +3,7 @@ use thiserror::Error; use uuid::Uuid; -use crate::graph::NodeId; +use crate::definition::NodeId; /// Result type for workflow operations. 
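
With the compiler folded into `engine` as a private module, callers no longer pre-compile a graph themselves; they hand the definition to `Engine::execute`, which compiles and runs it in one step. A minimal sketch of the new call site, assuming the `Engine::with_defaults()` constructor and the default `CredentialsRegistry`/`Context` values used in the crate README; the `run` wrapper and the boxed error type are illustrative only, and the exact value returned by `execute` is not shown in this hunk:

```rust,ignore
use nvisy_runtime::definition::WorkflowDefinition;
use nvisy_runtime::engine::Engine;
use nvisy_runtime::provider::CredentialsRegistry;

async fn run(definition: WorkflowDefinition) -> Result<(), Box<dyn std::error::Error>> {
    let engine = Engine::with_defaults();
    let credentials = CredentialsRegistry::default();
    let ctx = nvisy_dal::core::Context::default();

    // Compiles the definition into a CompiledGraph internally, then executes it
    // with pipe-based item flow (inputs -> transforms -> outputs, one item at a time).
    let _result = engine.execute(definition, credentials, ctx).await?;
    Ok(())
}
```

`execute_graph` remains available for callers that already hold a `CompiledGraph`, as its doc comment notes.
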
pub type Result = std::result::Result; diff --git a/crates/nvisy-runtime/src/graph/compiled/graph.rs b/crates/nvisy-runtime/src/graph/compiled.rs similarity index 99% rename from crates/nvisy-runtime/src/graph/compiled/graph.rs rename to crates/nvisy-runtime/src/graph/compiled.rs index 3b6e25e..4842408 100644 --- a/crates/nvisy-runtime/src/graph/compiled/graph.rs +++ b/crates/nvisy-runtime/src/graph/compiled.rs @@ -9,7 +9,7 @@ use super::input::CompiledInput; use super::node::CompiledNode; use super::output::CompiledOutput; use super::route::CompiledSwitch; -use crate::graph::definition::{ +use crate::definition::{ ContentTypeCategory, ContentTypeCondition, EdgeData, NodeId, SwitchCondition, WorkflowMetadata, }; diff --git a/crates/nvisy-runtime/src/graph/compiled/mod.rs b/crates/nvisy-runtime/src/graph/compiled/mod.rs deleted file mode 100644 index e789018..0000000 --- a/crates/nvisy-runtime/src/graph/compiled/mod.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! Compiled workflow types for execution. -//! -//! This module contains runtime-optimized types for executing workflows. -//! These types are created by compiling workflow definitions and are -//! optimized for: -//! - Fast execution without lookups -//! - Pre-resolved cache slots -//! - Pre-instantiated providers and agents -//! -//! To create compiled types, use the [`crate::graph::compiler`] module. - -mod graph; -mod input; -mod node; -mod output; -mod route; -mod transform; - -pub use graph::CompiledGraph; -pub use input::{CompiledInput, DataStream, InputStream}; -pub use node::CompiledNode; -pub use output::{CompiledOutput, DataSink, OutputStream}; -pub use route::CompiledSwitch; -pub use transform::{ - ChunkProcessor, CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, - ExtractProcessor, PartitionProcessor, Process, -}; diff --git a/crates/nvisy-runtime/src/graph/compiled/input/mod.rs b/crates/nvisy-runtime/src/graph/input/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/compiled/input/mod.rs rename to crates/nvisy-runtime/src/graph/input/mod.rs diff --git a/crates/nvisy-runtime/src/graph/compiled/input/stream.rs b/crates/nvisy-runtime/src/graph/input/stream.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/compiled/input/stream.rs rename to crates/nvisy-runtime/src/graph/input/stream.rs diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index dfd4629..cbd9e2c 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -1,35 +1,27 @@ -//! Workflow graph structures and node types. +//! Compiled workflow types for execution. //! -//! This module provides the graph representation for workflows: +//! This module contains runtime-optimized types for executing workflows. +//! These types are created by compiling workflow definitions and are +//! optimized for: +//! - Fast execution without lookups +//! - Pre-resolved cache slots +//! - Pre-instantiated providers and agents //! -//! ## Definition Types -//! Serializable, frontend-friendly types in [`definition`]: -//! - [`definition::WorkflowDefinition`]: JSON-serializable workflow structure -//! - [`definition::NodeDef`]: Node definition enum (Input, Transform, Output, Switch) -//! - [`definition::InputDef`], [`definition::OutputDef`]: I/O node definitions -//! - [`definition::CacheSlot`]: Named cache slot for inter-node data passing -//! - [`definition::Transformer`]: Enum of all transform definition types -//! -//! ## Compiled Types -//! 
Runtime-optimized types in [`compiled`]: -//! - [`compiled::CompiledGraph`]: Execution-ready graph with resolved cache slots -//! - [`compiled::CompiledNode`]: Compiled node enum (Input, Output, Transform, Switch) -//! - [`compiled::CompiledInput`], [`compiled::CompiledOutput`]: Compiled I/O nodes -//! - [`compiled::CompiledTransform`]: Compiled transform with processor structs -//! - [`compiled::Process`]: Trait for processing data in processors -//! -//! ## Compiler -//! The [`compiler`] module compiles definitions into executable graphs. +//! To create compiled types, use the [`crate::graph::compiler`] module. -pub mod compiled; -pub mod compiler; -pub mod definition; +mod compiled; +mod input; +mod node; +mod output; +mod route; +mod transform; -// Re-export commonly used types from definition module -pub use definition::{ - CacheSlot, ContentTypeCategory, ContentTypeCondition, DateField, DurationCondition, Edge, - EdgeData, FileDateCondition, FileExtensionCondition, FileNameCondition, FileSizeCondition, - InputDef, InputProvider, InputSource, LanguageCondition, Node, NodeCommon, NodeDef, NodeId, - OutputDef, OutputProvider, OutputTarget, PageCountCondition, PatternMatchType, Position, - SwitchCondition, SwitchDef, Transformer, ValidationError, WorkflowDefinition, WorkflowMetadata, +pub use compiled::CompiledGraph; +pub use input::{CompiledInput, DataStream, InputStream}; +pub use node::CompiledNode; +pub use output::{CompiledOutput, DataSink, OutputStream}; +pub use route::CompiledSwitch; +pub use transform::{ + ChunkProcessor, CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, + ExtractProcessor, PartitionProcessor, Process, }; diff --git a/crates/nvisy-runtime/src/graph/compiled/node.rs b/crates/nvisy-runtime/src/graph/node.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/compiled/node.rs rename to crates/nvisy-runtime/src/graph/node.rs diff --git a/crates/nvisy-runtime/src/graph/compiled/output/mod.rs b/crates/nvisy-runtime/src/graph/output/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/compiled/output/mod.rs rename to crates/nvisy-runtime/src/graph/output/mod.rs diff --git a/crates/nvisy-runtime/src/graph/compiled/output/stream.rs b/crates/nvisy-runtime/src/graph/output/stream.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/compiled/output/stream.rs rename to crates/nvisy-runtime/src/graph/output/stream.rs diff --git a/crates/nvisy-runtime/src/graph/compiled/route.rs b/crates/nvisy-runtime/src/graph/route.rs similarity index 96% rename from crates/nvisy-runtime/src/graph/compiled/route.rs rename to crates/nvisy-runtime/src/graph/route.rs index 5103667..5ae38be 100644 --- a/crates/nvisy-runtime/src/graph/compiled/route.rs +++ b/crates/nvisy-runtime/src/graph/route.rs @@ -3,7 +3,9 @@ use jiff::Timestamp; use nvisy_dal::AnyDataValue; -use crate::graph::definition::{ContentTypeCategory, PatternMatchType, SwitchCondition, SwitchDef}; +use crate::definition::{ + ContentTypeCategory, DateField, PatternMatchType, SwitchCondition, SwitchDef, +}; /// Compiled switch node - ready to route data. 
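
Since `graph` now exposes the compiled layer directly and the definition types live in the top-level `definition` module, downstream imports shift accordingly. A before/after sketch, assuming external code used the public re-exports (paths only; no behavior change):

```rust,ignore
// Before this patch:
// use nvisy_runtime::graph::compiled::{CompiledGraph, Process};
// use nvisy_runtime::graph::{NodeId, WorkflowDefinition};

// After this patch:
use nvisy_runtime::definition::{NodeId, WorkflowDefinition};
use nvisy_runtime::graph::{CompiledGraph, Process};
```

The same shift shows up below in the nvisy-server handlers, which now import `WorkflowDefinition` from `nvisy_runtime::definition`.
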
/// @@ -108,12 +110,8 @@ impl CompiledSwitch { SwitchCondition::FileDate(c) => { let timestamp = match c.field { - crate::graph::definition::DateField::Created => { - self.get_metadata_timestamp(data, "created_at") - } - crate::graph::definition::DateField::Modified => { - self.get_metadata_timestamp(data, "modified_at") - } + DateField::Created => self.get_metadata_timestamp(data, "created_at"), + DateField::Modified => self.get_metadata_timestamp(data, "modified_at"), }; match timestamp { Some(ts) => { @@ -333,7 +331,7 @@ fn glob_match(pattern: &str, text: &str) -> bool { #[cfg(test)] mod tests { use super::*; - use crate::graph::definition::{ContentTypeCondition, FileExtensionCondition}; + use crate::definition::{ContentTypeCondition, FileExtensionCondition}; #[test] fn test_evaluate_file_extension() { diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/chunk.rs b/crates/nvisy-runtime/src/graph/transform/chunk.rs similarity index 97% rename from crates/nvisy-runtime/src/graph/compiled/transform/chunk.rs rename to crates/nvisy-runtime/src/graph/transform/chunk.rs index 6945193..a6c47ac 100644 --- a/crates/nvisy-runtime/src/graph/compiled/transform/chunk.rs +++ b/crates/nvisy-runtime/src/graph/transform/chunk.rs @@ -4,8 +4,8 @@ use nvisy_dal::AnyDataValue; use nvisy_rig::agent::Agents; use super::Process; +use crate::definition::ChunkStrategy; use crate::error::Result; -use crate::graph::definition::ChunkStrategy; /// Processor for chunking content into smaller pieces. pub struct ChunkProcessor { diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/derive.rs b/crates/nvisy-runtime/src/graph/transform/derive.rs similarity index 97% rename from crates/nvisy-runtime/src/graph/compiled/transform/derive.rs rename to crates/nvisy-runtime/src/graph/transform/derive.rs index 40be67a..f8f8714 100644 --- a/crates/nvisy-runtime/src/graph/compiled/transform/derive.rs +++ b/crates/nvisy-runtime/src/graph/transform/derive.rs @@ -4,8 +4,8 @@ use nvisy_dal::AnyDataValue; use nvisy_rig::agent::Agents; use super::Process; +use crate::definition::DeriveTask; use crate::error::Result; -use crate::graph::definition::DeriveTask; /// Processor for generating new content from input. pub struct DeriveProcessor { diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/compiled/transform/embedding.rs rename to crates/nvisy-runtime/src/graph/transform/embedding.rs diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs similarity index 97% rename from crates/nvisy-runtime/src/graph/compiled/transform/enrich.rs rename to crates/nvisy-runtime/src/graph/transform/enrich.rs index 9e18b44..5bafb1a 100644 --- a/crates/nvisy-runtime/src/graph/compiled/transform/enrich.rs +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -4,8 +4,8 @@ use nvisy_dal::AnyDataValue; use nvisy_rig::agent::Agents; use super::Process; +use crate::definition::EnrichTask; use crate::error::Result; -use crate::graph::definition::EnrichTask; /// Processor for enriching elements with metadata/descriptions. 
pub struct EnrichProcessor { diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/extract.rs b/crates/nvisy-runtime/src/graph/transform/extract.rs similarity index 97% rename from crates/nvisy-runtime/src/graph/compiled/transform/extract.rs rename to crates/nvisy-runtime/src/graph/transform/extract.rs index fabc26b..37aeb63 100644 --- a/crates/nvisy-runtime/src/graph/compiled/transform/extract.rs +++ b/crates/nvisy-runtime/src/graph/transform/extract.rs @@ -4,8 +4,8 @@ use nvisy_dal::AnyDataValue; use nvisy_rig::agent::Agents; use super::Process; +use crate::definition::ExtractTask; use crate::error::Result; -use crate::graph::definition::ExtractTask; /// Processor for extracting structured data or converting formats. pub struct ExtractProcessor { diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs similarity index 100% rename from crates/nvisy-runtime/src/graph/compiled/transform/mod.rs rename to crates/nvisy-runtime/src/graph/transform/mod.rs diff --git a/crates/nvisy-runtime/src/graph/compiled/transform/partition.rs b/crates/nvisy-runtime/src/graph/transform/partition.rs similarity index 96% rename from crates/nvisy-runtime/src/graph/compiled/transform/partition.rs rename to crates/nvisy-runtime/src/graph/transform/partition.rs index 798c344..042858a 100644 --- a/crates/nvisy-runtime/src/graph/compiled/transform/partition.rs +++ b/crates/nvisy-runtime/src/graph/transform/partition.rs @@ -3,8 +3,8 @@ use nvisy_dal::AnyDataValue; use super::Process; +use crate::definition::PartitionStrategy; use crate::error::Result; -use crate::graph::definition::PartitionStrategy; /// Processor for partitioning documents into elements. #[derive(Debug)] diff --git a/crates/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs index 6d6ce21..81bc6fa 100644 --- a/crates/nvisy-runtime/src/lib.rs +++ b/crates/nvisy-runtime/src/lib.rs @@ -2,6 +2,7 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] +pub mod definition; pub mod engine; mod error; pub mod graph; diff --git a/crates/nvisy-runtime/src/provider/backend/mod.rs b/crates/nvisy-runtime/src/provider/backend/mod.rs index c755dec..09512b5 100644 --- a/crates/nvisy-runtime/src/provider/backend/mod.rs +++ b/crates/nvisy-runtime/src/provider/backend/mod.rs @@ -45,24 +45,22 @@ mod openai; mod perplexity; // Storage backend exports +// AI provider exports +pub use anthropic::{AnthropicCompletionParams, AnthropicCredentials}; pub use azblob::{AzblobCredentials, AzblobParams}; +pub use cohere::{CohereCompletionParams, CohereCredentials, CohereEmbeddingParams}; pub use gcs::{GcsCredentials, GcsParams}; -pub use mysql::{MysqlCredentials, MysqlParams}; -pub use postgres::{PostgresCredentials, PostgresParams}; -pub use s3::{S3Credentials, S3Params}; - +pub use gemini::{GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams}; // Vector database exports pub use milvus::{MilvusCredentials, MilvusParams}; +pub use mysql::{MysqlCredentials, MysqlParams}; +pub use openai::{OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams}; +pub use perplexity::{PerplexityCompletionParams, PerplexityCredentials}; pub use pgvector::{PgVectorCredentials, PgVectorParams}; pub use pinecone::{PineconeCredentials, PineconeParams}; +pub use postgres::{PostgresCredentials, PostgresParams}; pub use qdrant::{QdrantCredentials, QdrantParams}; - -// AI provider exports -pub use anthropic::{AnthropicCompletionParams, AnthropicCredentials}; -pub use 
cohere::{CohereCompletionParams, CohereCredentials, CohereEmbeddingParams}; -pub use gemini::{GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams}; -pub use openai::{OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams}; -pub use perplexity::{PerplexityCompletionParams, PerplexityCredentials}; +pub use s3::{S3Credentials, S3Params}; /// Trait for provider parameters that can be combined with credentials to create a provider. #[async_trait::async_trait] diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs index 9fe8276..a0a17e2 100644 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/mod.rs @@ -23,17 +23,8 @@ mod registry; pub mod runtime; // Storage backend exports -pub use backend::{ - AzblobCredentials, AzblobParams, GcsCredentials, GcsParams, MysqlCredentials, MysqlParams, - PostgresCredentials, PostgresParams, S3Credentials, S3Params, -}; - -// Vector database exports -pub use backend::{ - MilvusCredentials, MilvusParams, PgVectorCredentials, PgVectorParams, PineconeCredentials, - PineconeParams, QdrantCredentials, QdrantParams, -}; - +// AI provider enum exports +pub use ai::{AiCredentials, CompletionProviderParams, EmbeddingProviderParams}; // AI provider exports pub use backend::{ AnthropicCompletionParams, AnthropicCredentials, CohereCompletionParams, CohereCredentials, @@ -41,17 +32,21 @@ pub use backend::{ OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams, PerplexityCompletionParams, PerplexityCredentials, }; - +pub use backend::{ + AzblobCredentials, AzblobParams, GcsCredentials, GcsParams, IntoProvider, MysqlCredentials, + MysqlParams, PostgresCredentials, PostgresParams, S3Credentials, S3Params, +}; +// Vector database exports +pub use backend::{ + MilvusCredentials, MilvusParams, PgVectorCredentials, PgVectorParams, PineconeCredentials, + PineconeParams, QdrantCredentials, QdrantParams, +}; use derive_more::From; pub use inputs::{InputProvider, InputProviderParams}; pub use outputs::{OutputProvider, OutputProviderParams}; pub use registry::CredentialsRegistry; use serde::{Deserialize, Serialize}; -// AI provider enum exports -pub use ai::{AiCredentials, CompletionProviderParams, EmbeddingProviderParams}; -pub use backend::IntoProvider; - /// Provider credentials (sensitive). #[derive(Debug, Clone, From, Serialize, Deserialize)] #[serde(tag = "provider", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs index 30cbd19..3996fd1 100644 --- a/crates/nvisy-runtime/src/provider/outputs.rs +++ b/crates/nvisy-runtime/src/provider/outputs.rs @@ -16,14 +16,13 @@ use serde::{Deserialize, Serialize}; use tokio::sync::Mutex; use uuid::Uuid; -use crate::graph::compiled::DataSink; - use super::ProviderCredentials; use super::backend::{ AzblobParams, GcsParams, IntoProvider, MilvusParams, MysqlParams, PgVectorParams, PineconeParams, PostgresParams, QdrantParams, S3Params, }; use crate::error::{Error, Result}; +use crate::graph::DataSink; /// Output provider parameters (storage backends + vector DBs). 
#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] diff --git a/crates/nvisy-server/src/handler/request/pipelines.rs b/crates/nvisy-server/src/handler/request/pipelines.rs index 2a6d399..b4426ac 100644 --- a/crates/nvisy-server/src/handler/request/pipelines.rs +++ b/crates/nvisy-server/src/handler/request/pipelines.rs @@ -6,7 +6,7 @@ use nvisy_postgres::model::{NewPipeline, UpdatePipeline as UpdatePipelineModel}; use nvisy_postgres::types::PipelineStatus; -use nvisy_runtime::graph::WorkflowDefinition; +use nvisy_runtime::definition::WorkflowDefinition; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; diff --git a/crates/nvisy-server/src/handler/response/pipelines.rs b/crates/nvisy-server/src/handler/response/pipelines.rs index bda56cc..3bccf41 100644 --- a/crates/nvisy-server/src/handler/response/pipelines.rs +++ b/crates/nvisy-server/src/handler/response/pipelines.rs @@ -3,7 +3,7 @@ use jiff::Timestamp; use nvisy_postgres::model; use nvisy_postgres::types::PipelineStatus; -use nvisy_runtime::graph::WorkflowDefinition; +use nvisy_runtime::definition::WorkflowDefinition; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; From a02d12156d9a30202390697c01e8fc31d03d8884 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Thu, 22 Jan 2026 15:07:44 +0100 Subject: [PATCH 19/28] feat(runtime): reorganize graph module with definition/compiled separation - Split route module into definition/route (cache, switch) and graph/route (evaluators) - Simplify switch types to FileCategory and Language conditions with bool output - Add dedicated evaluators: FileCategoryEvaluator, LanguageEvaluator - Create engine::Context type with From impls for nvisy_dal::core::Context - Move EdgeData to graph/edge.rs (runtime type, not definition) - Inline NodeKind into Node struct, remove generic wrapper - Add Position type in definition/util module - Move Workflow directly into definition/mod.rs - Remove node types section from README - Delete unused stream module files --- crates/nvisy-runtime/README.md | 61 +- crates/nvisy-runtime/src/definition/edge.rs | 35 -- crates/nvisy-runtime/src/definition/input.rs | 45 +- crates/nvisy-runtime/src/definition/mod.rs | 202 ++++++- crates/nvisy-runtime/src/definition/node.rs | 121 ++-- crates/nvisy-runtime/src/definition/output.rs | 45 +- crates/nvisy-runtime/src/definition/route.rs | 260 --------- .../src/definition/route/cache.rs | 33 ++ .../nvisy-runtime/src/definition/route/mod.rs | 13 + .../src/definition/route/switch.rs | 103 ++++ .../nvisy-runtime/src/definition/util/mod.rs | 5 + .../src/definition/util/position.rs | 19 + .../nvisy-runtime/src/definition/workflow.rs | 294 ---------- crates/nvisy-runtime/src/engine/compiler.rs | 111 ++-- crates/nvisy-runtime/src/engine/context.rs | 82 ++- crates/nvisy-runtime/src/engine/executor.rs | 6 +- crates/nvisy-runtime/src/engine/mod.rs | 2 +- crates/nvisy-runtime/src/graph/compiled.rs | 195 ------- crates/nvisy-runtime/src/graph/edge.rs | 30 + crates/nvisy-runtime/src/graph/mod.rs | 192 ++++++- crates/nvisy-runtime/src/graph/route.rs | 374 ------------ .../src/graph/route/file_category.rs | 151 +++++ .../nvisy-runtime/src/graph/route/language.rs | 119 ++++ crates/nvisy-runtime/src/graph/route/mod.rs | 89 +++ crates/nvisy-runtime/stream/event.rs | 118 ---- crates/nvisy-runtime/stream/event_pub.rs | 76 --- crates/nvisy-runtime/stream/event_stream.rs | 74 --- crates/nvisy-runtime/stream/event_sub.rs | 63 --- crates/nvisy-runtime/stream/mod.rs | 20 - 
crates/nvisy-runtime/stream/stream_pub.rs | 232 -------- crates/nvisy-runtime/stream/stream_sub.rs | 535 ------------------ .../src/handler/request/pipelines.rs | 4 +- .../src/handler/response/pipelines.rs | 6 +- 33 files changed, 1197 insertions(+), 2518 deletions(-) delete mode 100644 crates/nvisy-runtime/src/definition/route.rs create mode 100644 crates/nvisy-runtime/src/definition/route/cache.rs create mode 100644 crates/nvisy-runtime/src/definition/route/mod.rs create mode 100644 crates/nvisy-runtime/src/definition/route/switch.rs create mode 100644 crates/nvisy-runtime/src/definition/util/mod.rs create mode 100644 crates/nvisy-runtime/src/definition/util/position.rs delete mode 100644 crates/nvisy-runtime/src/definition/workflow.rs delete mode 100644 crates/nvisy-runtime/src/graph/compiled.rs create mode 100644 crates/nvisy-runtime/src/graph/edge.rs delete mode 100644 crates/nvisy-runtime/src/graph/route.rs create mode 100644 crates/nvisy-runtime/src/graph/route/file_category.rs create mode 100644 crates/nvisy-runtime/src/graph/route/language.rs create mode 100644 crates/nvisy-runtime/src/graph/route/mod.rs delete mode 100644 crates/nvisy-runtime/stream/event.rs delete mode 100644 crates/nvisy-runtime/stream/event_pub.rs delete mode 100644 crates/nvisy-runtime/stream/event_stream.rs delete mode 100644 crates/nvisy-runtime/stream/event_sub.rs delete mode 100644 crates/nvisy-runtime/stream/mod.rs delete mode 100644 crates/nvisy-runtime/stream/stream_pub.rs delete mode 100644 crates/nvisy-runtime/stream/stream_sub.rs diff --git a/crates/nvisy-runtime/README.md b/crates/nvisy-runtime/README.md index 99fee4b..b8814dd 100644 --- a/crates/nvisy-runtime/README.md +++ b/crates/nvisy-runtime/README.md @@ -7,77 +7,38 @@ data processing workflows as directed acyclic graphs (DAGs). ## Architecture -Workflows are represented as graphs with four types of nodes: - -- **Input nodes**: Read or produce data (entry points) -- **Transform nodes**: Process or transform data (intermediate) -- **Output nodes**: Write or consume data (exit points) -- **Switch nodes**: Route data conditionally based on properties - ### Definition vs Compiled Types The crate separates workflow representation into two layers: -- **Definition types** (`graph::definition`): JSON-serializable types for - storing, editing, and transmitting workflows. These include `WorkflowDefinition`, - `NodeDef`, `InputDef`, `OutputDef`, and `CacheSlot`. +- **Definition types** (`definition`): JSON-serializable types for + storing, editing, and transmitting workflows. These include `Workflow`, + `Node`, `NodeKind`, `Input`, `Output`, and `CacheSlot`. -- **Compiled types** (`graph::compiled`): Runtime-optimized types for execution. +- **Compiled types** (`graph`): Runtime-optimized types for execution. These include `CompiledGraph`, `CompiledNode`, and processor types like `EmbeddingProcessor` and `EnrichProcessor`. -Use the `graph::compiler` module to transform definitions into executable graphs. +Use the `Engine` to compile definitions and execute workflows. ## Example ```rust,ignore -use nvisy_runtime::graph::definition::{ - InputDef, NodeDef, OutputDef, WorkflowDefinition, +use nvisy_runtime::definition::{ + Input, Node, NodeKind, Output, Workflow, }; -use nvisy_runtime::graph::compiler::compile; use nvisy_runtime::engine::Engine; use nvisy_runtime::provider::CredentialsRegistry; // Create a workflow definition -let mut workflow = WorkflowDefinition::new(); +let mut workflow = Workflow::new(); // Add input, transform, and output nodes... 
// Connect nodes with edges... -// Compile the definition +// Execute the workflow +let engine = Engine::with_defaults(); let registry = CredentialsRegistry::default(); let ctx = nvisy_dal::core::Context::default(); -let compiled = compile(workflow, ®istry, ctx).await?; - -// Execute the compiled graph -let engine = Engine::with_defaults(); -let result = engine.execute_compiled(compiled, registry).await?; +let result = engine.execute(workflow, registry, ctx).await?; ``` - -## Node Types - -### Input Nodes -Input nodes read data from external sources: -- Amazon S3, Google Cloud Storage, Azure Blob Storage -- PostgreSQL, MySQL databases - -### Transform Nodes -- `Partition` - Extract elements from documents -- `Chunk` - Split content into smaller chunks -- `Embedding` - Generate vector embeddings -- `Enrich` - Add metadata/descriptions using LLMs -- `Extract` - Extract structured data or convert formats -- `Derive` - Generate new content (summaries, titles) - -### Output Nodes -Output nodes write data to external destinations: -- Amazon S3, Google Cloud Storage, Azure Blob Storage -- PostgreSQL, MySQL databases -- Qdrant, Pinecone, Milvus, pgvector (vector databases) - -### Switch Nodes -Route data based on conditions: -- Content type (image, document, text, etc.) -- File size thresholds -- Metadata presence/values -- File name patterns diff --git a/crates/nvisy-runtime/src/definition/edge.rs b/crates/nvisy-runtime/src/definition/edge.rs index 10a7f99..aaca631 100644 --- a/crates/nvisy-runtime/src/definition/edge.rs +++ b/crates/nvisy-runtime/src/definition/edge.rs @@ -56,38 +56,3 @@ impl Edge { EdgeBuilder::default() } } - -/// Edge data stored in the compiled graph. -#[derive( - Debug, - Clone, - PartialEq, - Eq, - Hash, - Default, - Serialize, - Deserialize, - Builder -)] -#[builder( - name = "EdgeDataBuilder", - pattern = "owned", - setter(into, strip_option, prefix = "with") -)] -pub struct EdgeData { - /// Optional port/slot name on the source node. - #[serde(skip_serializing_if = "Option::is_none")] - #[builder(default)] - pub from_port: Option, - /// Optional port/slot name on the target node. - #[serde(skip_serializing_if = "Option::is_none")] - #[builder(default)] - pub to_port: Option, -} - -impl EdgeData { - /// Returns a builder for creating edge data. - pub fn builder() -> EdgeDataBuilder { - EdgeDataBuilder::default() - } -} diff --git a/crates/nvisy-runtime/src/definition/input.rs b/crates/nvisy-runtime/src/definition/input.rs index 3f23adb..121352e 100644 --- a/crates/nvisy-runtime/src/definition/input.rs +++ b/crates/nvisy-runtime/src/definition/input.rs @@ -12,48 +12,35 @@ pub struct InputProvider { pub provider: InputProviderParams, } -/// Source of input data for an input node. +/// Input node definition - source of data for the workflow. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum InputSource { +#[serde(tag = "source", rename_all = "snake_case")] +pub enum Input { /// Read from external provider (S3, Postgres, etc.). Provider(InputProvider), /// Read from named cache slot (resolved at compile time). CacheSlot(CacheSlot), } -/// Input node definition. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct InputDef { - /// Source of input data. - pub source: InputSource, -} - -impl InputDef { - /// Creates a new input definition from a provider. +impl Input { + /// Creates a new input from a provider. 
pub fn from_provider(provider: InputProviderParams) -> Self { - Self { - source: InputSource::Provider(InputProvider { provider }), - } + Self::Provider(InputProvider { provider }) } - /// Creates a new input definition from a cache slot. + /// Creates a new input from a cache slot. pub fn from_cache(slot: impl Into) -> Self { - Self { - source: InputSource::CacheSlot(CacheSlot { - slot: slot.into(), - priority: None, - }), - } + Self::CacheSlot(CacheSlot { + slot: slot.into(), + priority: None, + }) } - /// Creates a new input definition from a cache slot with priority. + /// Creates a new input from a cache slot with priority. pub fn from_cache_with_priority(slot: impl Into, priority: u32) -> Self { - Self { - source: InputSource::CacheSlot(CacheSlot { - slot: slot.into(), - priority: Some(priority), - }), - } + Self::CacheSlot(CacheSlot { + slot: slot.into(), + priority: Some(priority), + }) } } diff --git a/crates/nvisy-runtime/src/definition/mod.rs b/crates/nvisy-runtime/src/definition/mod.rs index f7f6497..2a2d503 100644 --- a/crates/nvisy-runtime/src/definition/mod.rs +++ b/crates/nvisy-runtime/src/definition/mod.rs @@ -7,7 +7,11 @@ //! - Storage in databases //! //! To execute a workflow, definitions must be compiled into runtime types -//! using the [`crate::graph::compiler`] module. +//! using the [`crate::engine::compiler`] module. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; mod edge; mod input; @@ -16,21 +20,199 @@ mod node; mod output; mod route; mod transform; -mod workflow; +mod util; -pub use edge::{Edge, EdgeData}; -pub use input::{InputDef, InputProvider, InputSource}; +pub use edge::Edge; +pub use input::{Input, InputProvider}; pub use metadata::WorkflowMetadata; -pub use node::{Node, NodeCommon, NodeDef, NodeId, Position}; -pub use output::{OutputDef, OutputProvider, OutputTarget}; +pub use node::{Node, NodeId, NodeKind}; +pub use output::{Output, OutputProvider}; pub use route::{ - CacheSlot, ContentTypeCategory, ContentTypeCondition, DateField, DurationCondition, - FileDateCondition, FileExtensionCondition, FileNameCondition, FileSizeCondition, - LanguageCondition, PageCountCondition, PatternMatchType, SwitchCondition, SwitchDef, + CacheSlot, FileCategory, FileCategoryCondition, LanguageCondition, SwitchCondition, SwitchDef, }; pub use transform::{ AnalyzeTask, Chunk, ChunkStrategy, ConvertTask, Derive, DeriveTask, Embedding, Enrich, EnrichTask, Extract, ExtractTask, ImageEnrichTask, Partition, PartitionStrategy, TableConvertTask, TableEnrichTask, TextConvertTask, Transformer, }; -pub use workflow::{ValidationError, WorkflowDefinition}; +pub use util::Position; + +/// Serializable workflow definition. +/// +/// This is the JSON-friendly representation of a workflow graph. +/// It contains all the information needed to compile and execute a workflow. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Workflow { + /// Nodes in the workflow, keyed by their ID. + pub nodes: HashMap, + /// Edges connecting nodes. + pub edges: Vec, + /// Workflow metadata. + #[serde(default)] + pub metadata: WorkflowMetadata, +} + +impl Workflow { + /// Creates a new empty workflow definition. + pub fn new() -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + metadata: WorkflowMetadata::default(), + } + } + + /// Creates a workflow definition with metadata. 
+ pub fn with_metadata(metadata: WorkflowMetadata) -> Self { + Self { + nodes: HashMap::new(), + edges: Vec::new(), + metadata, + } + } + + /// Adds a node to the workflow. + pub fn add_node(&mut self, id: NodeId, node: Node) -> &mut Self { + self.nodes.insert(id, node); + self + } + + /// Adds a node definition with default metadata. + pub fn add_node_def(&mut self, id: NodeId, def: NodeKind) -> &mut Self { + self.nodes.insert(id, Node::new(def)); + self + } + + /// Adds an edge to the workflow. + pub fn add_edge(&mut self, edge: Edge) -> &mut Self { + self.edges.push(edge); + self + } + + /// Adds a simple edge between two nodes. + pub fn connect(&mut self, from: NodeId, to: NodeId) -> &mut Self { + self.edges.push(Edge::new(from, to)); + self + } + + /// Returns an iterator over input nodes. + pub fn input_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.is_input()) + } + + /// Returns an iterator over output nodes. + pub fn output_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.is_output()) + } + + /// Returns an iterator over transform nodes. + pub fn transform_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.is_transform()) + } + + /// Returns an iterator over switch nodes. + pub fn switch_nodes(&self) -> impl Iterator { + self.nodes.iter().filter(|(_, node)| node.is_switch()) + } +} + +impl Default for Workflow { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod tests { + use uuid::Uuid; + + use super::*; + + /// Creates a deterministic NodeId for testing. + fn test_node_id(n: u128) -> NodeId { + NodeId::from_uuid(Uuid::from_u128(n)) + } + + fn input_node_cache(slot: &str) -> Node { + Node::new(NodeKind::Input(Input::CacheSlot(CacheSlot { + slot: slot.to_string(), + priority: None, + }))) + } + + fn output_node_cache(slot: &str) -> Node { + Node::new(NodeKind::Output(Output::Cache(CacheSlot { + slot: slot.to_string(), + priority: None, + }))) + } + + fn transform_node_partition() -> Node { + Node::new(NodeKind::Transform(Transformer::Partition(Partition { + strategy: Default::default(), + include_page_breaks: false, + discard_unsupported: false, + }))) + } + + #[test] + fn test_workflow_definition_new() { + let def = Workflow::new(); + assert!(def.nodes.is_empty()); + assert!(def.edges.is_empty()); + } + + #[test] + fn test_workflow_definition_add_node() { + let mut def = Workflow::new(); + let id = test_node_id(1); + def.add_node(id, input_node_cache("test")); + assert_eq!(def.nodes.len(), 1); + assert!(def.nodes.contains_key(&id)); + } + + #[test] + fn test_workflow_definition_connect() { + let mut def = Workflow::new(); + let id1 = test_node_id(1); + let id2 = test_node_id(2); + def.add_node(id1, input_node_cache("in")) + .add_node(id2, output_node_cache("out")) + .connect(id1, id2); + + assert_eq!(def.edges.len(), 1); + assert_eq!(def.edges[0].from, id1); + assert_eq!(def.edges[0].to, id2); + } + + #[test] + fn test_workflow_definition_node_iterators() { + let mut def = Workflow::new(); + def.add_node(test_node_id(1), input_node_cache("in")) + .add_node(test_node_id(2), transform_node_partition()) + .add_node(test_node_id(3), output_node_cache("out")); + + assert_eq!(def.input_nodes().count(), 1); + assert_eq!(def.transform_nodes().count(), 1); + assert_eq!(def.output_nodes().count(), 1); + } + + #[test] + fn test_workflow_definition_serialization() { + let mut def = Workflow::new(); + let id1 = test_node_id(1); + let id2 = test_node_id(2); + def.add_node(id1, 
input_node_cache("in")) + .add_node(id2, output_node_cache("out")) + .connect(id1, id2); + + // Serialize to JSON + let json = serde_json::to_string(&def).expect("serialization failed"); + + // Deserialize back + let deserialized: Workflow = serde_json::from_str(&json).expect("deserialization failed"); + + assert_eq!(def.nodes.len(), deserialized.nodes.len()); + assert_eq!(def.edges.len(), deserialized.edges.len()); + } +} diff --git a/crates/nvisy-runtime/src/definition/node.rs b/crates/nvisy-runtime/src/definition/node.rs index d38559d..a2e86a7 100644 --- a/crates/nvisy-runtime/src/definition/node.rs +++ b/crates/nvisy-runtime/src/definition/node.rs @@ -7,10 +7,11 @@ use derive_more::{Debug, Display, From, Into}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::input::InputDef; -use super::output::OutputDef; +use super::input::Input; +use super::output::Output; use super::route::SwitchDef; use super::transform::Transformer; +use super::util::Position; /// Unique identifier for a node in a workflow graph. #[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] @@ -66,34 +67,20 @@ impl AsRef for NodeId { } } -/// Position of a node in the visual editor. -#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)] -pub struct Position { - /// X coordinate. - pub x: f32, - /// Y coordinate. - pub y: f32, -} - -impl Position { - /// Creates a new position. - pub fn new(x: f32, y: f32) -> Self { - Self { x, y } - } -} - -/// A generic node wrapper that adds optional name and description to any inner type. +/// A workflow node definition with metadata and kind. +/// +/// Nodes are categorized by their role in data flow: +/// - **Input**: Reads/produces data (entry points) +/// - **Transform**: Processes/transforms data (intermediate) +/// - **Output**: Writes/consumes data (exit points) +/// - **Switch**: Routes data based on conditions #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)] #[builder( - name = "NodeCommonBuilder", + name = "NodeBuilder", pattern = "owned", - setter(into, strip_option, prefix = "with"), - build_fn(validate = "Self::validate") + setter(into, strip_option, prefix = "with") )] -pub struct NodeCommon -where - T: Clone, -{ +pub struct Node { /// Display name of the node. #[serde(skip_serializing_if = "Option::is_none")] #[builder(default)] @@ -103,85 +90,83 @@ where #[builder(default)] pub description: Option, /// Position in the visual editor. - #[serde(default, skip_serializing_if = "is_default_position")] + #[serde(skip_serializing_if = "Option::is_none")] #[builder(default)] - pub position: Position, - /// Inner node configuration. + pub position: Option, + /// The node kind/type. #[serde(flatten)] - pub inner: T, -} - -fn is_default_position(pos: &Position) -> bool { - pos.x == 0.0 && pos.y == 0.0 -} - -impl NodeCommonBuilder { - fn validate(&self) -> Result<(), String> { - if self.inner.is_none() { - return Err("inner is required".into()); - } - Ok(()) - } + pub kind: NodeKind, } -impl NodeCommon { - /// Creates a new node with the given inner value. - pub fn new(inner: T) -> Self { +impl Node { + /// Creates a new node with the given kind. + pub fn new(kind: impl Into) -> Self { Self { name: None, description: None, - position: Position::default(), - inner, + position: None, + kind: kind.into(), } } /// Returns a builder for creating a node. 
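
Taken together with the unit tests above, building a workflow against the new `Workflow`/`Node`/`NodeKind` surface looks roughly like the sketch below: a cache-backed input wired through a partition transform into a cache-backed output. The slot names, deterministic ids, and the `sample_workflow` wrapper are illustrative, and `Default::default()` for the partition strategy mirrors the tests rather than a recommended setting:

```rust,ignore
use nvisy_runtime::definition::{
    Input, Node, NodeId, NodeKind, Output, Partition, Transformer, Workflow,
};
use uuid::Uuid;

fn sample_workflow() -> Workflow {
    // Deterministic ids, as in the tests; real callers would generate fresh UUIDs.
    let input_id = NodeId::from_uuid(Uuid::from_u128(1));
    let partition_id = NodeId::from_uuid(Uuid::from_u128(2));
    let output_id = NodeId::from_uuid(Uuid::from_u128(3));

    let mut workflow = Workflow::new();
    workflow
        .add_node(input_id, Node::new(NodeKind::Input(Input::from_cache("raw"))))
        .add_node(
            partition_id,
            Node::new(NodeKind::Transform(Transformer::Partition(Partition {
                strategy: Default::default(),
                include_page_breaks: false,
                discard_unsupported: false,
            }))),
        )
        .add_node(
            output_id,
            Node::new(NodeKind::Output(Output::from_cache("partitioned"))),
        )
        .connect(input_id, partition_id)
        .connect(partition_id, output_id);

    workflow
}
```

From there the definition can be handed to `Engine::execute`, as shown in the updated README example earlier in this patch.
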
- pub fn builder() -> NodeCommonBuilder { - NodeCommonBuilder::default() + pub fn builder() -> NodeBuilder { + NodeBuilder::default() } -} -/// A workflow node definition with common metadata. -pub type Node = NodeCommon; + /// Returns whether this is an input node. + pub const fn is_input(&self) -> bool { + self.kind.is_input() + } -/// Node definition enum for workflow graphs. -/// -/// Nodes are categorized by their role in data flow: -/// - **Input**: Reads/produces data (entry points) -/// - **Transform**: Processes/transforms data (intermediate) -/// - **Output**: Writes/consumes data (exit points) -/// - **Switch**: Routes data based on conditions + /// Returns whether this is a transform node. + pub const fn is_transform(&self) -> bool { + self.kind.is_transform() + } + + /// Returns whether this is an output node. + pub const fn is_output(&self) -> bool { + self.kind.is_output() + } + + /// Returns whether this is a switch node. + pub const fn is_switch(&self) -> bool { + self.kind.is_switch() + } +} + +/// Node kind enum for workflow graphs. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, From)] #[serde(tag = "type", rename_all = "snake_case")] -pub enum NodeDef { +pub enum NodeKind { /// Data input node, reads or produces data. - Input(InputDef), + Input(Input), /// Data transformer node, processes or transforms data. Transform(Transformer), /// Data output node, writes or consumes data. - Output(OutputDef), + Output(Output), /// Conditional routing node. Switch(SwitchDef), } -impl NodeDef { +impl NodeKind { /// Returns whether this is an input node. pub const fn is_input(&self) -> bool { - matches!(self, NodeDef::Input(_)) + matches!(self, NodeKind::Input(_)) } /// Returns whether this is a transform node. pub const fn is_transform(&self) -> bool { - matches!(self, NodeDef::Transform(_)) + matches!(self, NodeKind::Transform(_)) } /// Returns whether this is an output node. pub const fn is_output(&self) -> bool { - matches!(self, NodeDef::Output(_)) + matches!(self, NodeKind::Output(_)) } /// Returns whether this is a switch node. pub const fn is_switch(&self) -> bool { - matches!(self, NodeDef::Switch(_)) + matches!(self, NodeKind::Switch(_)) } } diff --git a/crates/nvisy-runtime/src/definition/output.rs b/crates/nvisy-runtime/src/definition/output.rs index 53b2df4..a41c38a 100644 --- a/crates/nvisy-runtime/src/definition/output.rs +++ b/crates/nvisy-runtime/src/definition/output.rs @@ -12,48 +12,35 @@ pub struct OutputProvider { pub provider: OutputProviderParams, } -/// Target destination for an output node. +/// Output node definition - destination for workflow data. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum OutputTarget { +#[serde(tag = "target", rename_all = "snake_case")] +pub enum Output { /// Write to external provider (S3, Qdrant, etc.). Provider(OutputProvider), /// Write to named cache slot (resolved at compile time). Cache(CacheSlot), } -/// Output node definition. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OutputDef { - /// Target destination for output data. - pub target: OutputTarget, -} - -impl OutputDef { - /// Creates a new output definition from a provider. +impl Output { + /// Creates a new output from a provider. 
pub fn from_provider(provider: OutputProviderParams) -> Self { - Self { - target: OutputTarget::Provider(OutputProvider { provider }), - } + Self::Provider(OutputProvider { provider }) } - /// Creates a new output definition from a cache slot. + /// Creates a new output from a cache slot. pub fn from_cache(slot: impl Into) -> Self { - Self { - target: OutputTarget::Cache(CacheSlot { - slot: slot.into(), - priority: None, - }), - } + Self::Cache(CacheSlot { + slot: slot.into(), + priority: None, + }) } - /// Creates a new output definition from a cache slot with priority. + /// Creates a new output from a cache slot with priority. pub fn from_cache_with_priority(slot: impl Into, priority: u32) -> Self { - Self { - target: OutputTarget::Cache(CacheSlot { - slot: slot.into(), - priority: Some(priority), - }), - } + Self::Cache(CacheSlot { + slot: slot.into(), + priority: Some(priority), + }) } } diff --git a/crates/nvisy-runtime/src/definition/route.rs b/crates/nvisy-runtime/src/definition/route.rs deleted file mode 100644 index f7e2215..0000000 --- a/crates/nvisy-runtime/src/definition/route.rs +++ /dev/null @@ -1,260 +0,0 @@ -//! Routing types for conditional data flow. -//! -//! This module provides types for controlling data flow in workflows: -//! - [`CacheSlot`]: Named connection point for linking workflow branches -//! - [`SwitchDef`]: Conditional routing based on data properties -//! -//! Switch conditions follow the same pattern as transforms - each condition -//! type is a separate struct, and `SwitchCondition` is an enum combining them. - -use jiff::Timestamp; -use serde::{Deserialize, Serialize}; - -/// A cache slot reference for in-memory data passing. -/// -/// Cache slots act as named connection points that link different parts -/// of a workflow graph. During compilation, cache slots are resolved by -/// connecting incoming edges directly to outgoing edges with matching slot names. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct CacheSlot { - /// Slot identifier (used as the key for matching inputs to outputs). - pub slot: String, - /// Priority for ordering when multiple slots are available. - #[serde(skip_serializing_if = "Option::is_none")] - pub priority: Option, -} - -impl CacheSlot { - /// Creates a new cache slot with the given slot name. - pub fn new(slot: impl Into) -> Self { - Self { - slot: slot.into(), - priority: None, - } - } - - /// Sets the priority. - pub fn with_priority(mut self, priority: u32) -> Self { - self.priority = Some(priority); - self - } -} - -/// A switch node definition that routes data to different output ports based on conditions. -/// -/// Switch nodes evaluate a condition against incoming data and route it -/// to the appropriate output port. Edges then connect each port to downstream nodes. -/// -/// Each switch has exactly one condition type, similar to how transforms work. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SwitchDef { - /// The condition to evaluate. - pub condition: SwitchCondition, - /// Output port for data matching the condition. - pub match_port: String, - /// Output port for data not matching the condition. - pub else_port: String, -} - -impl SwitchDef { - /// Returns all output port names defined by this switch. - pub fn output_ports(&self) -> impl Iterator { - [self.match_port.as_str(), self.else_port.as_str()].into_iter() - } -} - -/// Switch condition enum - each variant is a distinct condition type. 
-/// -/// Similar to `Transformer`, each condition is a separate struct with its -/// own configuration, wrapped in this enum. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub enum SwitchCondition { - /// Match by content type category. - ContentType(ContentTypeCondition), - /// Match by file extension. - FileExtension(FileExtensionCondition), - /// Match when file size is within range. - FileSize(FileSizeCondition), - /// Match when page count is within range. - PageCount(PageCountCondition), - /// Match when duration is within range (for audio/video). - Duration(DurationCondition), - /// Match by detected content language. - Language(LanguageCondition), - /// Match when file date is within range. - FileDate(FileDateCondition), - /// Match by filename pattern. - FileName(FileNameCondition), -} - -/// Condition that matches by content type category. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct ContentTypeCondition { - /// Content type category to match. - pub category: ContentTypeCategory, -} - -/// Condition that matches by file extension. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct FileExtensionCondition { - /// Extensions to match (without dot, e.g., "pdf", "docx"). - pub extensions: Vec, -} - -/// Condition that matches when file size is within range. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct FileSizeCondition { - /// Minimum size in bytes (inclusive). - #[serde(skip_serializing_if = "Option::is_none")] - pub min_bytes: Option, - /// Maximum size in bytes (inclusive). - #[serde(skip_serializing_if = "Option::is_none")] - pub max_bytes: Option, -} - -/// Condition that matches when page count is within range. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct PageCountCondition { - /// Minimum page count (inclusive). - #[serde(skip_serializing_if = "Option::is_none")] - pub min_pages: Option, - /// Maximum page count (inclusive). - #[serde(skip_serializing_if = "Option::is_none")] - pub max_pages: Option, -} - -/// Condition that matches when duration is within range (for audio/video). -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct DurationCondition { - /// Minimum duration in seconds (inclusive). - #[serde(skip_serializing_if = "Option::is_none")] - pub min_seconds: Option, - /// Maximum duration in seconds (inclusive). - #[serde(skip_serializing_if = "Option::is_none")] - pub max_seconds: Option, -} - -/// Condition that matches by detected content language. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct LanguageCondition { - /// Language code to match (e.g., "en", "es", "fr"). - pub code: String, - /// Minimum confidence threshold (0.0 to 1.0). - #[serde(default = "default_confidence")] - pub min_confidence: f32, -} - -/// Condition that matches when file date is within range. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct FileDateCondition { - /// Which date field to check. - #[serde(default)] - pub field: DateField, - /// Earliest date (inclusive). - #[serde(skip_serializing_if = "Option::is_none")] - pub after: Option, - /// Latest date (inclusive). - #[serde(skip_serializing_if = "Option::is_none")] - pub before: Option, -} - -/// Condition that matches by filename pattern. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct FileNameCondition { - /// Pattern to match against filename. 
- pub pattern: String, - /// Pattern type. - #[serde(default)] - pub match_type: PatternMatchType, -} - -/// Content type categories for routing. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ContentTypeCategory { - /// Text files (plain text, markdown, etc.). - Text, - /// Image files (JPEG, PNG, GIF, etc.). - Image, - /// Audio files (MP3, WAV, FLAC, etc.). - Audio, - /// Video files (MP4, WebM, etc.). - Video, - /// Document files (PDF, DOCX, etc.). - Document, - /// Archive files (ZIP, TAR, etc.). - Archive, - /// Spreadsheet files (XLSX, CSV, etc.). - Spreadsheet, - /// Presentation files (PPTX, etc.). - Presentation, - /// Code/source files. - Code, - /// Other/unknown content type. - Other, -} - -/// Date field to use for date-based routing. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum DateField { - /// File creation date. - #[default] - Created, - /// File modification date. - Modified, -} - -/// Pattern matching type for filename conditions. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum PatternMatchType { - /// Glob pattern (e.g., "*.pdf", "report_*"). - #[default] - Glob, - /// Regular expression pattern. - Regex, - /// Exact string match. - Exact, - /// Case-insensitive contains. - Contains, -} - -fn default_confidence() -> f32 { - 0.8 -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_switch_def_output_ports() { - let switch = SwitchDef { - condition: SwitchCondition::ContentType(ContentTypeCondition { - category: ContentTypeCategory::Image, - }), - match_port: "images".into(), - else_port: "other".into(), - }; - - let ports: Vec<_> = switch.output_ports().collect(); - assert_eq!(ports, vec!["images", "other"]); - } - - #[test] - fn test_serialization() { - let switch = SwitchDef { - condition: SwitchCondition::FileExtension(FileExtensionCondition { - extensions: vec!["pdf".into(), "docx".into()], - }), - match_port: "documents".into(), - else_port: "other".into(), - }; - - let json = serde_json::to_string_pretty(&switch).unwrap(); - let deserialized: SwitchDef = serde_json::from_str(&json).unwrap(); - assert_eq!(switch, deserialized); - } -} diff --git a/crates/nvisy-runtime/src/definition/route/cache.rs b/crates/nvisy-runtime/src/definition/route/cache.rs new file mode 100644 index 0000000..349127e --- /dev/null +++ b/crates/nvisy-runtime/src/definition/route/cache.rs @@ -0,0 +1,33 @@ +//! Cache slot types for in-memory data passing. + +use serde::{Deserialize, Serialize}; + +/// A cache slot reference for in-memory data passing. +/// +/// Cache slots act as named connection points that link different parts +/// of a workflow graph. During compilation, cache slots are resolved by +/// connecting incoming edges directly to outgoing edges with matching slot names. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CacheSlot { + /// Slot identifier (used as the key for matching inputs to outputs). + pub slot: String, + /// Priority for ordering when multiple slots are available. + #[serde(skip_serializing_if = "Option::is_none")] + pub priority: Option, +} + +impl CacheSlot { + /// Creates a new cache slot with the given slot name. + pub fn new(slot: impl Into) -> Self { + Self { + slot: slot.into(), + priority: None, + } + } + + /// Sets the priority. 
+ pub fn with_priority(mut self, priority: u32) -> Self { + self.priority = Some(priority); + self + } +} diff --git a/crates/nvisy-runtime/src/definition/route/mod.rs b/crates/nvisy-runtime/src/definition/route/mod.rs new file mode 100644 index 0000000..109ba9a --- /dev/null +++ b/crates/nvisy-runtime/src/definition/route/mod.rs @@ -0,0 +1,13 @@ +//! Routing types for conditional data flow. +//! +//! This module provides types for controlling data flow in workflows: +//! - [`CacheSlot`]: Named connection point for linking workflow branches +//! - [`SwitchDef`]: Conditional routing based on data properties + +mod cache; +mod switch; + +pub use cache::CacheSlot; +pub use switch::{ + FileCategory, FileCategoryCondition, LanguageCondition, SwitchCondition, SwitchDef, +}; diff --git a/crates/nvisy-runtime/src/definition/route/switch.rs b/crates/nvisy-runtime/src/definition/route/switch.rs new file mode 100644 index 0000000..d88d898 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/route/switch.rs @@ -0,0 +1,103 @@ +//! Switch types for conditional data routing. + +use serde::{Deserialize, Serialize}; + +/// A switch node definition that routes data based on a condition. +/// +/// Switch nodes evaluate a condition against incoming data and route it +/// to either the `true` or `false` output branch. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SwitchDef { + /// The condition to evaluate. + pub condition: SwitchCondition, +} + +impl SwitchDef { + /// Creates a new switch definition. + pub fn new(condition: SwitchCondition) -> Self { + Self { condition } + } +} + +/// Switch condition enum - each variant is a distinct condition type. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum SwitchCondition { + /// Match by file category (based on extension). + FileCategory(FileCategoryCondition), + /// Match by detected content language. + Language(LanguageCondition), +} + +/// Condition that matches by file category based on extension. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct FileCategoryCondition { + /// File category to match. + pub category: FileCategory, +} + +/// Condition that matches by detected content language. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct LanguageCondition { + /// Language codes to match (e.g., "en", "es", "fr"). + pub codes: Vec, + /// Minimum confidence threshold (0.0 to 1.0). + #[serde(default = "default_confidence")] + pub min_confidence: f32, +} + +fn default_confidence() -> f32 { + 0.8 +} + +/// File categories for routing based on extension. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum FileCategory { + /// Text files (.txt, .md, etc.). + Text, + /// Image files (.jpg, .png, .gif, etc.). + Image, + /// Audio files (.mp3, .wav, .flac, etc.). + Audio, + /// Video files (.mp4, .webm, etc.). + Video, + /// Document files (.pdf, .docx, etc.). + Document, + /// Archive files (.zip, .tar, etc.). + Archive, + /// Spreadsheet files (.xlsx, .csv, etc.). + Spreadsheet, + /// Presentation files (.pptx, etc.). + Presentation, + /// Code/source files. + Code, + /// Other/unknown file type. 
+ Other, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_switch_def_new() { + let switch = SwitchDef::new(SwitchCondition::FileCategory(FileCategoryCondition { + category: FileCategory::Image, + })); + + assert!(matches!(switch.condition, SwitchCondition::FileCategory(_))); + } + + #[test] + fn test_serialization() { + let switch = SwitchDef::new(SwitchCondition::Language(LanguageCondition { + codes: vec!["en".into(), "es".into()], + min_confidence: 0.9, + })); + + let json = serde_json::to_string_pretty(&switch).unwrap(); + let deserialized: SwitchDef = serde_json::from_str(&json).unwrap(); + assert_eq!(switch, deserialized); + } +} diff --git a/crates/nvisy-runtime/src/definition/util/mod.rs b/crates/nvisy-runtime/src/definition/util/mod.rs new file mode 100644 index 0000000..a1752a6 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/util/mod.rs @@ -0,0 +1,5 @@ +//! Utility types for workflow definitions. + +mod position; + +pub use position::Position; diff --git a/crates/nvisy-runtime/src/definition/util/position.rs b/crates/nvisy-runtime/src/definition/util/position.rs new file mode 100644 index 0000000..841b0d3 --- /dev/null +++ b/crates/nvisy-runtime/src/definition/util/position.rs @@ -0,0 +1,19 @@ +//! Position type for visual editor node placement. + +use serde::{Deserialize, Serialize}; + +/// Position of a node in the visual editor. +#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)] +pub struct Position { + /// X coordinate. + pub x: f32, + /// Y coordinate. + pub y: f32, +} + +impl Position { + /// Creates a new position. + pub fn new(x: f32, y: f32) -> Self { + Self { x, y } + } +} diff --git a/crates/nvisy-runtime/src/definition/workflow.rs b/crates/nvisy-runtime/src/definition/workflow.rs deleted file mode 100644 index ba07e20..0000000 --- a/crates/nvisy-runtime/src/definition/workflow.rs +++ /dev/null @@ -1,294 +0,0 @@ -//! Serializable workflow definition. - -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; - -use super::edge::Edge; -use super::metadata::WorkflowMetadata; -use super::node::{Node, NodeDef, NodeId}; - -/// Serializable workflow definition. -/// -/// This is the JSON-friendly representation of a workflow graph. -/// It contains all the information needed to compile and execute a workflow. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct WorkflowDefinition { - /// Nodes in the workflow, keyed by their ID. - pub nodes: HashMap, - /// Edges connecting nodes. - pub edges: Vec, - /// Workflow metadata. - #[serde(default)] - pub metadata: WorkflowMetadata, -} - -impl WorkflowDefinition { - /// Creates a new empty workflow definition. - pub fn new() -> Self { - Self { - nodes: HashMap::new(), - edges: Vec::new(), - metadata: WorkflowMetadata::default(), - } - } - - /// Creates a workflow definition with metadata. - pub fn with_metadata(metadata: WorkflowMetadata) -> Self { - Self { - nodes: HashMap::new(), - edges: Vec::new(), - metadata, - } - } - - /// Adds a node to the workflow. - pub fn add_node(&mut self, id: NodeId, node: Node) -> &mut Self { - self.nodes.insert(id, node); - self - } - - /// Adds a node definition with default metadata. - pub fn add_node_def(&mut self, id: NodeId, def: NodeDef) -> &mut Self { - self.nodes.insert(id, Node::new(def)); - self - } - - /// Adds an edge to the workflow. - pub fn add_edge(&mut self, edge: Edge) -> &mut Self { - self.edges.push(edge); - self - } - - /// Adds a simple edge between two nodes. 
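The new `LanguageCondition` relies on `#[serde(default = "default_confidence")]`, so `min_confidence` may be omitted from the JSON and still come back as 0.8. A self-contained sketch of that behavior, using simplified stand-ins for the condition types above:

```rust
use serde::{Deserialize, Serialize};

// Simplified stand-ins mirroring SwitchCondition/LanguageCondition.
#[derive(Debug, Serialize, Deserialize, PartialEq)]
#[serde(tag = "kind", rename_all = "snake_case")]
enum SwitchCondition {
    FileCategory { category: String },
    Language(LanguageCondition),
}

#[derive(Debug, Serialize, Deserialize, PartialEq)]
struct LanguageCondition {
    codes: Vec<String>,
    // Falls back to 0.8 when the field is omitted from the JSON.
    #[serde(default = "default_confidence")]
    min_confidence: f32,
}

fn default_confidence() -> f32 {
    0.8
}

fn main() -> serde_json::Result<()> {
    // `min_confidence` is omitted, so the serde default kicks in.
    let json = r#"{ "kind": "language", "codes": ["en", "es"] }"#;
    let cond: SwitchCondition = serde_json::from_str(json)?;
    if let SwitchCondition::Language(lang) = &cond {
        assert_eq!(lang.min_confidence, 0.8);
    }
    println!("{cond:?}");
    Ok(())
}
```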
- pub fn connect(&mut self, from: NodeId, to: NodeId) -> &mut Self { - self.edges.push(Edge::new(from, to)); - self - } - - /// Returns an iterator over input nodes. - pub fn input_nodes(&self) -> impl Iterator { - self.nodes.iter().filter(|(_, node)| node.inner.is_input()) - } - - /// Returns an iterator over output nodes. - pub fn output_nodes(&self) -> impl Iterator { - self.nodes.iter().filter(|(_, node)| node.inner.is_output()) - } - - /// Returns an iterator over transform nodes. - pub fn transform_nodes(&self) -> impl Iterator { - self.nodes - .iter() - .filter(|(_, node)| node.inner.is_transform()) - } - - /// Returns an iterator over switch nodes. - pub fn switch_nodes(&self) -> impl Iterator { - self.nodes.iter().filter(|(_, node)| node.inner.is_switch()) - } - - /// Validates the workflow definition structure. - /// - /// Checks that: - /// - All edge endpoints reference existing nodes - /// - There are no orphan nodes (nodes with no connections) - /// - The graph has at least one input and one output node - pub fn validate(&self) -> Result<(), ValidationError> { - // Check edge references - for edge in &self.edges { - if !self.nodes.contains_key(&edge.from) { - return Err(ValidationError::MissingNode(edge.from)); - } - if !self.nodes.contains_key(&edge.to) { - return Err(ValidationError::MissingNode(edge.to)); - } - } - - // Check for at least one input and output - let has_input = self.nodes.values().any(|n| n.inner.is_input()); - let has_output = self.nodes.values().any(|n| n.inner.is_output()); - - if !has_input { - return Err(ValidationError::NoInputNode); - } - if !has_output { - return Err(ValidationError::NoOutputNode); - } - - Ok(()) - } -} - -impl Default for WorkflowDefinition { - fn default() -> Self { - Self::new() - } -} - -/// Validation errors for workflow definitions. -#[derive(Debug, Clone, thiserror::Error)] -pub enum ValidationError { - /// An edge references a non-existent node. - #[error("edge references non-existent node: {0}")] - MissingNode(NodeId), - /// The workflow has no input nodes. - #[error("workflow must have at least one input node")] - NoInputNode, - /// The workflow has no output nodes. - #[error("workflow must have at least one output node")] - NoOutputNode, - /// The workflow contains a cycle. - #[error("workflow contains a cycle")] - CycleDetected, -} - -#[cfg(test)] -mod tests { - use uuid::Uuid; - - use super::*; - use crate::definition::{ - CacheSlot, InputDef, InputSource, OutputDef, OutputTarget, Partition, Transformer, - }; - - /// Creates a deterministic NodeId for testing. 
- fn test_node_id(n: u128) -> NodeId { - NodeId::from_uuid(Uuid::from_u128(n)) - } - - fn input_node_cache(slot: &str) -> Node { - Node::new(NodeDef::Input(InputDef { - source: InputSource::CacheSlot(CacheSlot { - slot: slot.to_string(), - priority: None, - }), - })) - } - - fn output_node_cache(slot: &str) -> Node { - Node::new(NodeDef::Output(OutputDef { - target: OutputTarget::Cache(CacheSlot { - slot: slot.to_string(), - priority: None, - }), - })) - } - - fn transform_node_partition() -> Node { - Node::new(NodeDef::Transform(Transformer::Partition(Partition { - strategy: Default::default(), - include_page_breaks: false, - discard_unsupported: false, - }))) - } - - #[test] - fn test_workflow_definition_new() { - let def = WorkflowDefinition::new(); - assert!(def.nodes.is_empty()); - assert!(def.edges.is_empty()); - } - - #[test] - fn test_workflow_definition_add_node() { - let mut def = WorkflowDefinition::new(); - let id = test_node_id(1); - def.add_node(id, input_node_cache("test")); - assert_eq!(def.nodes.len(), 1); - assert!(def.nodes.contains_key(&id)); - } - - #[test] - fn test_workflow_definition_connect() { - let mut def = WorkflowDefinition::new(); - let id1 = test_node_id(1); - let id2 = test_node_id(2); - def.add_node(id1, input_node_cache("in")) - .add_node(id2, output_node_cache("out")) - .connect(id1, id2); - - assert_eq!(def.edges.len(), 1); - assert_eq!(def.edges[0].from, id1); - assert_eq!(def.edges[0].to, id2); - } - - #[test] - fn test_workflow_definition_validate_valid() { - let mut def = WorkflowDefinition::new(); - let id1 = test_node_id(1); - let id2 = test_node_id(2); - def.add_node(id1, input_node_cache("in")) - .add_node(id2, output_node_cache("out")) - .connect(id1, id2); - - assert!(def.validate().is_ok()); - } - - #[test] - fn test_workflow_definition_validate_missing_node() { - let mut def = WorkflowDefinition::new(); - let id1 = test_node_id(1); - let id2 = test_node_id(2); - let id_invalid = test_node_id(99); - def.add_node(id1, input_node_cache("in")) - .add_node(id2, output_node_cache("out")) - .connect(id1, id_invalid); // Invalid reference - - let result = def.validate(); - assert!(result.is_err()); - assert!(matches!(result, Err(ValidationError::MissingNode(_)))); - } - - #[test] - fn test_workflow_definition_validate_no_input() { - let mut def = WorkflowDefinition::new(); - def.add_node(test_node_id(1), output_node_cache("out")); - - let result = def.validate(); - assert!(result.is_err()); - assert!(matches!(result, Err(ValidationError::NoInputNode))); - } - - #[test] - fn test_workflow_definition_validate_no_output() { - let mut def = WorkflowDefinition::new(); - def.add_node(test_node_id(1), input_node_cache("in")); - - let result = def.validate(); - assert!(result.is_err()); - assert!(matches!(result, Err(ValidationError::NoOutputNode))); - } - - #[test] - fn test_workflow_definition_node_iterators() { - let mut def = WorkflowDefinition::new(); - def.add_node(test_node_id(1), input_node_cache("in")) - .add_node(test_node_id(2), transform_node_partition()) - .add_node(test_node_id(3), output_node_cache("out")); - - assert_eq!(def.input_nodes().count(), 1); - assert_eq!(def.transform_nodes().count(), 1); - assert_eq!(def.output_nodes().count(), 1); - } - - #[test] - fn test_workflow_definition_serialization() { - let mut def = WorkflowDefinition::new(); - let id1 = test_node_id(1); - let id2 = test_node_id(2); - def.add_node(id1, input_node_cache("in")) - .add_node(id2, output_node_cache("out")) - .connect(id1, id2); - - // Serialize to JSON - let 
json = serde_json::to_string(&def).expect("serialization failed"); - - // Deserialize back - let deserialized: WorkflowDefinition = - serde_json::from_str(&json).expect("deserialization failed"); - - assert_eq!(def.nodes.len(), deserialized.nodes.len()); - assert_eq!(def.edges.len(), deserialized.edges.len()); - } -} diff --git a/crates/nvisy-runtime/src/engine/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs index be1eafa..a85f669 100644 --- a/crates/nvisy-runtime/src/engine/compiler.rs +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -12,17 +12,17 @@ use std::collections::HashMap; -use nvisy_dal::core::Context; +use super::context::Context; use nvisy_rig::agent::Agents; use nvisy_rig::provider::CompletionProvider; use petgraph::graph::{DiGraph, NodeIndex}; -use crate::definition::{EdgeData, InputSource, NodeDef, NodeId, OutputTarget, WorkflowDefinition}; +use crate::definition::{Input, NodeId, NodeKind, Output, Workflow}; use crate::error::{Error, Result}; use crate::graph::{ ChunkProcessor, CompiledGraph, CompiledInput, CompiledNode, CompiledOutput, CompiledSwitch, - CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, ExtractProcessor, - InputStream, OutputStream, PartitionProcessor, + CompiledTransform, DeriveProcessor, EdgeData, EmbeddingProcessor, EnrichProcessor, + ExtractProcessor, InputStream, OutputStream, PartitionProcessor, }; use crate::provider::{ CompletionProviderParams, CredentialsRegistry, EmbeddingProviderParams, InputProviderParams, @@ -44,10 +44,9 @@ impl<'a> WorkflowCompiler<'a> { } /// Compiles a workflow definition into an executable graph. - pub async fn compile(&self, def: WorkflowDefinition) -> Result { + pub async fn compile(&self, def: Workflow) -> Result { // Phase 1: Validate definition structure - def.validate() - .map_err(|e| Error::InvalidDefinition(format!("validation failed: {}", e)))?; + self.validate(&def)?; // Phase 2: Resolve cache slots let resolved = self.resolve_cache_slots(&def)?; @@ -56,10 +55,10 @@ impl<'a> WorkflowCompiler<'a> { let mut compiled_nodes = HashMap::new(); for (id, node) in &def.nodes { // Skip cache slot nodes - they're resolved during edge building - if self.is_cache_only_node(&node.inner) { + if self.is_cache_only_node(&node.kind) { continue; } - let compiled = self.compile_node(&node.inner).await?; + let compiled = self.compile_node(&node.kind).await?; compiled_nodes.insert(*id, compiled); } @@ -69,23 +68,57 @@ impl<'a> WorkflowCompiler<'a> { Ok(CompiledGraph::new(graph, node_indices, def.metadata)) } + /// Validates the workflow definition structure. + fn validate(&self, def: &Workflow) -> Result<()> { + // Check edge references + for edge in &def.edges { + if !def.nodes.contains_key(&edge.from) { + return Err(Error::InvalidDefinition(format!( + "edge references non-existent node: {}", + edge.from + ))); + } + if !def.nodes.contains_key(&edge.to) { + return Err(Error::InvalidDefinition(format!( + "edge references non-existent node: {}", + edge.to + ))); + } + } + + // Check for at least one input and output + let has_input = def.nodes.values().any(|n| n.is_input()); + let has_output = def.nodes.values().any(|n| n.is_output()); + + if !has_input { + return Err(Error::InvalidDefinition( + "workflow must have at least one input node".into(), + )); + } + if !has_output { + return Err(Error::InvalidDefinition( + "workflow must have at least one output node".into(), + )); + } + + Ok(()) + } + /// Checks if a node is a cache-only node (input from cache or output to cache). 
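A condensed standalone sketch of the same checks `WorkflowCompiler::validate` performs above (every edge endpoint must exist, and the graph needs at least one input and one output node), with integer IDs standing in for UUID-backed node IDs:

```rust
use std::collections::HashMap;

// Stand-in node kinds; integer IDs replace NodeId for brevity.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind {
    Input,
    Transform,
    Output,
}

struct Edge {
    from: u32,
    to: u32,
}

// Mirrors the checks in the compiler's validate(): dangling edge
// endpoints are rejected, and at least one input and one output
// node must be present.
fn validate(nodes: &HashMap<u32, Kind>, edges: &[Edge]) -> Result<(), String> {
    for edge in edges {
        for id in [edge.from, edge.to] {
            if !nodes.contains_key(&id) {
                return Err(format!("edge references non-existent node: {id}"));
            }
        }
    }
    if !nodes.values().any(|k| *k == Kind::Input) {
        return Err("workflow must have at least one input node".into());
    }
    if !nodes.values().any(|k| *k == Kind::Output) {
        return Err("workflow must have at least one output node".into());
    }
    Ok(())
}

fn main() {
    let nodes = HashMap::from([(1, Kind::Input), (2, Kind::Output)]);
    assert!(validate(&nodes, &[Edge { from: 1, to: 2 }]).is_ok());
    // Dangling edge endpoint is rejected.
    assert!(validate(&nodes, &[Edge { from: 1, to: 99 }]).is_err());
}
```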
- fn is_cache_only_node(&self, def: &NodeDef) -> bool { + fn is_cache_only_node(&self, def: &NodeKind) -> bool { match def { - NodeDef::Input(input) => matches!(input.source, InputSource::CacheSlot(_)), - NodeDef::Output(output) => matches!(output.target, OutputTarget::Cache(_)), + NodeKind::Input(input) => matches!(input, Input::CacheSlot(_)), + NodeKind::Output(output) => matches!(output, Output::Cache(_)), _ => false, } } /// Resolves cache slots by connecting cache inputs to cache outputs. - fn resolve_cache_slots(&self, def: &WorkflowDefinition) -> Result { + fn resolve_cache_slots(&self, def: &Workflow) -> Result { // Collect cache slot outputs (nodes that write to cache slots) let mut cache_outputs: HashMap> = HashMap::new(); for (id, node) in &def.nodes { - if let NodeDef::Output(output) = &node.inner - && let OutputTarget::Cache(slot) = &output.target - { + if let NodeKind::Output(Output::Cache(slot)) = &node.kind { cache_outputs .entry(slot.slot.clone()) .or_default() @@ -96,9 +129,7 @@ impl<'a> WorkflowCompiler<'a> { // Collect cache slot inputs (nodes that read from cache slots) let mut cache_inputs: HashMap> = HashMap::new(); for (id, node) in &def.nodes { - if let NodeDef::Input(input) = &node.inner - && let InputSource::CacheSlot(slot) = &input.source - { + if let NodeKind::Input(Input::CacheSlot(slot)) = &node.kind { cache_inputs.entry(slot.slot.clone()).or_default().push(*id); } } @@ -112,10 +143,10 @@ impl<'a> WorkflowCompiler<'a> { let to_node = def.nodes.get(&edge.to); let from_is_cache = from_node - .map(|n| self.is_cache_only_node(&n.inner)) + .map(|n| self.is_cache_only_node(&n.kind)) .unwrap_or(false); let to_is_cache = to_node - .map(|n| self.is_cache_only_node(&n.inner)) + .map(|n| self.is_cache_only_node(&n.kind)) .unwrap_or(false); if !from_is_cache && !to_is_cache { @@ -176,39 +207,36 @@ impl<'a> WorkflowCompiler<'a> { } /// Compiles a single node definition into a compiled node. - async fn compile_node(&self, def: &NodeDef) -> Result { + async fn compile_node(&self, def: &NodeKind) -> Result { match def { - NodeDef::Input(input) => { + NodeKind::Input(input) => { let stream = self.create_input_stream(input).await?; Ok(CompiledNode::Input(CompiledInput::new(stream))) } - NodeDef::Output(output) => { + NodeKind::Output(output) => { let stream = self.create_output_stream(output).await?; Ok(CompiledNode::Output(CompiledOutput::new(stream))) } - NodeDef::Transform(transformer) => { + NodeKind::Transform(transformer) => { let processor = self.create_processor(transformer).await?; Ok(CompiledNode::Transform(Box::new(processor))) } - NodeDef::Switch(switch) => { + NodeKind::Switch(switch) => { Ok(CompiledNode::Switch(CompiledSwitch::from(switch.clone()))) } } } /// Creates an input stream from an input definition. 
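A simplified sketch of the cache-slot resolution idea used above: nodes that write to a named slot are bridged to nodes that read from the same slot, producing direct edges so the cache nodes themselves can be dropped. This is an illustration of the matching step only, not the compiler's full edge-rewiring logic.

```rust
use std::collections::HashMap;

// slot name -> upstream node IDs feeding that slot, and
// slot name -> downstream node IDs consuming that slot.
fn resolve_cache_slots(
    writers: &HashMap<&str, Vec<u32>>,
    readers: &HashMap<&str, Vec<u32>>,
) -> Vec<(u32, u32)> {
    let mut edges = Vec::new();
    for (slot, from_nodes) in writers {
        if let Some(to_nodes) = readers.get(slot) {
            for &from in from_nodes {
                for &to in to_nodes {
                    edges.push((from, to));
                }
            }
        }
    }
    edges
}

fn main() {
    let writers = HashMap::from([("chunks", vec![1, 2])]);
    let readers = HashMap::from([("chunks", vec![3])]);
    // Both producers end up wired straight to the consumer.
    assert_eq!(resolve_cache_slots(&writers, &readers), vec![(1, 3), (2, 3)]);
}
```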
- async fn create_input_stream( - &self, - input: &crate::definition::InputDef, - ) -> Result { - match &input.source { - InputSource::Provider(provider_def) => { + async fn create_input_stream(&self, input: &Input) -> Result { + match input { + Input::Provider(provider_def) => { let stream = self .create_provider_input_stream(&provider_def.provider) .await?; Ok(stream) } - InputSource::CacheSlot(_) => { + Input::CacheSlot(_) => { // Cache inputs are resolved during cache slot resolution // This shouldn't be called for cache inputs Err(Error::Internal( @@ -226,7 +254,8 @@ impl<'a> WorkflowCompiler<'a> { let creds = self.registry.get(params.credentials_id())?; let provider = params.clone().into_provider(creds.clone()).await?; - let stream = provider.read_stream(&self.ctx).await?; + let dal_ctx: nvisy_dal::core::Context = self.ctx.clone().into(); + let stream = provider.read_stream(&dal_ctx).await?; // Map the stream to our Result type use futures::StreamExt; @@ -236,18 +265,15 @@ impl<'a> WorkflowCompiler<'a> { } /// Creates an output stream from an output definition. - async fn create_output_stream( - &self, - output: &crate::definition::OutputDef, - ) -> Result { - match &output.target { - OutputTarget::Provider(provider_def) => { + async fn create_output_stream(&self, output: &Output) -> Result { + match output { + Output::Provider(provider_def) => { let stream = self .create_provider_output_stream(&provider_def.provider) .await?; Ok(stream) } - OutputTarget::Cache(_) => { + Output::Cache(_) => { // Cache outputs are resolved during cache slot resolution Err(Error::Internal( "cache output nodes should be resolved before compilation".into(), @@ -264,7 +290,8 @@ impl<'a> WorkflowCompiler<'a> { let creds = self.registry.get(params.credentials_id())?; let provider = params.clone().into_provider(creds.clone()).await?; - let sink = provider.write_sink(&self.ctx).await?; + let dal_ctx: nvisy_dal::core::Context = self.ctx.clone().into(); + let sink = provider.write_sink(&dal_ctx).await?; Ok(OutputStream::new(sink)) } diff --git a/crates/nvisy-runtime/src/engine/context.rs b/crates/nvisy-runtime/src/engine/context.rs index 09fc2c9..28b746a 100644 --- a/crates/nvisy-runtime/src/engine/context.rs +++ b/crates/nvisy-runtime/src/engine/context.rs @@ -1,10 +1,90 @@ -//! Execution context for workflow runs. +//! Context types for workflow execution. use derive_builder::Builder; use nvisy_dal::AnyDataValue; use crate::provider::CredentialsRegistry; +/// Context for provider operations during compilation and execution. +/// +/// Provides configuration for read/write operations including target, +/// pagination cursor, and limits. +#[derive(Debug, Clone, Default)] +pub struct Context { + /// Target collection, table, bucket, topic, etc. + pub target: Option, + /// Cursor for pagination (provider-specific format). + pub cursor: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +impl Context { + /// Creates a new empty context. + pub fn new() -> Self { + Self::default() + } + + /// Sets the target. + pub fn with_target(mut self, target: impl Into) -> Self { + self.target = Some(target.into()); + self + } + + /// Sets the cursor for pagination. + pub fn with_cursor(mut self, cursor: impl Into) -> Self { + self.cursor = Some(cursor.into()); + self + } + + /// Sets the limit. + pub fn with_limit(mut self, limit: usize) -> Self { + self.limit = Some(limit); + self + } + + /// Returns the target, if set. 
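Assuming the new engine `Context` and its `with_*` setters land as shown, and that the type is re-exported from `nvisy_runtime::engine` per the `mod.rs` change further down, a read context might be built like this:

```rust
// Assumes the `nvisy_runtime::engine::Context` re-export added in this patch.
use nvisy_runtime::engine::Context;

fn main() {
    // Read at most 100 items from the "documents" target, resuming from a cursor.
    let ctx = Context::new()
        .with_target("documents")
        .with_cursor("page-2")
        .with_limit(100);

    assert_eq!(ctx.target(), Some("documents"));
    assert_eq!(ctx.limit(), Some(100));
}
```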
+ pub fn target(&self) -> Option<&str> { + self.target.as_deref() + } + + /// Returns the cursor, if set. + pub fn cursor(&self) -> Option<&str> { + self.cursor.as_deref() + } + + /// Returns the limit, if set. + pub fn limit(&self) -> Option { + self.limit + } +} + +impl From for nvisy_dal::core::Context { + fn from(ctx: Context) -> Self { + let mut dal_ctx = nvisy_dal::core::Context::new(); + if let Some(target) = ctx.target { + dal_ctx = dal_ctx.with_target(target); + } + if let Some(cursor) = ctx.cursor { + dal_ctx = dal_ctx.with_cursor(cursor); + } + if let Some(limit) = ctx.limit { + dal_ctx = dal_ctx.with_limit(limit); + } + dal_ctx + } +} + +impl From for Context { + fn from(ctx: nvisy_dal::core::Context) -> Self { + Self { + target: ctx.target, + cursor: ctx.cursor, + limit: ctx.limit, + } + } +} + /// Execution context for a workflow run. /// /// Manages the current data items flowing through the pipeline and holds diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index 9d12a75..a94f877 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -2,14 +2,14 @@ use std::sync::Arc; +use super::context::Context; use futures::{SinkExt, StreamExt}; -use nvisy_dal::core::Context; use tokio::sync::Semaphore; use super::EngineConfig; use super::compiler::WorkflowCompiler; use super::context::ExecutionContext; -use crate::definition::{NodeId, WorkflowDefinition}; +use crate::definition::{NodeId, Workflow}; use crate::error::{Error, Result}; use crate::graph::{CompiledGraph, CompiledNode, InputStream, OutputStream, Process}; use crate::provider::CredentialsRegistry; @@ -60,7 +60,7 @@ impl Engine { /// the next item is processed. pub async fn execute( &self, - definition: WorkflowDefinition, + definition: Workflow, credentials: CredentialsRegistry, ctx: Context, ) -> Result { diff --git a/crates/nvisy-runtime/src/engine/mod.rs b/crates/nvisy-runtime/src/engine/mod.rs index 62a8509..c74efae 100644 --- a/crates/nvisy-runtime/src/engine/mod.rs +++ b/crates/nvisy-runtime/src/engine/mod.rs @@ -11,5 +11,5 @@ mod context; mod executor; pub use config::EngineConfig; -pub use context::ExecutionContext; +pub use context::{Context, ExecutionContext}; pub use executor::Engine; diff --git a/crates/nvisy-runtime/src/graph/compiled.rs b/crates/nvisy-runtime/src/graph/compiled.rs deleted file mode 100644 index 4842408..0000000 --- a/crates/nvisy-runtime/src/graph/compiled.rs +++ /dev/null @@ -1,195 +0,0 @@ -//! Compiled workflow graph. - -use std::collections::HashMap; - -use petgraph::Direction; -use petgraph::graph::{DiGraph, NodeIndex}; - -use super::input::CompiledInput; -use super::node::CompiledNode; -use super::output::CompiledOutput; -use super::route::CompiledSwitch; -use crate::definition::{ - ContentTypeCategory, ContentTypeCondition, EdgeData, NodeId, SwitchCondition, WorkflowMetadata, -}; - -/// A compiled workflow graph ready for execution. -/// -/// This is the runtime representation of a workflow after compilation. -/// All cache slots are resolved into direct edges, and all node definitions -/// are compiled into their executable forms. -pub struct CompiledGraph { - /// The underlying directed graph. - graph: DiGraph, - /// Map from node IDs to graph indices. - node_indices: HashMap, - /// Map from graph indices to node IDs. - index_to_id: HashMap, - /// Workflow metadata. - metadata: WorkflowMetadata, -} - -impl CompiledGraph { - /// Creates a new compiled graph. 
- pub fn new( - graph: DiGraph, - node_indices: HashMap, - metadata: WorkflowMetadata, - ) -> Self { - let index_to_id = node_indices.iter().map(|(k, v)| (*v, *k)).collect(); - Self { - graph, - node_indices, - index_to_id, - metadata, - } - } - - /// Returns the workflow metadata. - pub fn metadata(&self) -> &WorkflowMetadata { - &self.metadata - } - - /// Returns the number of nodes in the graph. - pub fn node_count(&self) -> usize { - self.graph.node_count() - } - - /// Returns the number of edges in the graph. - pub fn edge_count(&self) -> usize { - self.graph.edge_count() - } - - /// Returns a reference to a node by ID. - pub fn node(&self, id: &NodeId) -> Option<&CompiledNode> { - self.node_indices - .get(id) - .and_then(|&idx| self.graph.node_weight(idx)) - } - - /// Returns a mutable reference to a node by ID. - pub fn node_mut(&mut self, id: &NodeId) -> Option<&mut CompiledNode> { - self.node_indices - .get(id) - .copied() - .and_then(|idx| self.graph.node_weight_mut(idx)) - } - - /// Returns the node ID for a graph index. - pub fn node_id(&self, index: NodeIndex) -> Option { - self.index_to_id.get(&index).copied() - } - - /// Returns the graph index for a node ID. - pub fn node_index(&self, id: &NodeId) -> Option { - self.node_indices.get(id).copied() - } - - /// Returns an iterator over all node IDs. - pub fn node_ids(&self) -> impl Iterator { - self.node_indices.keys() - } - - /// Returns an iterator over all nodes with their IDs. - pub fn nodes(&self) -> impl Iterator { - self.node_indices - .iter() - .filter_map(|(id, &idx)| self.graph.node_weight(idx).map(|node| (id, node))) - } - - /// Returns an iterator over input nodes. - pub fn input_nodes(&self) -> impl Iterator { - self.nodes() - .filter_map(|(id, node)| node.as_input().map(|input| (id, input))) - } - - /// Returns an iterator over output nodes. - pub fn output_nodes(&self) -> impl Iterator { - self.nodes() - .filter_map(|(id, node)| node.as_output().map(|output| (id, output))) - } - - /// Returns the predecessors (incoming nodes) of a node. - pub fn predecessors(&self, id: &NodeId) -> impl Iterator { - self.node_indices.get(id).into_iter().flat_map(|&idx| { - self.graph - .neighbors_directed(idx, Direction::Incoming) - .filter_map(|pred_idx| self.index_to_id.get(&pred_idx)) - }) - } - - /// Returns the successors (outgoing nodes) of a node. - pub fn successors(&self, id: &NodeId) -> impl Iterator { - self.node_indices.get(id).into_iter().flat_map(|&idx| { - self.graph - .neighbors_directed(idx, Direction::Outgoing) - .filter_map(|succ_idx| self.index_to_id.get(&succ_idx)) - }) - } - - /// Returns the edge data between two nodes, if an edge exists. - pub fn edge(&self, from: &NodeId, to: &NodeId) -> Option<&EdgeData> { - let from_idx = self.node_indices.get(from)?; - let to_idx = self.node_indices.get(to)?; - self.graph - .find_edge(*from_idx, *to_idx) - .and_then(|e| self.graph.edge_weight(e)) - } - - /// Returns topologically sorted node IDs (sources first). - /// - /// Returns `None` if the graph contains a cycle. - pub fn topological_order(&self) -> Option> { - petgraph::algo::toposort(&self.graph, None) - .ok() - .map(|indices| { - indices - .into_iter() - .filter_map(|idx| self.index_to_id.get(&idx).copied()) - .collect() - }) - } - - /// Consumes the graph and returns ownership of all nodes. - /// - /// Returns a map from node IDs to compiled nodes. 
- pub fn into_nodes(mut self) -> HashMap { - let mut nodes = HashMap::with_capacity(self.node_indices.len()); - for (id, &idx) in &self.node_indices { - if let Some(node) = self.graph.node_weight_mut(idx) { - // Use mem::replace with a placeholder to take ownership - // This is safe because we won't access the graph again - let placeholder = CompiledNode::Switch(CompiledSwitch::new( - SwitchCondition::ContentType(ContentTypeCondition { - category: ContentTypeCategory::Other, - }), - String::new(), - String::new(), - )); - let owned = std::mem::replace(node, placeholder); - nodes.insert(*id, owned); - } - } - nodes - } - - /// Returns a reference to the underlying petgraph. - pub fn inner(&self) -> &DiGraph { - &self.graph - } - - /// Returns a mutable reference to the underlying petgraph. - pub fn inner_mut(&mut self) -> &mut DiGraph { - &mut self.graph - } -} - -impl std::fmt::Debug for CompiledGraph { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("CompiledGraph") - .field("node_count", &self.graph.node_count()) - .field("edge_count", &self.graph.edge_count()) - .field("metadata", &self.metadata) - .finish() - } -} diff --git a/crates/nvisy-runtime/src/graph/edge.rs b/crates/nvisy-runtime/src/graph/edge.rs new file mode 100644 index 0000000..c2f82ba --- /dev/null +++ b/crates/nvisy-runtime/src/graph/edge.rs @@ -0,0 +1,30 @@ +//! Edge data for compiled graphs. + +use derive_builder::Builder; +use serde::{Deserialize, Serialize}; + +/// Edge data stored in the compiled graph. +#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)] +#[derive(Serialize, Deserialize, Builder)] +#[builder( + name = "EdgeDataBuilder", + pattern = "owned", + setter(into, strip_option, prefix = "with") +)] +pub struct EdgeData { + /// Optional port/slot name on the source node. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub from_port: Option, + /// Optional port/slot name on the target node. + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default)] + pub to_port: Option, +} + +impl EdgeData { + /// Returns a builder for creating edge data. + pub fn builder() -> EdgeDataBuilder { + EdgeDataBuilder::default() + } +} diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index cbd9e2c..b9d6647 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -7,21 +7,205 @@ //! - Pre-resolved cache slots //! - Pre-instantiated providers and agents //! -//! To create compiled types, use the [`crate::graph::compiler`] module. +//! To create compiled types, use the [`crate::engine::compiler`] module. -mod compiled; +use std::collections::HashMap; + +use petgraph::Direction; +use petgraph::graph::{DiGraph, NodeIndex}; + +mod edge; mod input; mod node; mod output; mod route; mod transform; -pub use compiled::CompiledGraph; +pub use edge::EdgeData; pub use input::{CompiledInput, DataStream, InputStream}; pub use node::CompiledNode; pub use output::{CompiledOutput, DataSink, OutputStream}; -pub use route::CompiledSwitch; +pub use route::{CompiledSwitch, FileCategoryEvaluator, LanguageEvaluator, SwitchEvaluator}; pub use transform::{ ChunkProcessor, CompiledTransform, DeriveProcessor, EmbeddingProcessor, EnrichProcessor, ExtractProcessor, PartitionProcessor, Process, }; + +use crate::definition::{FileCategory, NodeId, WorkflowMetadata}; + +/// A compiled workflow graph ready for execution. +/// +/// This is the runtime representation of a workflow after compilation. 
+/// All cache slots are resolved into direct edges, and all node definitions +/// are compiled into their executable forms. +pub struct CompiledGraph { + /// The underlying directed graph. + graph: DiGraph, + /// Map from node IDs to graph indices. + node_indices: HashMap, + /// Map from graph indices to node IDs. + index_to_id: HashMap, + /// Workflow metadata. + metadata: WorkflowMetadata, +} + +impl CompiledGraph { + /// Creates a new compiled graph. + pub fn new( + graph: DiGraph, + node_indices: HashMap, + metadata: WorkflowMetadata, + ) -> Self { + let index_to_id = node_indices.iter().map(|(k, v)| (*v, *k)).collect(); + Self { + graph, + node_indices, + index_to_id, + metadata, + } + } + + /// Returns the workflow metadata. + pub fn metadata(&self) -> &WorkflowMetadata { + &self.metadata + } + + /// Returns the number of nodes in the graph. + pub fn node_count(&self) -> usize { + self.graph.node_count() + } + + /// Returns the number of edges in the graph. + pub fn edge_count(&self) -> usize { + self.graph.edge_count() + } + + /// Returns a reference to a node by ID. + pub fn node(&self, id: &NodeId) -> Option<&CompiledNode> { + self.node_indices + .get(id) + .and_then(|&idx| self.graph.node_weight(idx)) + } + + /// Returns a mutable reference to a node by ID. + pub fn node_mut(&mut self, id: &NodeId) -> Option<&mut CompiledNode> { + self.node_indices + .get(id) + .copied() + .and_then(|idx| self.graph.node_weight_mut(idx)) + } + + /// Returns the node ID for a graph index. + pub fn node_id(&self, index: NodeIndex) -> Option { + self.index_to_id.get(&index).copied() + } + + /// Returns the graph index for a node ID. + pub fn node_index(&self, id: &NodeId) -> Option { + self.node_indices.get(id).copied() + } + + /// Returns an iterator over all node IDs. + pub fn node_ids(&self) -> impl Iterator { + self.node_indices.keys() + } + + /// Returns an iterator over all nodes with their IDs. + pub fn nodes(&self) -> impl Iterator { + self.node_indices + .iter() + .filter_map(|(id, &idx)| self.graph.node_weight(idx).map(|node| (id, node))) + } + + /// Returns an iterator over input nodes. + pub fn input_nodes(&self) -> impl Iterator { + self.nodes() + .filter_map(|(id, node)| node.as_input().map(|input| (id, input))) + } + + /// Returns an iterator over output nodes. + pub fn output_nodes(&self) -> impl Iterator { + self.nodes() + .filter_map(|(id, node)| node.as_output().map(|output| (id, output))) + } + + /// Returns the predecessors (incoming nodes) of a node. + pub fn predecessors(&self, id: &NodeId) -> impl Iterator { + self.node_indices.get(id).into_iter().flat_map(|&idx| { + self.graph + .neighbors_directed(idx, Direction::Incoming) + .filter_map(|pred_idx| self.index_to_id.get(&pred_idx)) + }) + } + + /// Returns the successors (outgoing nodes) of a node. + pub fn successors(&self, id: &NodeId) -> impl Iterator { + self.node_indices.get(id).into_iter().flat_map(|&idx| { + self.graph + .neighbors_directed(idx, Direction::Outgoing) + .filter_map(|succ_idx| self.index_to_id.get(&succ_idx)) + }) + } + + /// Returns the edge data between two nodes, if an edge exists. + pub fn edge(&self, from: &NodeId, to: &NodeId) -> Option<&EdgeData> { + let from_idx = self.node_indices.get(from)?; + let to_idx = self.node_indices.get(to)?; + self.graph + .find_edge(*from_idx, *to_idx) + .and_then(|e| self.graph.edge_weight(e)) + } + + /// Returns topologically sorted node IDs (sources first). + /// + /// Returns `None` if the graph contains a cycle. 
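A standalone illustration of the petgraph calls `CompiledGraph` builds on (`toposort` and `neighbors_directed`), with plain string labels standing in for compiled node values:

```rust
use petgraph::Direction;
use petgraph::algo::toposort;
use petgraph::graph::DiGraph;

fn main() {
    let mut graph: DiGraph<&str, ()> = DiGraph::new();
    let input = graph.add_node("input");
    let transform = graph.add_node("transform");
    let output = graph.add_node("output");
    graph.add_edge(input, transform, ());
    graph.add_edge(transform, output, ());

    // Same call topological_order wraps; Err means the graph has a cycle.
    let order = toposort(&graph, None).expect("graph is acyclic");
    assert_eq!(order, vec![input, transform, output]);

    // Same call successors() wraps for outgoing neighbors.
    let succs: Vec<_> = graph
        .neighbors_directed(input, Direction::Outgoing)
        .collect();
    assert_eq!(succs, vec![transform]);
}
```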
+ pub fn topological_order(&self) -> Option> { + petgraph::algo::toposort(&self.graph, None) + .ok() + .map(|indices| { + indices + .into_iter() + .filter_map(|idx| self.index_to_id.get(&idx).copied()) + .collect() + }) + } + + /// Consumes the graph and returns ownership of all nodes. + /// + /// Returns a map from node IDs to compiled nodes. + pub fn into_nodes(mut self) -> HashMap { + let mut nodes = HashMap::with_capacity(self.node_indices.len()); + for (id, &idx) in &self.node_indices { + if let Some(node) = self.graph.node_weight_mut(idx) { + // Use mem::replace with a placeholder to take ownership + // This is safe because we won't access the graph again + let placeholder = CompiledNode::Switch(CompiledSwitch::new( + SwitchEvaluator::FileCategory(FileCategoryEvaluator::new(FileCategory::Other)), + )); + let owned = std::mem::replace(node, placeholder); + nodes.insert(*id, owned); + } + } + nodes + } + + /// Returns a reference to the underlying petgraph. + pub fn inner(&self) -> &DiGraph { + &self.graph + } + + /// Returns a mutable reference to the underlying petgraph. + pub fn inner_mut(&mut self) -> &mut DiGraph { + &mut self.graph + } +} + +impl std::fmt::Debug for CompiledGraph { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CompiledGraph") + .field("node_count", &self.graph.node_count()) + .field("edge_count", &self.graph.edge_count()) + .field("metadata", &self.metadata) + .finish() + } +} diff --git a/crates/nvisy-runtime/src/graph/route.rs b/crates/nvisy-runtime/src/graph/route.rs deleted file mode 100644 index 5ae38be..0000000 --- a/crates/nvisy-runtime/src/graph/route.rs +++ /dev/null @@ -1,374 +0,0 @@ -//! Compiled routing node types. - -use jiff::Timestamp; -use nvisy_dal::AnyDataValue; - -use crate::definition::{ - ContentTypeCategory, DateField, PatternMatchType, SwitchCondition, SwitchDef, -}; - -/// Compiled switch node - ready to route data. -/// -/// Evaluates a condition against input data and determines -/// which output port to route the data to. -#[derive(Debug, Clone)] -pub struct CompiledSwitch { - /// The condition to evaluate. - condition: SwitchCondition, - /// Output port for data matching the condition. - match_port: String, - /// Output port for data not matching the condition. - else_port: String, -} - -impl CompiledSwitch { - /// Creates a new compiled switch. - pub fn new(condition: SwitchCondition, match_port: String, else_port: String) -> Self { - Self { - condition, - match_port, - else_port, - } - } - - /// Returns all output port names. - pub fn output_ports(&self) -> impl Iterator { - [self.match_port.as_str(), self.else_port.as_str()].into_iter() - } - - /// Evaluates the switch condition against input data. - /// - /// Returns the appropriate output port name based on whether - /// the condition matches. - pub fn evaluate(&self, data: &AnyDataValue) -> &str { - if self.evaluate_condition(data) { - &self.match_port - } else { - &self.else_port - } - } - - /// Evaluates the condition against the data. 
- fn evaluate_condition(&self, data: &AnyDataValue) -> bool { - match &self.condition { - SwitchCondition::ContentType(c) => self.match_content_type(data, c.category), - - SwitchCondition::FileExtension(c) => { - match data { - AnyDataValue::Blob(blob) => blob.path.rsplit('.').next().is_some_and(|ext| { - c.extensions.iter().any(|e| ext.eq_ignore_ascii_case(e)) - }), - _ => false, - } - } - - SwitchCondition::FileSize(c) => match data { - AnyDataValue::Blob(blob) => { - let size = blob.data.len() as u64; - let above_min = c.min_bytes.is_none_or(|min| size >= min); - let below_max = c.max_bytes.is_none_or(|max| size <= max); - above_min && below_max - } - _ => false, - }, - - SwitchCondition::PageCount(c) => { - let page_count = self.get_metadata_u32(data, "page_count"); - match page_count { - Some(count) => { - let above_min = c.min_pages.is_none_or(|min| count >= min); - let below_max = c.max_pages.is_none_or(|max| count <= max); - above_min && below_max - } - None => false, - } - } - - SwitchCondition::Duration(c) => { - let duration_secs = self.get_metadata_i64(data, "duration_seconds"); - match duration_secs { - Some(secs) => { - let above_min = c.min_seconds.is_none_or(|min| secs >= min); - let below_max = c.max_seconds.is_none_or(|max| secs <= max); - above_min && below_max - } - None => false, - } - } - - SwitchCondition::Language(c) => { - let detected_lang = self.get_metadata_string(data, "language"); - let confidence = self.get_metadata_f32(data, "language_confidence"); - match (detected_lang, confidence) { - (Some(lang), Some(conf)) => { - lang.eq_ignore_ascii_case(&c.code) && conf >= c.min_confidence - } - (Some(lang), None) => lang.eq_ignore_ascii_case(&c.code), - _ => false, - } - } - - SwitchCondition::FileDate(c) => { - let timestamp = match c.field { - DateField::Created => self.get_metadata_timestamp(data, "created_at"), - DateField::Modified => self.get_metadata_timestamp(data, "modified_at"), - }; - match timestamp { - Some(ts) => { - let after_ok = c.after.is_none_or(|after| ts >= after); - let before_ok = c.before.is_none_or(|before| ts <= before); - after_ok && before_ok - } - None => false, - } - } - - SwitchCondition::FileName(c) => match data { - AnyDataValue::Blob(blob) => { - let filename = blob.path.rsplit('/').next().unwrap_or(&blob.path); - match c.match_type { - PatternMatchType::Glob => glob_match(&c.pattern, filename), - PatternMatchType::Regex => { - // Fall back to glob matching for now - glob_match(&c.pattern, filename) - } - PatternMatchType::Exact => filename == c.pattern, - PatternMatchType::Contains => { - filename.to_lowercase().contains(&c.pattern.to_lowercase()) - } - } - } - _ => false, - }, - } - } - - /// Matches content type category against data. 
- fn match_content_type(&self, data: &AnyDataValue, category: ContentTypeCategory) -> bool { - match data { - AnyDataValue::Blob(blob) => { - let mime = blob - .content_type - .as_deref() - .unwrap_or("application/octet-stream"); - match category { - ContentTypeCategory::Image => mime.starts_with("image/"), - ContentTypeCategory::Document => { - mime == "application/pdf" - || mime.starts_with("application/vnd.") - || mime == "application/msword" - } - ContentTypeCategory::Text => { - mime.starts_with("text/") || mime == "application/json" - } - ContentTypeCategory::Audio => mime.starts_with("audio/"), - ContentTypeCategory::Video => mime.starts_with("video/"), - ContentTypeCategory::Spreadsheet => { - mime == "application/vnd.ms-excel" - || mime.contains("spreadsheet") - || mime == "text/csv" - } - ContentTypeCategory::Presentation => { - mime == "application/vnd.ms-powerpoint" || mime.contains("presentation") - } - ContentTypeCategory::Archive => { - mime == "application/zip" - || mime == "application/x-tar" - || mime == "application/gzip" - || mime == "application/x-rar-compressed" - || mime == "application/x-7z-compressed" - } - ContentTypeCategory::Code => { - mime.starts_with("text/x-") - || mime == "application/javascript" - || mime == "application/typescript" - || mime == "application/x-python" - } - ContentTypeCategory::Other => true, - } - } - _ => false, - } - } - - /// Gets a string metadata value from JSON. - fn get_metadata_string(&self, data: &AnyDataValue, key: &str) -> Option { - match data { - AnyDataValue::Blob(blob) => blob.metadata.get(key).and_then(json_to_string), - AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_string), - _ => None, - } - } - - /// Gets a u32 metadata value. - fn get_metadata_u32(&self, data: &AnyDataValue, key: &str) -> Option { - match data { - AnyDataValue::Blob(blob) => blob - .metadata - .get(key) - .and_then(json_to_u64) - .map(|v| v as u32), - AnyDataValue::Record(record) => record - .columns - .get(key) - .and_then(json_to_u64) - .map(|v| v as u32), - _ => None, - } - } - - /// Gets an i64 metadata value. - fn get_metadata_i64(&self, data: &AnyDataValue, key: &str) -> Option { - match data { - AnyDataValue::Blob(blob) => blob.metadata.get(key).and_then(json_to_i64), - AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_i64), - _ => None, - } - } - - /// Gets an f32 metadata value. - fn get_metadata_f32(&self, data: &AnyDataValue, key: &str) -> Option { - self.get_metadata_f64(data, key).map(|v| v as f32) - } - - /// Gets an f64 metadata value. - fn get_metadata_f64(&self, data: &AnyDataValue, key: &str) -> Option { - match data { - AnyDataValue::Blob(blob) => blob.metadata.get(key).and_then(json_to_f64), - AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_f64), - _ => None, - } - } - - /// Gets a timestamp metadata value. - fn get_metadata_timestamp(&self, data: &AnyDataValue, key: &str) -> Option { - let s = self.get_metadata_string(data, key)?; - s.parse().ok() - } -} - -impl From for CompiledSwitch { - fn from(def: SwitchDef) -> Self { - Self::new(def.condition, def.match_port, def.else_port) - } -} - -/// Converts a JSON value to a string. -fn json_to_string(value: &serde_json::Value) -> Option { - match value { - serde_json::Value::String(s) => Some(s.clone()), - serde_json::Value::Number(n) => Some(n.to_string()), - serde_json::Value::Bool(b) => Some(b.to_string()), - _ => None, - } -} - -/// Converts a JSON value to u64. 
-fn json_to_u64(value: &serde_json::Value) -> Option { - value.as_u64().or_else(|| value.as_f64().map(|f| f as u64)) -} - -/// Converts a JSON value to i64. -fn json_to_i64(value: &serde_json::Value) -> Option { - value.as_i64().or_else(|| value.as_f64().map(|f| f as i64)) -} - -/// Converts a JSON value to f64. -fn json_to_f64(value: &serde_json::Value) -> Option { - value.as_f64() -} - -/// Simple glob-style pattern matching. -/// -/// Supports: -/// - `*` matches any sequence of characters -/// - `?` matches any single character -/// - Literal matching for other characters (case-insensitive) -fn glob_match(pattern: &str, text: &str) -> bool { - let mut pattern_chars = pattern.chars().peekable(); - let mut text_chars = text.chars().peekable(); - - while let Some(p) = pattern_chars.next() { - match p { - '*' => { - // Try matching zero or more characters - if pattern_chars.peek().is_none() { - // Pattern ends with *, matches everything remaining - return true; - } - // Try each position in the remaining text - loop { - let remaining_pattern: String = pattern_chars.clone().collect(); - let remaining_text: String = text_chars.clone().collect(); - if glob_match(&remaining_pattern, &remaining_text) { - return true; - } - if text_chars.next().is_none() { - return false; - } - } - } - '?' => { - // Match any single character - if text_chars.next().is_none() { - return false; - } - } - c => { - // Literal match (case-insensitive) - match text_chars.next() { - Some(t) if c.eq_ignore_ascii_case(&t) => {} - _ => return false, - } - } - } - } - - // Pattern is exhausted, text should also be exhausted - text_chars.peek().is_none() -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::definition::{ContentTypeCondition, FileExtensionCondition}; - - #[test] - fn test_evaluate_file_extension() { - let switch = CompiledSwitch::new( - SwitchCondition::FileExtension(FileExtensionCondition { - extensions: vec!["pdf".into(), "docx".into()], - }), - "documents".into(), - "other".into(), - ); - - let pdf = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("report.pdf", vec![])); - let txt = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("notes.txt", vec![])); - - assert_eq!(switch.evaluate(&pdf), "documents"); - assert_eq!(switch.evaluate(&txt), "other"); - } - - #[test] - fn test_evaluate_content_type() { - let switch = CompiledSwitch::new( - SwitchCondition::ContentType(ContentTypeCondition { - category: ContentTypeCategory::Image, - }), - "images".into(), - "other".into(), - ); - - let mut blob = nvisy_dal::datatype::Blob::new("photo.jpg", vec![]); - blob.content_type = Some("image/jpeg".into()); - let image = AnyDataValue::Blob(blob); - - let mut blob = nvisy_dal::datatype::Blob::new("doc.pdf", vec![]); - blob.content_type = Some("application/pdf".into()); - let pdf = AnyDataValue::Blob(blob); - - assert_eq!(switch.evaluate(&image), "images"); - assert_eq!(switch.evaluate(&pdf), "other"); - } -} diff --git a/crates/nvisy-runtime/src/graph/route/file_category.rs b/crates/nvisy-runtime/src/graph/route/file_category.rs new file mode 100644 index 0000000..53e9eb6 --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/file_category.rs @@ -0,0 +1,151 @@ +//! File category evaluator for routing by file extension. + +use nvisy_dal::AnyDataValue; + +use crate::definition::FileCategory; + +/// Evaluates file category based on extension. +#[derive(Debug, Clone)] +pub struct FileCategoryEvaluator { + /// File category to match. 
+ category: FileCategory, +} + +impl FileCategoryEvaluator { + /// Creates a new file category evaluator. + pub fn new(category: FileCategory) -> Self { + Self { category } + } + + /// Evaluates whether the data matches the file category. + pub fn evaluate(&self, data: &AnyDataValue) -> bool { + let ext = match data { + AnyDataValue::Blob(blob) => blob.path.rsplit('.').next(), + _ => return false, + }; + + let Some(ext) = ext else { + return self.category == FileCategory::Other; + }; + + let ext = ext.to_lowercase(); + match self.category { + FileCategory::Text => { + matches!(ext.as_str(), "txt" | "md" | "markdown" | "rst" | "text") + } + FileCategory::Image => { + matches!( + ext.as_str(), + "jpg" + | "jpeg" + | "png" + | "gif" + | "bmp" + | "webp" + | "svg" + | "ico" + | "tiff" + | "tif" + ) + } + FileCategory::Audio => { + matches!( + ext.as_str(), + "mp3" | "wav" | "flac" | "aac" | "ogg" | "wma" | "m4a" + ) + } + FileCategory::Video => { + matches!( + ext.as_str(), + "mp4" | "webm" | "avi" | "mov" | "mkv" | "wmv" | "flv" | "m4v" + ) + } + FileCategory::Document => { + matches!( + ext.as_str(), + "pdf" | "doc" | "docx" | "odt" | "rtf" | "epub" + ) + } + FileCategory::Archive => { + matches!( + ext.as_str(), + "zip" | "tar" | "gz" | "rar" | "7z" | "bz2" | "xz" + ) + } + FileCategory::Spreadsheet => { + matches!(ext.as_str(), "xls" | "xlsx" | "csv" | "ods" | "tsv") + } + FileCategory::Presentation => { + matches!(ext.as_str(), "ppt" | "pptx" | "odp" | "key") + } + FileCategory::Code => { + matches!( + ext.as_str(), + "rs" | "py" + | "js" + | "ts" + | "java" + | "c" + | "cpp" + | "h" + | "hpp" + | "go" + | "rb" + | "php" + | "swift" + | "kt" + | "scala" + | "sh" + | "bash" + | "zsh" + | "sql" + | "html" + | "css" + | "json" + | "yaml" + | "yml" + | "toml" + | "xml" + ) + } + FileCategory::Other => true, + } + } +} + +impl From for FileCategoryEvaluator { + fn from(category: FileCategory) -> Self { + Self::new(category) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_evaluate_image() { + let evaluator = FileCategoryEvaluator::new(FileCategory::Image); + + let jpg = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("photo.jpg", vec![])); + let png = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("image.PNG", vec![])); + let pdf = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("doc.pdf", vec![])); + + assert!(evaluator.evaluate(&jpg)); + assert!(evaluator.evaluate(&png)); + assert!(!evaluator.evaluate(&pdf)); + } + + #[test] + fn test_evaluate_document() { + let evaluator = FileCategoryEvaluator::new(FileCategory::Document); + + let pdf = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("report.pdf", vec![])); + let docx = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("letter.docx", vec![])); + let txt = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("notes.txt", vec![])); + + assert!(evaluator.evaluate(&pdf)); + assert!(evaluator.evaluate(&docx)); + assert!(!evaluator.evaluate(&txt)); + } +} diff --git a/crates/nvisy-runtime/src/graph/route/language.rs b/crates/nvisy-runtime/src/graph/route/language.rs new file mode 100644 index 0000000..12a224a --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/language.rs @@ -0,0 +1,119 @@ +//! Language evaluator for routing by detected content language. + +use nvisy_dal::AnyDataValue; + +/// Evaluates language based on metadata. +#[derive(Debug, Clone)] +pub struct LanguageEvaluator { + /// Language codes to match. + codes: Vec, + /// Minimum confidence threshold. 
+ min_confidence: f32, +} + +impl LanguageEvaluator { + /// Creates a new language evaluator. + pub fn new(codes: Vec, min_confidence: f32) -> Self { + Self { + codes, + min_confidence, + } + } + + /// Evaluates whether the data matches any of the language codes. + pub fn evaluate(&self, data: &AnyDataValue) -> bool { + let detected_lang = self.get_metadata_string(data, "language"); + let confidence = self.get_metadata_f32(data, "language_confidence"); + + match (detected_lang, confidence) { + (Some(lang), Some(conf)) => { + self.codes + .iter() + .any(|code| lang.eq_ignore_ascii_case(code)) + && conf >= self.min_confidence + } + (Some(lang), None) => self + .codes + .iter() + .any(|code| lang.eq_ignore_ascii_case(code)), + _ => false, + } + } + + /// Gets a string metadata value. + fn get_metadata_string(&self, data: &AnyDataValue, key: &str) -> Option { + match data { + AnyDataValue::Blob(blob) => blob.metadata.get(key).and_then(json_to_string), + AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_string), + _ => None, + } + } + + /// Gets an f32 metadata value. + fn get_metadata_f32(&self, data: &AnyDataValue, key: &str) -> Option { + match data { + AnyDataValue::Blob(blob) => blob + .metadata + .get(key) + .and_then(|v| v.as_f64()) + .map(|v| v as f32), + AnyDataValue::Record(record) => record + .columns + .get(key) + .and_then(|v| v.as_f64()) + .map(|v| v as f32), + _ => None, + } + } +} + +/// Converts a JSON value to a string. +fn json_to_string(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::String(s) => Some(s.clone()), + serde_json::Value::Number(n) => Some(n.to_string()), + serde_json::Value::Bool(b) => Some(b.to_string()), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_evaluate_with_confidence() { + let evaluator = LanguageEvaluator::new(vec!["en".into(), "es".into()], 0.8); + + let mut blob = nvisy_dal::datatype::Blob::new("doc.txt", vec![]); + blob.metadata + .insert("language".into(), serde_json::json!("en")); + blob.metadata + .insert("language_confidence".into(), serde_json::json!(0.95)); + let english = AnyDataValue::Blob(blob); + + assert!(evaluator.evaluate(&english)); + + let mut blob = nvisy_dal::datatype::Blob::new("doc.txt", vec![]); + blob.metadata + .insert("language".into(), serde_json::json!("en")); + blob.metadata + .insert("language_confidence".into(), serde_json::json!(0.5)); + let low_conf = AnyDataValue::Blob(blob); + + assert!(!evaluator.evaluate(&low_conf)); + } + + #[test] + fn test_evaluate_without_confidence() { + let evaluator = LanguageEvaluator::new(vec!["fr".into()], 0.8); + + let mut blob = nvisy_dal::datatype::Blob::new("doc.txt", vec![]); + blob.metadata + .insert("language".into(), serde_json::json!("fr")); + let french = AnyDataValue::Blob(blob); + + // Without confidence metadata, still matches by language + assert!(evaluator.evaluate(&french)); + } +} diff --git a/crates/nvisy-runtime/src/graph/route/mod.rs b/crates/nvisy-runtime/src/graph/route/mod.rs new file mode 100644 index 0000000..985ac5c --- /dev/null +++ b/crates/nvisy-runtime/src/graph/route/mod.rs @@ -0,0 +1,89 @@ +//! Compiled routing types for conditional data flow. + +mod file_category; +mod language; + +pub use file_category::FileCategoryEvaluator; +pub use language::LanguageEvaluator; + +use nvisy_dal::AnyDataValue; + +use crate::definition::SwitchDef; + +/// Compiled switch node - evaluates conditions and returns true/false. 
+#[derive(Debug, Clone)] +pub struct CompiledSwitch { + /// The evaluator for this switch. + evaluator: SwitchEvaluator, +} + +/// Evaluator enum for switch conditions. +#[derive(Debug, Clone)] +pub enum SwitchEvaluator { + /// Evaluate by file category (extension). + FileCategory(FileCategoryEvaluator), + /// Evaluate by detected language. + Language(LanguageEvaluator), +} + +impl SwitchEvaluator { + /// Evaluates the condition against the data. + pub fn evaluate(&self, data: &AnyDataValue) -> bool { + match self { + SwitchEvaluator::FileCategory(e) => e.evaluate(data), + SwitchEvaluator::Language(e) => e.evaluate(data), + } + } +} + +impl CompiledSwitch { + /// Creates a new compiled switch. + pub fn new(evaluator: SwitchEvaluator) -> Self { + Self { evaluator } + } + + /// Evaluates the switch condition against input data. + /// + /// Returns `true` if the condition matches, `false` otherwise. + pub fn evaluate(&self, data: &AnyDataValue) -> bool { + self.evaluator.evaluate(data) + } +} + +impl From for CompiledSwitch { + fn from(def: SwitchDef) -> Self { + use crate::definition::SwitchCondition; + + let evaluator = match def.condition { + SwitchCondition::FileCategory(c) => { + SwitchEvaluator::FileCategory(FileCategoryEvaluator::new(c.category)) + } + SwitchCondition::Language(c) => { + SwitchEvaluator::Language(LanguageEvaluator::new(c.codes, c.min_confidence)) + } + }; + + Self::new(evaluator) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::definition::{FileCategory, FileCategoryCondition, SwitchCondition}; + + #[test] + fn test_compiled_switch_from_def() { + let def = SwitchDef::new(SwitchCondition::FileCategory(FileCategoryCondition { + category: FileCategory::Image, + })); + + let switch = CompiledSwitch::from(def); + + let jpg = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("photo.jpg", vec![])); + let pdf = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("doc.pdf", vec![])); + + assert!(switch.evaluate(&jpg)); + assert!(!switch.evaluate(&pdf)); + } +} diff --git a/crates/nvisy-runtime/stream/event.rs b/crates/nvisy-runtime/stream/event.rs deleted file mode 100644 index 164e3d7..0000000 --- a/crates/nvisy-runtime/stream/event.rs +++ /dev/null @@ -1,118 +0,0 @@ -//! Event types for stream processing. -//! -//! This module contains common event types and the file job type -//! used in processing pipelines. - -use jiff::Timestamp; -#[cfg(feature = "schema")] -use schemars::JsonSchema; -use serde::de::DeserializeOwned; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// File processing job. -/// -/// Represents a unit of work in a file processing pipeline. -/// Each job targets a specific file and carries a generic payload -/// that defines the processing parameters. -/// -/// The generic parameter `T` is the job-specific data payload. -/// Callers define their own payload types for different pipeline stages. -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -#[cfg_attr(feature = "schema", derive(JsonSchema))] -#[serde(bound = "T: Serialize + DeserializeOwned")] -pub struct FileJob { - /// Unique job identifier (UUID v7 for time-ordering). - pub id: Uuid, - /// Database file ID to process. - pub file_id: Uuid, - /// Storage path in NATS object store (DocumentKey encoded). - pub object_key: String, - /// File extension for format detection. - pub file_extension: String, - /// Job-specific data payload. - pub data: T, - /// When the job was created. 
- pub created_at: Timestamp, - /// NATS subject to publish result to (for internal job chaining). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub callback_subject: Option, - /// Idempotency key to prevent duplicate job processing. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub idempotency_key: Option, -} - -impl FileJob { - /// Creates a new file job with the given data payload. - pub fn new(file_id: Uuid, object_key: String, file_extension: String, data: T) -> Self { - Self { - id: Uuid::now_v7(), - file_id, - object_key, - file_extension, - data, - created_at: Timestamp::now(), - callback_subject: None, - idempotency_key: None, - } - } - - /// Sets a callback subject for job chaining. - pub fn with_callback(mut self, subject: impl Into) -> Self { - self.callback_subject = Some(subject.into()); - self - } - - /// Sets an idempotency key. - pub fn with_idempotency_key(mut self, key: impl Into) -> Self { - self.idempotency_key = Some(key.into()); - self - } - - /// Returns job age since creation. - pub fn age(&self) -> std::time::Duration { - let now = Timestamp::now(); - let signed_dur = now.duration_since(self.created_at); - std::time::Duration::from_secs(signed_dur.as_secs().max(0) as u64) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] - struct TestPayload { - message: String, - } - - #[test] - fn test_serialization_roundtrip() { - let file_id = Uuid::now_v7(); - let job = FileJob::new( - file_id, - "path".to_string(), - "pdf".to_string(), - TestPayload { - message: "hello".to_string(), - }, - ); - - let json = serde_json::to_string(&job).unwrap(); - let parsed: FileJob = serde_json::from_str(&json).unwrap(); - - assert_eq!(job.file_id, parsed.file_id); - assert_eq!(job.data, parsed.data); - } - - #[test] - fn test_with_unit_payload() { - let file_id = Uuid::now_v7(); - let job: FileJob<()> = FileJob::new(file_id, "path".to_string(), "pdf".to_string(), ()); - - let json = serde_json::to_string(&job).unwrap(); - let parsed: FileJob<()> = serde_json::from_str(&json).unwrap(); - - assert_eq!(job.file_id, parsed.file_id); - } -} diff --git a/crates/nvisy-runtime/stream/event_pub.rs b/crates/nvisy-runtime/stream/event_pub.rs deleted file mode 100644 index ee826b7..0000000 --- a/crates/nvisy-runtime/stream/event_pub.rs +++ /dev/null @@ -1,76 +0,0 @@ -//! Generic event stream publisher. - -use std::marker::PhantomData; - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; -use serde::Serialize; - -use super::event_stream::EventStream; -use super::stream_pub::StreamPublisher; -use crate::Result; - -/// Generic event publisher for delivering typed events to workers. -/// -/// This publisher is generic over: -/// - `T`: The event/message type to publish -/// - `S`: The stream configuration (determines stream name, subject, etc.) -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct EventPublisher -where - T: Serialize + Send + Sync + 'static, - S: EventStream, -{ - #[deref] - #[deref_mut] - publisher: StreamPublisher, - _stream: PhantomData, -} - -impl EventPublisher -where - T: Serialize + Send + Sync + 'static, - S: EventStream, -{ - /// Create a new event publisher for the stream type. - pub(crate) async fn new(jetstream: &Context) -> Result { - let publisher = StreamPublisher::new(jetstream, S::NAME).await?; - Ok(Self { - publisher, - _stream: PhantomData, - }) - } - - /// Publish an event to the stream's configured subject. 
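A short editorial sketch (not part of the patch) of how the removed `FileJob` builder methods chain; the callback subject and idempotency key below are made-up values:

use uuid::Uuid;

fn build_job_sketch() -> FileJob<()> {
    // Unit payload keeps the sketch minimal; real callers supply their own payload type.
    FileJob::new(
        Uuid::now_v7(),
        "objects/report".to_string(),
        "pdf".to_string(),
        (),
    )
    .with_callback("file.jobs.extracted")
    .with_idempotency_key("report-v1")
}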
- pub async fn publish(&self, event: &T) -> Result<()> { - self.publisher.publish(S::SUBJECT, event).await - } - - /// Publish an event with a sub-subject appended to the stream subject. - /// - /// Events are published to `{stream_subject}.{sub_subject}`. - pub async fn publish_to(&self, sub_subject: &str, event: &T) -> Result<()> { - let subject = format!("{}.{}", S::SUBJECT, sub_subject); - self.publisher.publish(&subject, event).await - } - - /// Publish multiple events to the stream's configured subject. - pub async fn publish_batch(&self, events: &[T]) -> Result<()> - where - T: Clone, - { - self.publisher.publish_batch(S::SUBJECT, events).await - } - - /// Returns the stream name. - #[inline] - pub fn stream_name(&self) -> &'static str { - S::NAME - } - - /// Returns the subject. - #[inline] - pub fn subject(&self) -> &'static str { - S::SUBJECT - } -} diff --git a/crates/nvisy-runtime/stream/event_stream.rs b/crates/nvisy-runtime/stream/event_stream.rs deleted file mode 100644 index 3fb0efb..0000000 --- a/crates/nvisy-runtime/stream/event_stream.rs +++ /dev/null @@ -1,74 +0,0 @@ -//! Event stream configuration for NATS JetStream. - -use std::time::Duration; - -/// Marker trait for event streams. -/// -/// This trait defines the configuration for a NATS JetStream stream. -pub trait EventStream: Clone + Send + Sync + 'static { - /// Stream name used in NATS JetStream. - const NAME: &'static str; - - /// Subject pattern for publishing/subscribing to this stream. - const SUBJECT: &'static str; - - /// Maximum age for messages in this stream. - /// Returns `None` for streams where messages should not expire. - const MAX_AGE: Option; - - /// Default consumer name for this stream. - const CONSUMER_NAME: &'static str; -} - -/// Stream for file processing jobs. -/// -/// Messages expire after 7 days. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] -pub struct FileStream; - -impl EventStream for FileStream { - const CONSUMER_NAME: &'static str = "file-worker"; - const MAX_AGE: Option = Some(Duration::from_secs(7 * 24 * 60 * 60)); - const NAME: &'static str = "FILE_JOBS"; - const SUBJECT: &'static str = "file.jobs"; -} - -/// Stream for webhook delivery. -/// -/// Messages expire after 1 day. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] -pub struct WebhookStream; - -impl EventStream for WebhookStream { - const CONSUMER_NAME: &'static str = "webhook-worker"; - const MAX_AGE: Option = Some(Duration::from_secs(24 * 60 * 60)); - const NAME: &'static str = "WEBHOOKS"; - const SUBJECT: &'static str = "webhooks"; -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_file_stream() { - assert_eq!(FileStream::NAME, "FILE_JOBS"); - assert_eq!(FileStream::SUBJECT, "file.jobs"); - assert_eq!( - FileStream::MAX_AGE, - Some(Duration::from_secs(7 * 24 * 60 * 60)) - ); - assert_eq!(FileStream::CONSUMER_NAME, "file-worker"); - } - - #[test] - fn test_webhook_stream() { - assert_eq!(WebhookStream::NAME, "WEBHOOKS"); - assert_eq!(WebhookStream::SUBJECT, "webhooks"); - assert_eq!( - WebhookStream::MAX_AGE, - Some(Duration::from_secs(24 * 60 * 60)) - ); - assert_eq!(WebhookStream::CONSUMER_NAME, "webhook-worker"); - } -} diff --git a/crates/nvisy-runtime/stream/event_sub.rs b/crates/nvisy-runtime/stream/event_sub.rs deleted file mode 100644 index 974fd59..0000000 --- a/crates/nvisy-runtime/stream/event_sub.rs +++ /dev/null @@ -1,63 +0,0 @@ -//! Generic event stream subscriber. 
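For reference, a sketch of how a caller would declare an additional stream against the `EventStream` trait deleted above; `MetricsStream`, its name, subject, and retention are hypothetical values:

use std::time::Duration;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct MetricsStream;

impl EventStream for MetricsStream {
    // One-hour retention, mirroring the shape of FileStream/WebhookStream above.
    const CONSUMER_NAME: &'static str = "metrics-worker";
    const MAX_AGE: Option<Duration> = Some(Duration::from_secs(60 * 60));
    const NAME: &'static str = "METRICS";
    const SUBJECT: &'static str = "metrics";
}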
- -use std::marker::PhantomData; - -use async_nats::jetstream::Context; -use derive_more::{Deref, DerefMut}; -use serde::de::DeserializeOwned; - -use super::event_stream::EventStream; -use super::stream_sub::StreamSubscriber; -use crate::Result; - -/// Generic event subscriber for consuming typed events. -/// -/// This subscriber is generic over: -/// - `T`: The event/message type to consume -/// - `S`: The stream configuration (determines stream name, subject, consumer name) -#[derive(Debug, Deref, DerefMut)] -pub struct EventSubscriber -where - T: DeserializeOwned + Send + Sync + 'static, - S: EventStream, -{ - #[deref] - #[deref_mut] - subscriber: StreamSubscriber, - _stream: PhantomData, -} - -impl EventSubscriber -where - T: DeserializeOwned + Send + Sync + 'static, - S: EventStream, -{ - /// Create a new event subscriber using the stream's default consumer name. - pub(crate) async fn new(jetstream: &Context) -> Result { - let subscriber = StreamSubscriber::new(jetstream, S::NAME, S::CONSUMER_NAME) - .await? - .with_filter_subject(format!("{}.>", S::NAME)); - Ok(Self { - subscriber, - _stream: PhantomData, - }) - } - - /// Returns the stream name. - #[inline] - pub fn stream_name(&self) -> &'static str { - S::NAME - } - - /// Returns the subject. - #[inline] - pub fn subject(&self) -> &'static str { - S::SUBJECT - } - - /// Returns the consumer name. - #[inline] - pub fn consumer_name(&self) -> &'static str { - S::CONSUMER_NAME - } -} diff --git a/crates/nvisy-runtime/stream/mod.rs b/crates/nvisy-runtime/stream/mod.rs deleted file mode 100644 index 1a939c2..0000000 --- a/crates/nvisy-runtime/stream/mod.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! JetStream streams for real-time updates and distributed job processing. -//! -//! This module provides type-safe streaming capabilities for: -//! -//! - File processing jobs via [`FileJob`], [`EventPublisher`], [`EventSubscriber`] -//! - Generic event publishing and subscribing with stream configuration via [`EventStream`] - -mod event; -mod event_pub; -mod event_stream; -mod event_sub; -mod stream_pub; -mod stream_sub; - -pub use event::FileJob; -pub use event_pub::EventPublisher; -pub use event_stream::{EventStream, FileStream, WebhookStream}; -pub use event_sub::EventSubscriber; -pub use stream_pub::StreamPublisher; -pub use stream_sub::{StreamSubscriber, TypedBatchStream, TypedMessage, TypedMessageStream}; diff --git a/crates/nvisy-runtime/stream/stream_pub.rs b/crates/nvisy-runtime/stream/stream_pub.rs deleted file mode 100644 index 4dde9e0..0000000 --- a/crates/nvisy-runtime/stream/stream_pub.rs +++ /dev/null @@ -1,232 +0,0 @@ -//! Type-safe publisher for JetStream streams. - -use std::marker::PhantomData; -use std::sync::Arc; - -use async_nats::jetstream::{Context, stream}; -use serde::Serialize; -use tokio::sync::Semaphore; -use tracing::{debug, instrument}; - -use crate::{Error, Result, TRACING_TARGET_STREAM}; - -/// Inner data for StreamPublisher -#[derive(Debug)] -struct StreamPublisherInner { - jetstream: Context, - stream_name: String, -} - -/// Type-safe stream publisher with compile-time guarantees -/// -/// This publisher provides a generic interface over JetStream for a specific -/// serializable data type T, ensuring compile-time type safety for all publish -/// operations. The type parameter prevents mixing different message types. 
-#[derive(Debug, Clone)] -pub struct StreamPublisher { - inner: Arc, - _marker: PhantomData, -} - -impl StreamPublisher -where - T: Serialize + Send + Sync + 'static, -{ - /// Create a new type-safe stream publisher - #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] - pub(crate) async fn new(jetstream: &Context, stream_name: &str) -> Result { - let stream_config = stream::Config { - name: stream_name.to_string(), - description: Some(format!("Type-safe stream: {}", stream_name)), - subjects: vec![format!("{}.>", stream_name)], - max_age: std::time::Duration::from_secs(3600), // Keep messages for 1 hour - ..Default::default() - }; - - // Try to get existing stream first - match jetstream.get_stream(stream_name).await { - Ok(_) => { - debug!( - target: TRACING_TARGET_STREAM, - stream = %stream_name, - type_name = std::any::type_name::(), - "Using existing stream" - ); - } - Err(_) => { - // Stream doesn't exist, create it - debug!( - target: TRACING_TARGET_STREAM, - stream = %stream_name, - type_name = std::any::type_name::(), - max_age_secs = 3600, - "Creating new stream" - ); - jetstream - .create_stream(stream_config) - .await - .map_err(|e| Error::operation("stream_create", e.to_string()))?; - } - } - - Ok(Self { - inner: Arc::new(StreamPublisherInner { - jetstream: jetstream.clone(), - stream_name: stream_name.to_string(), - }), - _marker: PhantomData, - }) - } - - /// Publish an event to the stream - #[instrument(skip(self, event), target = TRACING_TARGET_STREAM)] - pub async fn publish(&self, subject: &str, event: &T) -> Result<()> { - let full_subject = format!("{}.{}", self.inner.stream_name, subject); - let payload = serde_json::to_vec(event).map_err(Error::Serialization)?; - let payload_size = payload.len(); - - self.inner - .jetstream - .publish(full_subject.clone(), payload.into()) - .await - .map_err(|e| Error::delivery_failed(&full_subject, e.to_string()))? 
- .await - .map_err(|e| Error::operation("stream_publish", e.to_string()))?; - - debug!( - target: TRACING_TARGET_STREAM, - subject = %full_subject, - payload_size = payload_size, - type_name = std::any::type_name::(), - "Published typed event" - ); - Ok(()) - } - - /// Publish multiple events in batch with parallel processing - #[instrument(skip(self, events), target = TRACING_TARGET_STREAM)] - pub async fn publish_batch(&self, subject: &str, events: &[T]) -> Result<()> - where - T: Clone, - { - self.publish_batch_parallel(subject, events, 10).await - } - - /// Publish multiple events in batch with configurable parallelism - #[instrument(skip(self, events), target = TRACING_TARGET_STREAM)] - pub async fn publish_batch_parallel( - &self, - subject: &str, - events: &[T], - parallelism: usize, - ) -> Result<()> - where - T: Clone, - { - if events.is_empty() { - return Ok(()); - } - - let count = events.len(); - let semaphore = Arc::new(Semaphore::new(parallelism)); - let mut tasks = Vec::with_capacity(events.len()); - - for event in events.iter() { - let event = event.clone(); - let subject = subject.to_string(); - let publisher = self.clone(); - let permit = semaphore.clone(); - - let task = tokio::spawn(async move { - let _permit = permit - .acquire() - .await - .map_err(|_| Error::operation("semaphore", "Failed to acquire permit"))?; - publisher.publish(&subject, &event).await - }); - - tasks.push(task); - } - - // Wait for all tasks and collect errors - let mut errors = Vec::new(); - for task in tasks { - match task.await { - Ok(Ok(())) => {} // Success - Ok(Err(e)) => errors.push(e), - Err(e) => errors.push(Error::operation("task_join", e.to_string())), - } - } - - if !errors.is_empty() { - return Err(Error::operation( - "batch_publish", - format!("Failed to publish {} out of {} events", errors.len(), count), - )); - } - - debug!( - target: TRACING_TARGET_STREAM, - count = count, - parallelism = parallelism, - stream = %self.inner.stream_name, - subject = %subject, - "Published batch of typed events in parallel" - ); - Ok(()) - } - - /// Get the stream name - pub fn stream_name(&self) -> &str { - &self.inner.stream_name - } - - /// Check if the stream is healthy and accessible - #[instrument(skip(self), target = TRACING_TARGET_STREAM)] - pub async fn health_check(&self) -> Result { - match self - .inner - .jetstream - .get_stream(&self.inner.stream_name) - .await - { - Ok(_) => { - debug!( - target: TRACING_TARGET_STREAM, - stream = %self.inner.stream_name, - "Stream health check passed" - ); - Ok(true) - } - Err(e) => { - debug!( - target: TRACING_TARGET_STREAM, - stream = %self.inner.stream_name, - error = %e, - "Stream health check failed" - ); - Ok(false) - } - } - } - - /// Get stream information - #[instrument(skip(self), target = TRACING_TARGET_STREAM)] - pub async fn stream_info(&self) -> Result { - let mut stream = self - .inner - .jetstream - .get_stream(&self.inner.stream_name) - .await - .map_err(|e| Error::stream_error(&self.inner.stream_name, e.to_string()))?; - - stream - .info() - .await - .map_err(|e| Error::operation("stream_info", e.to_string())) - .map(|info| (*info).clone()) - } -} - -#[cfg(test)] -mod tests {} diff --git a/crates/nvisy-runtime/stream/stream_sub.rs b/crates/nvisy-runtime/stream/stream_sub.rs deleted file mode 100644 index 458448d..0000000 --- a/crates/nvisy-runtime/stream/stream_sub.rs +++ /dev/null @@ -1,535 +0,0 @@ -//! Type-safe subscriber for JetStream streams. 
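The batch publisher above caps in-flight work with a semaphore; this standalone sketch shows the same pattern with a placeholder `send_all` function and `println!` standing in for the real publish call:

use std::sync::Arc;

use tokio::sync::Semaphore;

async fn send_all(items: Vec<String>, parallelism: usize) {
    let semaphore = Arc::new(Semaphore::new(parallelism));
    let mut tasks = Vec::with_capacity(items.len());

    for item in items {
        let semaphore = Arc::clone(&semaphore);
        tasks.push(tokio::spawn(async move {
            // Each task holds a permit while it works, bounding concurrency.
            let _permit = semaphore.acquire().await.expect("semaphore closed");
            println!("publishing {item}");
        }));
    }

    // Wait for every task; errors are ignored in this sketch.
    for task in tasks {
        let _ = task.await;
    }
}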
- -use std::marker::PhantomData; -use std::sync::Arc; - -use async_nats::jetstream::consumer::{self, Consumer}; -use async_nats::jetstream::{self, Context, Message}; -use futures::StreamExt; -use serde::de::DeserializeOwned; -use tracing::{debug, instrument, warn}; - -use crate::{Error, Result, TRACING_TARGET_STREAM}; - -/// Inner data for StreamSubscriber. -#[derive(Debug, Clone)] -struct StreamSubscriberInner { - jetstream: Context, - stream_name: String, - consumer_name: String, - filter_subject: Option, -} - -/// Type-safe stream subscriber with compile-time guarantees. -/// -/// This subscriber provides a generic interface over JetStream for a specific -/// deserializable data type T, ensuring compile-time type safety for all receive -/// operations. The type parameter prevents mixing different message types. -#[derive(Debug, Clone)] -pub struct StreamSubscriber { - inner: Arc, - _marker: PhantomData, -} - -impl StreamSubscriber -where - T: DeserializeOwned + Send + Sync + 'static, -{ - /// Create a new type-safe stream subscriber. - #[instrument(skip(jetstream), target = TRACING_TARGET_STREAM)] - pub(crate) async fn new( - jetstream: &Context, - stream_name: &str, - consumer_name: &str, - ) -> Result { - // Verify stream exists - jetstream - .get_stream(stream_name) - .await - .map_err(|e| Error::stream_error(stream_name, e.to_string()))?; - - debug!( - target: TRACING_TARGET_STREAM, - stream = %stream_name, - consumer = %consumer_name, - type_name = std::any::type_name::(), - "Created type-safe stream subscriber" - ); - - Ok(Self { - inner: Arc::new(StreamSubscriberInner { - jetstream: jetstream.clone(), - stream_name: stream_name.to_string(), - consumer_name: consumer_name.to_string(), - filter_subject: None, - }), - _marker: PhantomData, - }) - } - - /// Add a subject filter to the subscriber (builder pattern). - pub fn with_filter_subject(self, filter: impl Into) -> Self { - let mut inner = Arc::try_unwrap(self.inner).unwrap_or_else(|arc| (*arc).clone()); - inner.filter_subject = Some(filter.into()); - Self { - inner: Arc::new(inner), - _marker: PhantomData, - } - } - - /// Subscribe to the stream and get a typed message stream. - #[instrument(skip(self), target = TRACING_TARGET_STREAM)] - pub async fn subscribe(&self) -> Result> { - let mut consumer_config = consumer::pull::Config { - durable_name: Some(self.inner.consumer_name.clone()), - description: Some(format!("Consumer for stream {}", self.inner.stream_name)), - ack_policy: consumer::AckPolicy::Explicit, - ..Default::default() - }; - - if let Some(filter) = &self.inner.filter_subject { - consumer_config.filter_subject = filter.clone(); - } - - // Get or create consumer - let stream = self - .inner - .jetstream - .get_stream(&self.inner.stream_name) - .await - .map_err(|e| { - Error::stream_error( - &self.inner.stream_name, - format!("Failed to get stream: {}", e), - ) - })?; - - let consumer = stream - .get_or_create_consumer(&self.inner.consumer_name, consumer_config) - .await - .map_err(|e| { - Error::consumer_error( - &self.inner.consumer_name, - format!("Failed to create consumer: {}", e), - ) - })?; - - debug!( - target: TRACING_TARGET_STREAM, - stream = %self.inner.stream_name, - consumer = %self.inner.consumer_name, - "Subscribed to stream" - ); - - Ok(TypedMessageStream { - consumer, - _marker: PhantomData, - }) - } - - /// Subscribe with a batch size for fetching messages. 
- #[instrument(skip(self), target = TRACING_TARGET_STREAM)] - pub async fn subscribe_batch(&self, batch_size: usize) -> Result> { - let mut consumer_config = consumer::pull::Config { - durable_name: Some(self.inner.consumer_name.clone()), - description: Some(format!( - "Batch consumer for stream {}", - self.inner.stream_name - )), - ack_policy: consumer::AckPolicy::Explicit, - ..Default::default() - }; - - if let Some(filter) = &self.inner.filter_subject { - consumer_config.filter_subject = filter.clone(); - } - - let stream = self - .inner - .jetstream - .get_stream(&self.inner.stream_name) - .await - .map_err(|e| { - Error::stream_error( - &self.inner.stream_name, - format!("Failed to get stream: {}", e), - ) - })?; - - let consumer = stream - .get_or_create_consumer(&self.inner.consumer_name, consumer_config) - .await - .map_err(|e| { - Error::consumer_error( - &self.inner.consumer_name, - format!("Failed to create consumer: {}", e), - ) - })?; - - debug!( - target: TRACING_TARGET_STREAM, - stream = %self.inner.stream_name, - consumer = %self.inner.consumer_name, - batch_size = batch_size, - "Subscribed to stream with batching" - ); - - Ok(TypedBatchStream { - consumer, - batch_size, - _marker: PhantomData, - }) - } - - /// Get the stream name. - #[inline] - pub fn stream_name(&self) -> &str { - &self.inner.stream_name - } - - /// Get the consumer name. - #[inline] - pub fn consumer_name(&self) -> &str { - &self.inner.consumer_name - } - - /// Check if the stream and consumer are healthy and accessible. - #[instrument(skip(self), target = TRACING_TARGET_STREAM)] - pub async fn health_check(&self) -> Result { - match self - .inner - .jetstream - .get_stream(&self.inner.stream_name) - .await - { - Ok(stream) => match stream - .get_consumer::(&self.inner.consumer_name) - .await - { - Ok(_) => { - debug!( - target: TRACING_TARGET_STREAM, - stream = %self.inner.stream_name, - consumer = %self.inner.consumer_name, - "Subscriber health check passed" - ); - Ok(true) - } - Err(e) => { - debug!( - target: TRACING_TARGET_STREAM, - stream = %self.inner.stream_name, - consumer = %self.inner.consumer_name, - error = %e, - "Consumer health check failed" - ); - Ok(false) - } - }, - Err(e) => { - debug!( - target: TRACING_TARGET_STREAM, - stream = %self.inner.stream_name, - error = %e, - "Stream health check failed" - ); - Ok(false) - } - } - } - - /// Get consumer information. - #[instrument(skip(self), target = TRACING_TARGET_STREAM)] - pub async fn consumer_info(&self) -> Result { - let stream = self - .inner - .jetstream - .get_stream(&self.inner.stream_name) - .await - .map_err(|e| Error::stream_error(&self.inner.stream_name, e.to_string()))?; - - let mut consumer = stream - .get_consumer::(&self.inner.consumer_name) - .await - .map_err(|e| Error::consumer_error(&self.inner.consumer_name, e.to_string()))?; - - consumer - .info() - .await - .map_err(|e| Error::operation("consumer_info", e.to_string())) - .map(|info| (*info).clone()) - } -} - -/// Type-safe message stream wrapper. -pub struct TypedMessageStream { - consumer: Consumer, - _marker: PhantomData, -} - -impl TypedMessageStream -where - T: DeserializeOwned + Send + 'static, -{ - /// Fetch the next message from the stream with timeout. - pub async fn next_with_timeout( - &mut self, - timeout: std::time::Duration, - ) -> Result>> { - let result = tokio::time::timeout(timeout, self.next()).await; - match result { - Ok(msg_result) => msg_result, - Err(_) => Ok(None), // Timeout occurred - } - } - - /// Fetch the next message from the stream. 
- pub async fn next(&mut self) -> Result>> { - match self.consumer.messages().await { - Ok(mut messages) => { - if let Some(msg) = messages.next().await { - match msg { - Ok(message) => { - let payload: T = serde_json::from_slice(&message.payload)?; - - debug!( - target: TRACING_TARGET_STREAM, - subject = %message.subject, - "Received typed message" - ); - - Ok(Some(TypedMessage { payload, message })) - } - Err(e) => { - warn!( - target: TRACING_TARGET_STREAM, - error = %e, - "Error receiving message" - ); - Err(Error::operation("message_receive", e.to_string())) - } - } - } else { - Ok(None) - } - } - Err(e) => Err(Error::operation("messages_stream", e.to_string())), - } - } -} - -/// Type-safe batch message stream wrapper. -pub struct TypedBatchStream { - consumer: Consumer, - batch_size: usize, - _marker: PhantomData, -} - -impl TypedBatchStream -where - T: DeserializeOwned, -{ - /// Fetch the next batch of messages with timeout. - pub async fn next_batch_with_timeout( - &mut self, - timeout: std::time::Duration, - ) -> Result>> { - let result = tokio::time::timeout(timeout, self.next_batch()).await; - match result { - Ok(batch_result) => batch_result, - Err(_) => Ok(Vec::new()), // Timeout occurred, return empty batch - } - } - - /// Fetch the next batch of messages with custom batch size. - pub async fn next_batch_sized(&mut self, batch_size: usize) -> Result>> { - let mut batch = Vec::with_capacity(batch_size); - - match self - .consumer - .fetch() - .max_messages(batch_size) - .messages() - .await - { - Ok(mut messages) => { - while let Some(msg_result) = messages.next().await { - match msg_result { - Ok(message) => match serde_json::from_slice::(&message.payload) { - Ok(payload) => { - batch.push(TypedMessage { payload, message }); - } - Err(e) => { - warn!( - target: TRACING_TARGET_STREAM, - error = %e, - "Failed to deserialize message payload in custom batch" - ); - // Continue processing other messages - } - }, - Err(e) => { - warn!( - target: TRACING_TARGET_STREAM, - error = %e, - "Error receiving message in custom batch" - ); - } - } - } - - debug!( - target: TRACING_TARGET_STREAM, - batch_size = batch.len(), - requested_size = batch_size, - "Received custom-sized batch of typed messages" - ); - - Ok(batch) - } - Err(e) => Err(Error::operation("custom_batch_fetch", e.to_string())), - } - } - - /// Fetch the next batch of messages. - pub async fn next_batch(&mut self) -> Result>> { - let mut batch = Vec::with_capacity(self.batch_size); - - match self - .consumer - .fetch() - .max_messages(self.batch_size) - .messages() - .await - { - Ok(mut messages) => { - while let Some(msg_result) = messages.next().await { - match msg_result { - Ok(message) => match serde_json::from_slice::(&message.payload) { - Ok(payload) => { - batch.push(TypedMessage { payload, message }); - } - Err(e) => { - warn!( - target: TRACING_TARGET_STREAM, - error = %e, - "Failed to deserialize message payload" - ); - // Continue processing other messages - } - }, - Err(e) => { - warn!( - target: TRACING_TARGET_STREAM, - error = %e, - "Error receiving message in batch" - ); - } - } - } - - debug!( - target: TRACING_TARGET_STREAM, - batch_size = batch.len(), - "Received batch of typed messages" - ); - - Ok(batch) - } - Err(e) => Err(Error::operation("batch_fetch", e.to_string())), - } - } -} - -/// A typed message from the stream. -pub struct TypedMessage { - /// The deserialized payload. - pub payload: T, - /// The underlying NATS message for metadata and acknowledgment. 
- message: Message, -} - -impl TypedMessage { - /// Get the message subject. - pub fn subject(&self) -> &str { - &self.message.subject - } - - /// Get the message metadata. - pub fn info(&self) -> Result> { - self.message - .info() - .map_err(|e| Error::operation("message_info", e.to_string())) - } - - /// Acknowledge the message. - pub async fn ack(&mut self) -> Result<()> { - self.message - .ack() - .await - .map_err(|e| Error::operation("message_ack", e.to_string())) - } - - /// Negative acknowledge the message (trigger redelivery). - pub async fn nack(&mut self) -> Result<()> { - self.message - .ack_with(jetstream::AckKind::Nak(None)) - .await - .map_err(|e| Error::operation("message_nack", e.to_string())) - } - - /// Get a reference to the typed payload. - pub fn payload(&self) -> &T { - &self.payload - } - - /// Consume the message and return the payload. - pub fn into_payload(self) -> T { - self.payload - } - - /// Get message headers if available. - pub fn headers(&self) -> Option<&async_nats::HeaderMap> { - self.message.headers.as_ref() - } - - /// Get message sequence number. - pub fn sequence(&self) -> Result { - self.info() - .map(|info| info.stream_sequence) - .map_err(|e| Error::operation("get_sequence", e.to_string())) - } - - /// Check if this message is a redelivery. - pub fn is_redelivery(&self) -> Result { - self.info() - .map(|info| info.delivered > 1) - .map_err(|e| Error::operation("check_redelivery", e.to_string())) - } - - /// Get the number of delivery attempts. - pub fn delivery_count(&self) -> Result { - self.info() - .map(|info| info.delivered as usize) - .map_err(|e| Error::operation("get_delivery_count", e.to_string())) - } - - /// Acknowledge with explicit acknowledgment kind. - pub async fn ack_with(&mut self, ack_kind: jetstream::AckKind) -> Result<()> { - self.message - .ack_with(ack_kind) - .await - .map_err(|e| Error::operation("message_ack_with", e.to_string())) - } - - /// Double acknowledge (useful for at-least-once processing). - pub async fn double_ack(&mut self) -> Result<()> { - self.message - .double_ack() - .await - .map_err(|e| Error::operation("message_double_ack", e.to_string())) - } -} - -#[cfg(test)] -mod tests {} diff --git a/crates/nvisy-server/src/handler/request/pipelines.rs b/crates/nvisy-server/src/handler/request/pipelines.rs index b4426ac..cd86c01 100644 --- a/crates/nvisy-server/src/handler/request/pipelines.rs +++ b/crates/nvisy-server/src/handler/request/pipelines.rs @@ -6,7 +6,7 @@ use nvisy_postgres::model::{NewPipeline, UpdatePipeline as UpdatePipelineModel}; use nvisy_postgres::types::PipelineStatus; -use nvisy_runtime::definition::WorkflowDefinition; +use nvisy_runtime::definition::Workflow; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -65,7 +65,7 @@ pub struct UpdatePipeline { pub status: Option, /// New definition for the pipeline (strictly typed workflow definition). 
#[schemars(with = "Option")] - pub definition: Option, + pub definition: Option, } impl UpdatePipeline { diff --git a/crates/nvisy-server/src/handler/response/pipelines.rs b/crates/nvisy-server/src/handler/response/pipelines.rs index 3bccf41..86a460d 100644 --- a/crates/nvisy-server/src/handler/response/pipelines.rs +++ b/crates/nvisy-server/src/handler/response/pipelines.rs @@ -3,7 +3,7 @@ use jiff::Timestamp; use nvisy_postgres::model; use nvisy_postgres::types::PipelineStatus; -use nvisy_runtime::definition::WorkflowDefinition; +use nvisy_runtime::definition::Workflow; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -29,7 +29,7 @@ pub struct Pipeline { pub status: PipelineStatus, /// Pipeline definition (workflow graph). #[schemars(with = "serde_json::Value")] - pub definition: WorkflowDefinition, + pub definition: Workflow, /// Timestamp when the pipeline was created. pub created_at: Timestamp, /// Timestamp when the pipeline was last updated. @@ -39,7 +39,7 @@ pub struct Pipeline { impl Pipeline { /// Creates a new instance of [`Pipeline`] from the database model. pub fn from_model(pipeline: model::Pipeline) -> Self { - let definition: WorkflowDefinition = + let definition: Workflow = serde_json::from_value(pipeline.definition).unwrap_or_default(); Self { pipeline_id: pipeline.id, From 2fbaa4f83426195a2e750c9c67fe0b1d0e1ddabf Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Fri, 23 Jan 2026 08:53:30 +0100 Subject: [PATCH 20/28] refactor(dal): reorganize core traits with associated types and split providers - Refactor DataInput/DataOutput traits to use associated types for Item and Context - Split context.rs into separate files: ObjectContext, RelationalContext, VectorContext - Split stream.rs into input_stream.rs and output_stream.rs - Reorganize providers: keep struct in mod.rs, move DataInput impl to input.rs, DataOutput impl to output.rs - Vector DB providers (qdrant, pinecone, milvus, pgvector) only have output.rs (write-only) - Remove unused ProviderConfig enum - Update error.rs to use thiserror and add BoxError type alias - Update nvisy-runtime to use new trait structure - Add strum derive for ProviderCredentials.kind() - Clean up provider/mod.rs re-exports --- .github/{dependabot.yaml => dependabot.yml} | 0 Cargo.lock | 1 + crates/nvisy-dal/src/core/context.rs | 55 -------- crates/nvisy-dal/src/core/input_stream.rs | 47 +++++++ crates/nvisy-dal/src/core/mod.rs | 34 +++-- crates/nvisy-dal/src/core/object_context.rs | 37 ++++++ crates/nvisy-dal/src/core/output_stream.rs | 56 +++++++++ .../nvisy-dal/src/core/relational_context.rs | 45 +++++++ crates/nvisy-dal/src/core/stream.rs | 118 ----------------- crates/nvisy-dal/src/core/vector_context.rs | 21 ++++ crates/nvisy-dal/src/error.rs | 32 ++--- crates/nvisy-dal/src/lib.rs | 14 +-- crates/nvisy-dal/src/provider/azblob/input.rs | 58 +++++++++ crates/nvisy-dal/src/provider/azblob/mod.rs | 66 +--------- .../nvisy-dal/src/provider/azblob/output.rs | 23 ++++ crates/nvisy-dal/src/provider/config.rs | 32 ----- crates/nvisy-dal/src/provider/gcs/input.rs | 58 +++++++++ crates/nvisy-dal/src/provider/gcs/mod.rs | 66 +--------- crates/nvisy-dal/src/provider/gcs/output.rs | 23 ++++ crates/nvisy-dal/src/provider/milvus/mod.rs | 83 ++---------- .../nvisy-dal/src/provider/milvus/output.rs | 62 +++++++++ crates/nvisy-dal/src/provider/mod.rs | 18 +-- crates/nvisy-dal/src/provider/mysql/input.rs | 64 ++++++++++ crates/nvisy-dal/src/provider/mysql/mod.rs | 92 +------------- 
crates/nvisy-dal/src/provider/mysql/output.rs | 32 +++++ crates/nvisy-dal/src/provider/pgvector/mod.rs | 87 ++----------- .../nvisy-dal/src/provider/pgvector/output.rs | 65 ++++++++++ crates/nvisy-dal/src/provider/pinecone/mod.rs | 99 ++++----------- .../nvisy-dal/src/provider/pinecone/output.rs | 47 +++++++ .../nvisy-dal/src/provider/postgres/input.rs | 64 ++++++++++ crates/nvisy-dal/src/provider/postgres/mod.rs | 92 +------------- .../nvisy-dal/src/provider/postgres/output.rs | 32 +++++ crates/nvisy-dal/src/provider/qdrant/mod.rs | 119 +++++------------- .../nvisy-dal/src/provider/qdrant/output.rs | 53 ++++++++ crates/nvisy-dal/src/provider/s3/input.rs | 58 +++++++++ crates/nvisy-dal/src/provider/s3/mod.rs | 66 +--------- crates/nvisy-dal/src/provider/s3/output.rs | 23 ++++ crates/nvisy-runtime/Cargo.toml | 1 + crates/nvisy-runtime/src/engine/compiler.rs | 31 +++-- crates/nvisy-runtime/src/engine/context.rs | 51 +++++--- crates/nvisy-runtime/src/error.rs | 2 +- crates/nvisy-runtime/src/provider/inputs.rs | 42 +++++-- crates/nvisy-runtime/src/provider/mod.rs | 53 ++------ crates/nvisy-runtime/src/provider/outputs.rs | 41 +++--- 44 files changed, 1129 insertions(+), 1034 deletions(-) rename .github/{dependabot.yaml => dependabot.yml} (100%) delete mode 100644 crates/nvisy-dal/src/core/context.rs create mode 100644 crates/nvisy-dal/src/core/input_stream.rs create mode 100644 crates/nvisy-dal/src/core/object_context.rs create mode 100644 crates/nvisy-dal/src/core/output_stream.rs create mode 100644 crates/nvisy-dal/src/core/relational_context.rs delete mode 100644 crates/nvisy-dal/src/core/stream.rs create mode 100644 crates/nvisy-dal/src/core/vector_context.rs create mode 100644 crates/nvisy-dal/src/provider/azblob/input.rs create mode 100644 crates/nvisy-dal/src/provider/azblob/output.rs delete mode 100644 crates/nvisy-dal/src/provider/config.rs create mode 100644 crates/nvisy-dal/src/provider/gcs/input.rs create mode 100644 crates/nvisy-dal/src/provider/gcs/output.rs create mode 100644 crates/nvisy-dal/src/provider/milvus/output.rs create mode 100644 crates/nvisy-dal/src/provider/mysql/input.rs create mode 100644 crates/nvisy-dal/src/provider/mysql/output.rs create mode 100644 crates/nvisy-dal/src/provider/pgvector/output.rs create mode 100644 crates/nvisy-dal/src/provider/pinecone/output.rs create mode 100644 crates/nvisy-dal/src/provider/postgres/input.rs create mode 100644 crates/nvisy-dal/src/provider/postgres/output.rs create mode 100644 crates/nvisy-dal/src/provider/qdrant/output.rs create mode 100644 crates/nvisy-dal/src/provider/s3/input.rs create mode 100644 crates/nvisy-dal/src/provider/s3/output.rs diff --git a/.github/dependabot.yaml b/.github/dependabot.yml similarity index 100% rename from .github/dependabot.yaml rename to .github/dependabot.yml diff --git a/Cargo.lock b/Cargo.lock index d157c38..c70099e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3482,6 +3482,7 @@ dependencies = [ "semver", "serde", "serde_json", + "strum 0.27.2", "thiserror 2.0.18", "tokio", "tracing", diff --git a/crates/nvisy-dal/src/core/context.rs b/crates/nvisy-dal/src/core/context.rs deleted file mode 100644 index dc19ca5..0000000 --- a/crates/nvisy-dal/src/core/context.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! Context types for data operations. - -/// Context for data operations. -/// -/// Provides configuration for read/write operations including target, -/// pagination cursor, and limits. -#[derive(Debug, Clone, Default)] -pub struct Context { - /// Target collection, table, bucket, topic, etc. 
- pub target: Option, - /// Cursor for pagination (provider-specific format). - pub cursor: Option, - /// Maximum number of items to read. - pub limit: Option, -} - -impl Context { - /// Creates a new empty context. - pub fn new() -> Self { - Self::default() - } - - /// Sets the target. - pub fn with_target(mut self, target: impl Into) -> Self { - self.target = Some(target.into()); - self - } - - /// Sets the cursor for pagination. - pub fn with_cursor(mut self, cursor: impl Into) -> Self { - self.cursor = Some(cursor.into()); - self - } - - /// Sets the limit. - pub fn with_limit(mut self, limit: usize) -> Self { - self.limit = Some(limit); - self - } - - /// Returns the target, if set. - pub fn target(&self) -> Option<&str> { - self.target.as_deref() - } - - /// Returns the cursor, if set. - pub fn cursor(&self) -> Option<&str> { - self.cursor.as_deref() - } - - /// Returns the limit, if set. - pub fn limit(&self) -> Option { - self.limit - } -} diff --git a/crates/nvisy-dal/src/core/input_stream.rs b/crates/nvisy-dal/src/core/input_stream.rs new file mode 100644 index 0000000..5b015eb --- /dev/null +++ b/crates/nvisy-dal/src/core/input_stream.rs @@ -0,0 +1,47 @@ +//! Input stream types for reading data. + +use std::pin::Pin; +use std::task::{Context, Poll}; + +use futures::Stream; +use futures::stream::BoxStream; + +use crate::Result; + +/// A boxed stream of items with a lifetime. +pub type ItemStream<'a, T> = BoxStream<'a, Result>; + +/// Input stream wrapper for reading data. +pub struct InputStream { + stream: ItemStream<'static, T>, +} + +impl InputStream { + /// Creates a new input stream. + pub fn new(stream: ItemStream<'static, T>) -> Self { + Self { stream } + } + + /// Consumes the stream and returns the inner boxed stream. + pub fn into_inner(self) -> ItemStream<'static, T> { + self.stream + } +} + +impl Stream for InputStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.stream).poll_next(cx) + } + + fn size_hint(&self) -> (usize, Option) { + self.stream.size_hint() + } +} + +impl std::fmt::Debug for InputStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("InputStream").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/core/mod.rs b/crates/nvisy-dal/src/core/mod.rs index a8fcceb..499eaa5 100644 --- a/crates/nvisy-dal/src/core/mod.rs +++ b/crates/nvisy-dal/src/core/mod.rs @@ -1,31 +1,43 @@ //! Core types and traits for data operations. -mod context; -mod stream; +mod input_stream; +mod object_context; +mod output_stream; +mod relational_context; +mod vector_context; -pub use context::Context; -pub use stream::{InputStream, ItemSink, ItemStream, OutputStream}; +pub use input_stream::{InputStream, ItemStream}; +pub use object_context::ObjectContext; +pub use output_stream::{ItemSink, OutputStream}; +pub use relational_context::RelationalContext; +pub use vector_context::VectorContext; use crate::Result; -use crate::datatype::DataType; /// Trait for reading data from a source. /// /// Implementations provide streaming access to data with optional pagination. #[async_trait::async_trait] -pub trait DataInput: Send + Sync { +pub trait DataInput: Send + Sync { + /// The item type produced by this provider. + type Item; + /// The context type for read operations. + type Context; + /// Reads items from the source. /// - /// Returns an input stream containing items and an optional cursor - /// for pagination. 
- async fn read(&self, ctx: &Context) -> Result>; + /// Returns an input stream containing items. + async fn read(&self, ctx: &Self::Context) -> Result>; } /// Trait for writing data to a sink. /// /// Implementations accept batches of items for writing. #[async_trait::async_trait] -pub trait DataOutput: Send + Sync { +pub trait DataOutput: Send + Sync { + /// The item type accepted by this provider. + type Item; + /// Writes a batch of items to the sink. - async fn write(&self, ctx: &Context, items: Vec) -> Result<()>; + async fn write(&self, items: Vec) -> Result<()>; } diff --git a/crates/nvisy-dal/src/core/object_context.rs b/crates/nvisy-dal/src/core/object_context.rs new file mode 100644 index 0000000..ae92795 --- /dev/null +++ b/crates/nvisy-dal/src/core/object_context.rs @@ -0,0 +1,37 @@ +//! Context for object storage operations. + +/// Context for object storage operations (S3, GCS, Azure Blob). +#[derive(Debug, Clone, Default)] +pub struct ObjectContext { + /// Path prefix for listing objects. + pub prefix: Option, + /// Continuation token for pagination. + pub token: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +impl ObjectContext { + /// Creates a new empty context. + pub fn new() -> Self { + Self::default() + } + + /// Sets the prefix. + pub fn with_prefix(mut self, prefix: impl Into) -> Self { + self.prefix = Some(prefix.into()); + self + } + + /// Sets the continuation token. + pub fn with_token(mut self, token: impl Into) -> Self { + self.token = Some(token.into()); + self + } + + /// Sets the limit. + pub fn with_limit(mut self, limit: usize) -> Self { + self.limit = Some(limit); + self + } +} diff --git a/crates/nvisy-dal/src/core/output_stream.rs b/crates/nvisy-dal/src/core/output_stream.rs new file mode 100644 index 0000000..a1676a4 --- /dev/null +++ b/crates/nvisy-dal/src/core/output_stream.rs @@ -0,0 +1,56 @@ +//! Output stream types for writing data. + +use std::pin::Pin; +use std::task::{Context, Poll}; + +use futures::Sink; + +use crate::Error; + +/// A boxed sink for items with a lifetime. +pub type ItemSink<'a, T> = Pin + Send + 'a>>; + +/// Output stream wrapper for writing data. +/// +/// Wraps a boxed sink for streaming writes. +pub struct OutputStream { + sink: ItemSink<'static, T>, +} + +impl OutputStream { + /// Creates a new output stream. + pub fn new(sink: ItemSink<'static, T>) -> Self { + Self { sink } + } + + /// Consumes the stream and returns the inner boxed sink. + pub fn into_inner(self) -> ItemSink<'static, T> { + self.sink + } +} + +impl Sink for OutputStream { + type Error = Error; + + fn poll_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_ready(cx) + } + + fn start_send(mut self: Pin<&mut Self>, item: T) -> Result<(), Self::Error> { + self.sink.as_mut().start_send(item) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_flush(cx) + } + + fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + self.sink.as_mut().poll_close(cx) + } +} + +impl std::fmt::Debug for OutputStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OutputStream").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/core/relational_context.rs b/crates/nvisy-dal/src/core/relational_context.rs new file mode 100644 index 0000000..db0744d --- /dev/null +++ b/crates/nvisy-dal/src/core/relational_context.rs @@ -0,0 +1,45 @@ +//! Context for relational database operations. 
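To make the new trait shape concrete, here is a hedged sketch of a toy in-memory source implementing the reworked `DataInput` trait from `core/mod.rs` above; `MemorySource` is hypothetical and assumes the crate's `Result`, `InputStream`, and `ObjectContext` are in scope:

use async_trait::async_trait;
use futures::stream;

struct MemorySource {
    items: Vec<String>,
}

#[async_trait]
impl DataInput for MemorySource {
    type Item = String;
    type Context = ObjectContext;

    async fn read(&self, ctx: &ObjectContext) -> Result<InputStream<String>> {
        // Honor the optional limit from the context, like the real providers do.
        let limit = ctx.limit.unwrap_or(usize::MAX);
        let items: Vec<Result<String>> =
            self.items.iter().take(limit).cloned().map(Ok).collect();
        Ok(InputStream::new(Box::pin(stream::iter(items))))
    }
}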
+ +/// Context for relational database operations (Postgres, MySQL). +#[derive(Debug, Clone, Default)] +pub struct RelationalContext { + /// Target table name. + pub table: Option, + /// Last seen cursor value (for keyset pagination). + pub cursor: Option, + /// Tiebreaker value for resolving cursor conflicts. + pub tiebreaker: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +impl RelationalContext { + /// Creates a new empty context. + pub fn new() -> Self { + Self::default() + } + + /// Sets the table name. + pub fn with_table(mut self, table: impl Into) -> Self { + self.table = Some(table.into()); + self + } + + /// Sets the cursor value. + pub fn with_cursor(mut self, cursor: impl Into) -> Self { + self.cursor = Some(cursor.into()); + self + } + + /// Sets the tiebreaker value. + pub fn with_tiebreaker(mut self, tiebreaker: impl Into) -> Self { + self.tiebreaker = Some(tiebreaker.into()); + self + } + + /// Sets the limit. + pub fn with_limit(mut self, limit: usize) -> Self { + self.limit = Some(limit); + self + } +} diff --git a/crates/nvisy-dal/src/core/stream.rs b/crates/nvisy-dal/src/core/stream.rs deleted file mode 100644 index 60a599c..0000000 --- a/crates/nvisy-dal/src/core/stream.rs +++ /dev/null @@ -1,118 +0,0 @@ -//! Stream types for data input and output operations. - -use std::pin::Pin; -use std::task::{Context, Poll}; - -use futures::stream::BoxStream; -use futures::{Sink, Stream}; - -use crate::Result; - -/// A boxed stream of items. -pub type ItemStream<'a, T> = BoxStream<'a, Result>; - -/// Input stream wrapper for reading data. -/// -/// Wraps a boxed stream and provides a cursor for pagination. -pub struct InputStream<'a, T> { - stream: ItemStream<'a, T>, - cursor: Option, -} - -impl<'a, T> InputStream<'a, T> { - /// Creates a new input stream. - pub fn new(stream: ItemStream<'a, T>) -> Self { - Self { - stream, - cursor: None, - } - } - - /// Creates a new input stream with a cursor. - pub fn with_cursor(stream: ItemStream<'a, T>, cursor: Option) -> Self { - Self { stream, cursor } - } - - /// Returns the cursor for the next read, if any. - pub fn cursor(&self) -> Option<&str> { - self.cursor.as_deref() - } - - /// Consumes the stream and returns the inner boxed stream. - pub fn into_inner(self) -> ItemStream<'a, T> { - self.stream - } - - /// Consumes the stream and returns both the inner stream and cursor. - pub fn into_parts(self) -> (ItemStream<'a, T>, Option) { - (self.stream, self.cursor) - } -} - -impl Stream for InputStream<'_, T> { - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.stream).poll_next(cx) - } - - fn size_hint(&self) -> (usize, Option) { - self.stream.size_hint() - } -} - -/// A boxed sink for items. -pub type ItemSink<'a, T> = Pin + Send + 'a>>; - -/// Output stream wrapper for writing data. -/// -/// Wraps a boxed sink for streaming writes. -pub struct OutputStream<'a, T> { - sink: ItemSink<'a, T>, -} - -impl<'a, T> OutputStream<'a, T> { - /// Creates a new output stream. - pub fn new(sink: ItemSink<'a, T>) -> Self { - Self { sink } - } - - /// Consumes the stream and returns the inner boxed sink. 
- pub fn into_inner(self) -> ItemSink<'a, T> { - self.sink - } -} - -impl Sink for OutputStream<'_, T> { - type Error = crate::Error; - - fn poll_ready(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.sink.as_mut().poll_ready(cx) - } - - fn start_send(mut self: Pin<&mut Self>, item: T) -> Result<()> { - self.sink.as_mut().start_send(item) - } - - fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.sink.as_mut().poll_flush(cx) - } - - fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.sink.as_mut().poll_close(cx) - } -} - -impl std::fmt::Debug for OutputStream<'_, T> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("OutputStream").finish_non_exhaustive() - } -} - -impl std::fmt::Debug for InputStream<'_, T> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("InputStream") - .field("cursor", &self.cursor) - .finish_non_exhaustive() - } -} diff --git a/crates/nvisy-dal/src/core/vector_context.rs b/crates/nvisy-dal/src/core/vector_context.rs new file mode 100644 index 0000000..73ddc22 --- /dev/null +++ b/crates/nvisy-dal/src/core/vector_context.rs @@ -0,0 +1,21 @@ +//! Context for vector database operations. + +/// Context for vector database operations (Qdrant, Pinecone, Milvus, pgvector). +#[derive(Debug, Clone, Default)] +pub struct VectorContext { + /// Target collection name. + pub collection: Option, +} + +impl VectorContext { + /// Creates a new empty context. + pub fn new() -> Self { + Self::default() + } + + /// Sets the collection name. + pub fn with_collection(mut self, collection: impl Into) -> Self { + self.collection = Some(collection.into()); + self + } +} diff --git a/crates/nvisy-dal/src/error.rs b/crates/nvisy-dal/src/error.rs index 211df57..6cd112e 100644 --- a/crates/nvisy-dal/src/error.rs +++ b/crates/nvisy-dal/src/error.rs @@ -1,16 +1,21 @@ //! Error types for data operations. -use std::fmt; +use thiserror::Error; + +/// Boxed error type for dynamic error handling. +pub type BoxError = Box; /// Result type for data operations. -pub type Result = std::result::Result; +pub type Result = std::result::Result; /// Error type for data operations. -#[derive(Debug)] +#[derive(Debug, Error)] +#[error("{kind}: {message}")] pub struct Error { kind: ErrorKind, message: String, - source: Option>, + #[source] + source: Option, } /// The kind of data error. 
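Taken together, the hunks above replace the shared `Context` and `stream.rs` module with per-backend contexts (`ObjectContext`, `RelationalContext`, `VectorContext`) and give `DataInput`/`DataOutput` associated `Item`/`Context` types. Because the patch text as rendered here has lost its angle-bracketed generics, the following is a reconstructed sketch of the trait shapes, inferred from the provider impls later in this patch (`type Item = Blob; type Context = ObjectContext;` and so on); `Error` and `InputStream` are stand-ins for the crate's own types:

```rust
use async_trait::async_trait;

/// Stand-in for the crate's thiserror-based `Error` above.
pub struct Error;
/// Stand-in for the crate's `InputStream` wrapper.
pub struct InputStream<T>(std::marker::PhantomData<T>);
pub type Result<T> = std::result::Result<T, Error>;

#[async_trait]
pub trait DataInput: Send + Sync {
    /// The item type produced by this provider.
    type Item;
    /// The context type accepted by `read` (e.g. ObjectContext, RelationalContext).
    type Context;

    /// Returns an input stream containing items.
    async fn read(&self, ctx: &Self::Context) -> Result<InputStream<Self::Item>>;
}

#[async_trait]
pub trait DataOutput: Send + Sync {
    /// The item type accepted by this provider.
    type Item;

    /// Writes a batch of items to the sink.
    async fn write(&self, items: Vec<Self::Item>) -> Result<()>;
}
```

Reads stay context-driven and backend-specific, while writes take only a batch, since the destination now lives in the provider's own config.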
@@ -68,16 +73,13 @@ impl Error { } } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.message) - } -} - -impl std::error::Error for Error { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - self.source - .as_ref() - .map(|e| e.as_ref() as &(dyn std::error::Error + 'static)) +impl std::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Connection => write!(f, "connection"), + Self::NotFound => write!(f, "not found"), + Self::InvalidInput => write!(f, "invalid input"), + Self::Provider => write!(f, "provider"), + } } } diff --git a/crates/nvisy-dal/src/lib.rs b/crates/nvisy-dal/src/lib.rs index 4ef1900..f492c1a 100644 --- a/crates/nvisy-dal/src/lib.rs +++ b/crates/nvisy-dal/src/lib.rs @@ -12,13 +12,9 @@ pub mod provider; mod error; -pub use core::{Context, DataInput, DataOutput, InputStream, ItemSink, ItemStream, OutputStream}; - +pub use core::{ + DataInput, DataOutput, InputStream, ItemSink, ItemStream, ObjectContext, OutputStream, + RelationalContext, VectorContext, +}; pub use datatype::{AnyDataValue, DataTypeId}; -pub use error::{Error, ErrorKind, Result}; -pub use provider::ProviderConfig; - -/// Alias for backwards compatibility with nvisy-opendal. -pub type StorageError = Error; -/// Alias for backwards compatibility. -pub type StorageConfig = ProviderConfig; +pub use error::{BoxError, Error, ErrorKind, Result}; diff --git a/crates/nvisy-dal/src/provider/azblob/input.rs b/crates/nvisy-dal/src/provider/azblob/input.rs new file mode 100644 index 0000000..4272019 --- /dev/null +++ b/crates/nvisy-dal/src/provider/azblob/input.rs @@ -0,0 +1,58 @@ +//! Azure Blob DataInput implementation. + +use async_trait::async_trait; +use futures::StreamExt; + +use super::AzblobProvider; +use crate::core::{DataInput, InputStream, ObjectContext}; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataInput for AzblobProvider { + type Item = Blob; + type Context = ObjectContext; + + async fn read(&self, ctx: &ObjectContext) -> Result> { + let prefix = ctx.prefix.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let path = entry.path().to_string(); + if path.ends_with('/') { + return None; + } + + match op.read(&path).await { + Ok(data) => { + let mut blob = Blob::new(path.clone(), data.to_bytes()); + if let Ok(meta) = op.stat(&path).await + && let Some(ct) = meta.content_type() + { + blob = blob.with_content_type(ct); + } + Some(Ok(blob)) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} diff --git a/crates/nvisy-dal/src/provider/azblob/mod.rs b/crates/nvisy-dal/src/provider/azblob/mod.rs index a0300ec..2ba7f87 100644 --- a/crates/nvisy-dal/src/provider/azblob/mod.rs +++ b/crates/nvisy-dal/src/provider/azblob/mod.rs @@ -1,14 +1,13 @@ //! Azure Blob Storage provider. 
mod config; +mod input; +mod output; -use async_trait::async_trait; pub use config::AzblobConfig; -use futures::StreamExt; + use opendal::{Operator, services}; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Blob; use crate::error::{Error, Result}; /// Azure Blob Storage provider for blob storage. @@ -44,65 +43,6 @@ impl AzblobProvider { } } -#[async_trait] -impl DataInput for AzblobProvider { - async fn read(&self, ctx: &Context) -> Result> { - let prefix = ctx.target.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let path = entry.path().to_string(); - if path.ends_with('/') { - return None; - } - - match op.read(&path).await { - Ok(data) => { - let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await - && let Some(ct) = meta.content_type() - { - blob = blob.with_content_type(ct); - } - Some(Ok(blob)) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} - -#[async_trait] -impl DataOutput for AzblobProvider { - async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { - for blob in items { - self.operator - .write(&blob.path, blob.data) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) - } -} - impl std::fmt::Debug for AzblobProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("AzblobProvider").finish() diff --git a/crates/nvisy-dal/src/provider/azblob/output.rs b/crates/nvisy-dal/src/provider/azblob/output.rs new file mode 100644 index 0000000..49fa07e --- /dev/null +++ b/crates/nvisy-dal/src/provider/azblob/output.rs @@ -0,0 +1,23 @@ +//! Azure Blob DataOutput implementation. + +use async_trait::async_trait; + +use super::AzblobProvider; +use crate::core::DataOutput; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for AzblobProvider { + type Item = Blob; + + async fn write(&self, items: Vec) -> Result<()> { + for blob in items { + self.operator + .write(&blob.path, blob.data) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} diff --git a/crates/nvisy-dal/src/provider/config.rs b/crates/nvisy-dal/src/provider/config.rs deleted file mode 100644 index e74769f..0000000 --- a/crates/nvisy-dal/src/provider/config.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! Provider configuration types. - -use serde::{Deserialize, Serialize}; - -use super::{AzblobConfig, GcsConfig, MysqlConfig, PostgresConfig, S3Config}; -use crate::datatype::DataTypeId; - -/// Unified provider configuration for different backends. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub enum ProviderConfig { - /// Amazon S3 storage. - S3(S3Config), - /// Google Cloud Storage. - Gcs(GcsConfig), - /// Azure Blob Storage. - Azblob(AzblobConfig), - /// PostgreSQL database. - Postgres(PostgresConfig), - /// MySQL database. - Mysql(MysqlConfig), -} - -impl ProviderConfig { - /// Returns the output data type for this provider. 
- pub const fn output_type(&self) -> DataTypeId { - match self { - Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, - Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, - } - } -} diff --git a/crates/nvisy-dal/src/provider/gcs/input.rs b/crates/nvisy-dal/src/provider/gcs/input.rs new file mode 100644 index 0000000..7d23b57 --- /dev/null +++ b/crates/nvisy-dal/src/provider/gcs/input.rs @@ -0,0 +1,58 @@ +//! GCS DataInput implementation. + +use async_trait::async_trait; +use futures::StreamExt; + +use super::GcsProvider; +use crate::core::{DataInput, InputStream, ObjectContext}; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataInput for GcsProvider { + type Item = Blob; + type Context = ObjectContext; + + async fn read(&self, ctx: &ObjectContext) -> Result> { + let prefix = ctx.prefix.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let path = entry.path().to_string(); + if path.ends_with('/') { + return None; + } + + match op.read(&path).await { + Ok(data) => { + let mut blob = Blob::new(path.clone(), data.to_bytes()); + if let Ok(meta) = op.stat(&path).await + && let Some(ct) = meta.content_type() + { + blob = blob.with_content_type(ct); + } + Some(Ok(blob)) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} diff --git a/crates/nvisy-dal/src/provider/gcs/mod.rs b/crates/nvisy-dal/src/provider/gcs/mod.rs index 9746bdd..49e379b 100644 --- a/crates/nvisy-dal/src/provider/gcs/mod.rs +++ b/crates/nvisy-dal/src/provider/gcs/mod.rs @@ -1,14 +1,13 @@ //! Google Cloud Storage provider. mod config; +mod input; +mod output; -use async_trait::async_trait; pub use config::GcsConfig; -use futures::StreamExt; + use opendal::{Operator, services}; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Blob; use crate::error::{Error, Result}; /// Google Cloud Storage provider for blob storage. 
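The object-store inputs (Azure Blob above, GCS and S3 below) all read through `ObjectContext`. A minimal usage sketch, assuming `InputStream` still implements `futures::Stream` the way the deleted `stream.rs` version did; the `reports/` prefix and page size are illustrative only:

```rust
use futures::StreamExt;
use nvisy_dal::{DataInput, ObjectContext};

/// Counts up to 100 objects under an illustrative `reports/` prefix.
async fn count_reports<P>(provider: &P) -> nvisy_dal::Result<usize>
where
    P: DataInput<Context = ObjectContext>,
{
    let ctx = ObjectContext::new().with_prefix("reports/").with_limit(100);

    let mut stream = provider.read(&ctx).await?;
    let mut n = 0;
    while let Some(item) = stream.next().await {
        // Propagate per-item provider errors instead of counting them.
        item?;
        n += 1;
    }
    Ok(n)
}
```

Per-item errors surface through the stream, so a caller can stop at the first failure or choose to skip bad objects.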
@@ -38,65 +37,6 @@ impl GcsProvider { } } -#[async_trait] -impl DataInput for GcsProvider { - async fn read(&self, ctx: &Context) -> Result> { - let prefix = ctx.target.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let path = entry.path().to_string(); - if path.ends_with('/') { - return None; - } - - match op.read(&path).await { - Ok(data) => { - let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await - && let Some(ct) = meta.content_type() - { - blob = blob.with_content_type(ct); - } - Some(Ok(blob)) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} - -#[async_trait] -impl DataOutput for GcsProvider { - async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { - for blob in items { - self.operator - .write(&blob.path, blob.data) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) - } -} - impl std::fmt::Debug for GcsProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("GcsProvider").finish() diff --git a/crates/nvisy-dal/src/provider/gcs/output.rs b/crates/nvisy-dal/src/provider/gcs/output.rs new file mode 100644 index 0000000..73c79bd --- /dev/null +++ b/crates/nvisy-dal/src/provider/gcs/output.rs @@ -0,0 +1,23 @@ +//! GCS DataOutput implementation. + +use async_trait::async_trait; + +use super::GcsProvider; +use crate::core::DataOutput; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for GcsProvider { + type Item = Blob; + + async fn write(&self, items: Vec) -> Result<()> { + for blob in items { + self.operator + .write(&blob.path, blob.data) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} diff --git a/crates/nvisy-dal/src/provider/milvus/mod.rs b/crates/nvisy-dal/src/provider/milvus/mod.rs index b623259..1ad257b 100644 --- a/crates/nvisy-dal/src/provider/milvus/mod.rs +++ b/crates/nvisy-dal/src/provider/milvus/mod.rs @@ -1,27 +1,23 @@ //! Milvus vector store provider. mod config; +mod output; use std::borrow::Cow; use std::collections::HashMap; -use async_trait::async_trait; pub use config::MilvusConfig; use milvus::client::Client; use milvus::collection::SearchOption; -use milvus::data::FieldColumn; use milvus::index::{IndexParams, IndexType, MetricType}; use milvus::schema::{CollectionSchemaBuilder, FieldSchema}; -use milvus::value::{Value, ValueVec}; +use milvus::value::Value; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Embedding; use crate::error::{Error, Result}; /// Milvus provider for vector storage. pub struct MilvusProvider { client: Client, - #[allow(dead_code)] config: MilvusConfig, } @@ -40,8 +36,13 @@ impl MilvusProvider { }) } + /// Returns the configured collection name. + pub fn collection(&self) -> Option<&str> { + self.config.collection.as_deref() + } + /// Ensures a collection exists, creating it if necessary. 
- async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { + pub(crate) async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { let exists = self .client .has_collection(name) @@ -52,7 +53,6 @@ impl MilvusProvider { return Ok(()); } - // Build the collection schema let mut builder = CollectionSchemaBuilder::new(name, "Vector collection"); builder.add_field(FieldSchema::new_primary_int64("_id", "primary key", true)); builder.add_field(FieldSchema::new_varchar("id", "string id", 256)); @@ -67,13 +67,11 @@ impl MilvusProvider { .build() .map_err(|e| Error::provider(e.to_string()))?; - // Create the collection self.client .create_collection(schema, None) .await .map_err(|e| Error::provider(e.to_string()))?; - // Create index on vector field let index_params = IndexParams::new( "vector_index".to_string(), IndexType::IvfFlat, @@ -92,7 +90,6 @@ impl MilvusProvider { .await .map_err(|e| Error::provider(e.to_string()))?; - // Load collection into memory collection .load(1) .await @@ -143,7 +140,6 @@ impl MilvusProvider { let score = result.score.get(i).copied().unwrap_or(0.0); - // Extract metadata from fields let metadata_str = result .field .iter() @@ -158,7 +154,6 @@ impl MilvusProvider { .and_then(|s| serde_json::from_str(&s).ok()) .unwrap_or_default(); - // Get string id if available let string_id = result .field .iter() @@ -196,68 +191,6 @@ pub struct SearchResult { pub metadata: HashMap, } -#[async_trait] -impl DataOutput for MilvusProvider { - async fn write(&self, ctx: &Context, items: Vec) -> Result<()> { - if items.is_empty() { - return Ok(()); - } - - let collection = ctx - .target - .as_deref() - .ok_or_else(|| Error::invalid_input("Collection name required in context.target"))?; - - // Get the dimension from the first vector - let dim = items.first().map(|v| v.vector.len()).unwrap_or(0); - - // Ensure collection exists - self.ensure_collection(collection, dim).await?; - - let coll = self - .client - .get_collection(collection) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let ids: Vec = items.iter().map(|v| v.id.clone()).collect(); - let embeddings: Vec = items - .iter() - .flat_map(|v| v.vector.iter().copied()) - .collect(); - let metadata: Vec = items - .iter() - .map(|v| serde_json::to_string(&v.metadata).unwrap_or_default()) - .collect(); - - // Create field schemas for columns - let id_schema = FieldSchema::new_varchar("id", "string id", 256); - let vector_schema = FieldSchema::new_float_vector("vector", "embedding vector", dim as i64); - let metadata_schema = FieldSchema::new_varchar("metadata", "json metadata", 65535); - - let columns = vec![ - FieldColumn::new(&id_schema, ValueVec::String(ids)), - FieldColumn::new(&vector_schema, ValueVec::Float(embeddings)), - FieldColumn::new(&metadata_schema, ValueVec::String(metadata)), - ]; - - coll.insert(columns, None) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - Ok(()) - } -} - -#[async_trait] -impl DataInput for MilvusProvider { - async fn read(&self, _ctx: &Context) -> Result> { - // Vector stores are primarily write/search, not sequential read - let stream = futures::stream::empty(); - Ok(InputStream::new(Box::pin(stream))) - } -} - impl std::fmt::Debug for MilvusProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("MilvusProvider").finish() diff --git a/crates/nvisy-dal/src/provider/milvus/output.rs b/crates/nvisy-dal/src/provider/milvus/output.rs new file mode 100644 index 0000000..59ff992 --- /dev/null +++ 
b/crates/nvisy-dal/src/provider/milvus/output.rs @@ -0,0 +1,62 @@ +//! Milvus DataOutput implementation. + +use async_trait::async_trait; +use milvus::data::FieldColumn; +use milvus::schema::FieldSchema; +use milvus::value::ValueVec; + +use super::MilvusProvider; +use crate::core::DataOutput; +use crate::datatype::Embedding; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for MilvusProvider { + type Item = Embedding; + + async fn write(&self, items: Vec) -> Result<()> { + if items.is_empty() { + return Ok(()); + } + + let collection = self + .collection() + .ok_or_else(|| Error::invalid_input("Collection name required in provider config"))?; + + let dim = items.first().map(|v| v.vector.len()).unwrap_or(0); + + self.ensure_collection(collection, dim).await?; + + let coll = self + .client + .get_collection(collection) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let ids: Vec = items.iter().map(|v| v.id.clone()).collect(); + let embeddings: Vec = items + .iter() + .flat_map(|v| v.vector.iter().copied()) + .collect(); + let metadata: Vec = items + .iter() + .map(|v| serde_json::to_string(&v.metadata).unwrap_or_default()) + .collect(); + + let id_schema = FieldSchema::new_varchar("id", "string id", 256); + let vector_schema = FieldSchema::new_float_vector("vector", "embedding vector", dim as i64); + let metadata_schema = FieldSchema::new_varchar("metadata", "json metadata", 65535); + + let columns = vec![ + FieldColumn::new(&id_schema, ValueVec::String(ids)), + FieldColumn::new(&vector_schema, ValueVec::Float(embeddings)), + FieldColumn::new(&metadata_schema, ValueVec::String(metadata)), + ]; + + coll.insert(columns, None) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + Ok(()) + } +} diff --git a/crates/nvisy-dal/src/provider/mod.rs b/crates/nvisy-dal/src/provider/mod.rs index 578d165..9771422 100644 --- a/crates/nvisy-dal/src/provider/mod.rs +++ b/crates/nvisy-dal/src/provider/mod.rs @@ -1,30 +1,18 @@ //! Data providers for various storage backends. -// Storage providers (OpenDAL-based) mod azblob; mod gcs; -mod s3; - -// Database providers (OpenDAL-based) -mod mysql; -mod postgres; - -// Vector providers mod milvus; +mod mysql; mod pgvector; mod pinecone; +mod postgres; mod qdrant; +mod s3; -mod config; - -// Re-export storage providers pub use azblob::{AzblobConfig, AzblobProvider}; -// Re-export unified config -pub use config::ProviderConfig; pub use gcs::{GcsConfig, GcsProvider}; -// Re-export vector providers pub use milvus::{MilvusConfig, MilvusProvider}; -// Re-export database providers pub use mysql::{MysqlConfig, MysqlProvider}; pub use pgvector::{DistanceMetric, IndexType, PgVectorConfig, PgVectorProvider}; pub use pinecone::{PineconeConfig, PineconeProvider}; diff --git a/crates/nvisy-dal/src/provider/mysql/input.rs b/crates/nvisy-dal/src/provider/mysql/input.rs new file mode 100644 index 0000000..9cc2d75 --- /dev/null +++ b/crates/nvisy-dal/src/provider/mysql/input.rs @@ -0,0 +1,64 @@ +//! MySQL DataInput implementation. 
+ +use std::collections::HashMap; + +use async_trait::async_trait; +use futures::StreamExt; + +use super::MysqlProvider; +use crate::core::{DataInput, InputStream, RelationalContext}; +use crate::datatype::Record; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataInput for MysqlProvider { + type Item = Record; + type Context = RelationalContext; + + async fn read(&self, ctx: &RelationalContext) -> Result> { + let prefix = ctx.table.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let key = entry.path().to_string(); + match op.read(&key).await { + Ok(data) => { + let value: serde_json::Value = + serde_json::from_slice(&data.to_bytes()) + .unwrap_or(serde_json::json!({})); + + let columns: HashMap = + if let serde_json::Value::Object(map) = value { + map.into_iter().collect() + } else { + let mut cols = HashMap::new(); + cols.insert("_key".to_string(), serde_json::json!(key)); + cols.insert("_value".to_string(), value); + cols + }; + + Some(Ok(Record::from_columns(columns))) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} diff --git a/crates/nvisy-dal/src/provider/mysql/mod.rs b/crates/nvisy-dal/src/provider/mysql/mod.rs index 9ecd4c8..b1b8343 100644 --- a/crates/nvisy-dal/src/provider/mysql/mod.rs +++ b/crates/nvisy-dal/src/provider/mysql/mod.rs @@ -1,24 +1,19 @@ -//! MySQL provider via OpenDAL. +//! MySQL provider. mod config; +mod input; +mod output; -use std::collections::HashMap; - -use async_trait::async_trait; pub use config::MysqlConfig; -use futures::StreamExt; + use opendal::{Operator, services}; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Record; use crate::error::{Error, Result}; /// MySQL provider for relational data. 
#[derive(Clone)] pub struct MysqlProvider { operator: Operator, - #[allow(dead_code)] - config: MysqlConfig, } impl MysqlProvider { @@ -38,84 +33,7 @@ impl MysqlProvider { .map(|op| op.finish()) .map_err(|e| Error::connection(e.to_string()))?; - Ok(Self { - operator, - config: config.clone(), - }) - } -} - -#[async_trait] -impl DataInput for MysqlProvider { - async fn read(&self, ctx: &Context) -> Result> { - let prefix = ctx.target.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let key = entry.path().to_string(); - match op.read(&key).await { - Ok(data) => { - // Parse the value as JSON to get columns - let value: serde_json::Value = - serde_json::from_slice(&data.to_bytes()) - .unwrap_or(serde_json::json!({})); - - let columns: HashMap = - if let serde_json::Value::Object(map) = value { - map.into_iter().collect() - } else { - let mut cols = HashMap::new(); - cols.insert("_key".to_string(), serde_json::json!(key)); - cols.insert("_value".to_string(), value); - cols - }; - - Some(Ok(Record::from_columns(columns))) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} - -#[async_trait] -impl DataOutput for MysqlProvider { - async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { - for record in items { - // Use _key column as the key, or generate one - let key = record - .get("_key") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) - .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); - - let value = - serde_json::to_vec(&record.columns).map_err(|e| Error::provider(e.to_string()))?; - - self.operator - .write(&key, value) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) + Ok(Self { operator }) } } diff --git a/crates/nvisy-dal/src/provider/mysql/output.rs b/crates/nvisy-dal/src/provider/mysql/output.rs new file mode 100644 index 0000000..568668e --- /dev/null +++ b/crates/nvisy-dal/src/provider/mysql/output.rs @@ -0,0 +1,32 @@ +//! MySQL DataOutput implementation. + +use async_trait::async_trait; + +use super::MysqlProvider; +use crate::core::DataOutput; +use crate::datatype::Record; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for MysqlProvider { + type Item = Record; + + async fn write(&self, items: Vec) -> Result<()> { + for record in items { + let key = record + .get("_key") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); + + let value = + serde_json::to_vec(&record.columns).map_err(|e| Error::provider(e.to_string()))?; + + self.operator + .write(&key, value) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} diff --git a/crates/nvisy-dal/src/provider/pgvector/mod.rs b/crates/nvisy-dal/src/provider/pgvector/mod.rs index 56438d5..0254a8a 100644 --- a/crates/nvisy-dal/src/provider/pgvector/mod.rs +++ b/crates/nvisy-dal/src/provider/pgvector/mod.rs @@ -1,10 +1,10 @@ //! PostgreSQL pgvector provider. 
mod config; +mod output; use std::collections::HashMap; -use async_trait::async_trait; pub use config::{DistanceMetric, IndexType, PgVectorConfig}; use diesel::prelude::*; use diesel::sql_types::{Float, Integer, Text}; @@ -12,8 +12,6 @@ use diesel_async::pooled_connection::AsyncDieselConnectionManager; use diesel_async::pooled_connection::deadpool::Pool; use diesel_async::{AsyncPgConnection, RunQueryDsl}; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Embedding; use crate::error::{Error, Result}; /// pgvector provider for vector storage using PostgreSQL. @@ -32,7 +30,6 @@ impl PgVectorProvider { .build() .map_err(|e| Error::connection(e.to_string()))?; - // Test connection and ensure pgvector extension exists { let mut conn = pool .get() @@ -53,7 +50,12 @@ impl PgVectorProvider { }) } - async fn get_conn( + /// Returns the configured table name. + pub fn table(&self) -> &str { + &self.config.table + } + + pub(crate) async fn get_conn( &self, ) -> Result>> { self.pool @@ -62,15 +64,14 @@ impl PgVectorProvider { .map_err(|e| Error::connection(e.to_string())) } - fn distance_operator(&self) -> &'static str { + pub(crate) fn distance_operator(&self) -> &'static str { self.config.distance_metric.operator() } /// Ensures a collection (table) exists, creating it if necessary. - async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { + pub(crate) async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { let mut conn = self.get_conn().await?; - // Create the table let create_table = format!( r#" CREATE TABLE IF NOT EXISTS {} ( @@ -88,7 +89,6 @@ impl PgVectorProvider { .await .map_err(|e| Error::provider(e.to_string()))?; - // Create the index let index_name = format!("{}_vector_idx", name); let operator = self.distance_operator(); @@ -149,7 +149,6 @@ impl PgVectorProvider { "" }; - // For cosine and inner product, convert distance to similarity let score_expr = match self.config.distance_metric { DistanceMetric::L2 => format!("vector {} $1::vector", operator), DistanceMetric::InnerProduct => format!("-(vector {} $1::vector)", operator), @@ -211,80 +210,12 @@ pub struct SearchResult { pub metadata: HashMap, } -#[async_trait] -impl DataOutput for PgVectorProvider { - async fn write(&self, ctx: &Context, items: Vec) -> Result<()> { - if items.is_empty() { - return Ok(()); - } - - let collection = ctx - .target - .as_deref() - .ok_or_else(|| Error::invalid_input("Collection name required in context.target"))?; - - // Get dimensions from the first vector - let dimensions = <[_]>::first(&items) - .map(|v| v.vector.len()) - .ok_or_else(|| Error::invalid_input("No embeddings provided"))?; - - // Ensure collection exists - self.ensure_collection(collection, dimensions).await?; - - let mut conn = self.get_conn().await?; - - for v in items { - let vector_str = format!( - "[{}]", - v.vector - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ); - let metadata_json = - serde_json::to_string(&v.metadata).unwrap_or_else(|_| "{}".to_string()); - - let upsert_query = format!( - r#" - INSERT INTO {} (id, vector, metadata) - VALUES ($1, $2::vector, $3::jsonb) - ON CONFLICT (id) DO UPDATE SET - vector = EXCLUDED.vector, - metadata = EXCLUDED.metadata - "#, - collection - ); - - diesel::sql_query(&upsert_query) - .bind::(&v.id) - .bind::(&vector_str) - .bind::(&metadata_json) - .execute(&mut conn) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - - Ok(()) - } -} - -#[async_trait] -impl DataInput for 
PgVectorProvider { - async fn read(&self, _ctx: &Context) -> Result> { - // Vector stores are primarily write/search, not sequential read - let stream = futures::stream::empty(); - Ok(InputStream::new(Box::pin(stream))) - } -} - impl std::fmt::Debug for PgVectorProvider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PgVectorProvider").finish() } } -/// Parse a vector string from PostgreSQL format. fn parse_vector(s: &str) -> Result> { let trimmed = s.trim_start_matches('[').trim_end_matches(']'); trimmed diff --git a/crates/nvisy-dal/src/provider/pgvector/output.rs b/crates/nvisy-dal/src/provider/pgvector/output.rs new file mode 100644 index 0000000..5c29557 --- /dev/null +++ b/crates/nvisy-dal/src/provider/pgvector/output.rs @@ -0,0 +1,65 @@ +//! pgvector DataOutput implementation. + +use async_trait::async_trait; +use diesel::sql_types::Text; +use diesel_async::RunQueryDsl; + +use super::PgVectorProvider; +use crate::core::DataOutput; +use crate::datatype::Embedding; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for PgVectorProvider { + type Item = Embedding; + + async fn write(&self, items: Vec) -> Result<()> { + if items.is_empty() { + return Ok(()); + } + + let table = self.table(); + + let dimensions = <[_]>::first(&items) + .map(|v| v.vector.len()) + .ok_or_else(|| Error::invalid_input("No embeddings provided"))?; + + self.ensure_collection(table, dimensions).await?; + + let mut conn = self.get_conn().await?; + + for v in items { + let vector_str = format!( + "[{}]", + v.vector + .iter() + .map(|f| f.to_string()) + .collect::>() + .join(",") + ); + let metadata_json = + serde_json::to_string(&v.metadata).unwrap_or_else(|_| "{}".to_string()); + + let upsert_query = format!( + r#" + INSERT INTO {} (id, vector, metadata) + VALUES ($1, $2::vector, $3::jsonb) + ON CONFLICT (id) DO UPDATE SET + vector = EXCLUDED.vector, + metadata = EXCLUDED.metadata + "#, + table + ); + + diesel::sql_query(&upsert_query) + .bind::(&v.id) + .bind::(&vector_str) + .bind::(&metadata_json) + .execute(&mut conn) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + + Ok(()) + } +} diff --git a/crates/nvisy-dal/src/provider/pinecone/mod.rs b/crates/nvisy-dal/src/provider/pinecone/mod.rs index b28831e..88fd6ba 100644 --- a/crates/nvisy-dal/src/provider/pinecone/mod.rs +++ b/crates/nvisy-dal/src/provider/pinecone/mod.rs @@ -1,18 +1,16 @@ //! Pinecone vector store provider. mod config; +mod output; use std::collections::{BTreeMap, HashMap}; -use async_trait::async_trait; pub use config::PineconeConfig; -use pinecone_sdk::models::{Kind, Metadata, Namespace, Value as PineconeValue, Vector}; +use pinecone_sdk::models::{Kind, Metadata, Namespace, Value as PineconeValue}; use pinecone_sdk::pinecone::PineconeClientConfig; use pinecone_sdk::pinecone::data::Index; use tokio::sync::Mutex; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Embedding; use crate::error::{Error, Result}; /// Pinecone provider for vector storage. 
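With the targets moved into provider config (the `collection()` / `table()` accessors above), `DataOutput::write` no longer threads a context through each call. A small sketch of what a context-free call site looks like; the batch contents are left to the caller:

```rust
use nvisy_dal::DataOutput;

/// Pushes one batch through any of the vector sinks rewritten in this patch
/// (pgvector, Milvus, Qdrant, Pinecone). The destination collection, table,
/// or namespace is resolved from the provider's own config, not per call.
async fn flush_batch<P: DataOutput>(sink: &P, batch: Vec<P::Item>) -> nvisy_dal::Result<()> {
    if batch.is_empty() {
        // Mirrors the early return the provider impls perform themselves.
        return Ok(());
    }
    sink.write(batch).await
}
```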
@@ -33,7 +31,6 @@ impl PineconeProvider { .client() .map_err(|e| Error::connection(e.to_string()))?; - // Describe the index to get its host let index_description = client .describe_index(&config.index) .await @@ -41,7 +38,6 @@ impl PineconeProvider { let host = &index_description.host; - // Connect to the index let index = client .index(host) .await @@ -53,7 +49,7 @@ impl PineconeProvider { }) } - fn get_namespace(&self, collection: &str) -> Namespace { + pub(crate) fn get_namespace(&self, collection: &str) -> Namespace { if collection.is_empty() { self.config .namespace @@ -65,23 +61,9 @@ impl PineconeProvider { } } - /// Convert Pinecone Metadata to HashMap - fn metadata_to_hashmap(metadata: Metadata) -> HashMap { - metadata - .fields - .into_iter() - .map(|(k, v)| (k, pinecone_value_to_json(v))) - .collect() - } - - /// Convert HashMap to Pinecone Metadata - fn hashmap_to_metadata(map: HashMap) -> Metadata { - let fields: BTreeMap = map - .into_iter() - .map(|(k, v)| (k, json_to_pinecone_value(v))) - .collect(); - - Metadata { fields } + /// Returns the configured namespace. + pub fn namespace(&self) -> Option<&str> { + self.config.namespace.as_deref() } /// Searches for similar vectors. @@ -99,7 +81,7 @@ impl PineconeProvider { let filter_metadata: Option = filter.and_then(|f| { if let serde_json::Value::Object(obj) = f { let map: HashMap = obj.clone().into_iter().collect(); - Some(Self::hashmap_to_metadata(map)) + Some(hashmap_to_metadata(map)) } else { None } @@ -109,7 +91,7 @@ impl PineconeProvider { let response = index .query_by_value( query, - None, // sparse values + None, limit as u32, &namespace, filter_metadata, @@ -123,10 +105,7 @@ impl PineconeProvider { .matches .into_iter() .map(|m| { - let metadata = m - .metadata - .map(Self::metadata_to_hashmap) - .unwrap_or_default(); + let metadata = m.metadata.map(metadata_to_hashmap).unwrap_or_default(); SearchResult { id: m.id, @@ -154,56 +133,29 @@ pub struct SearchResult { pub metadata: HashMap, } -#[async_trait] -impl DataOutput for PineconeProvider { - async fn write(&self, ctx: &Context, items: Vec) -> Result<()> { - let collection = ctx.target.as_deref().unwrap_or(""); - let namespace = self.get_namespace(collection); - - let pinecone_vectors: Vec = items - .into_iter() - .map(|v| { - let metadata = if v.metadata.is_empty() { - None - } else { - Some(Self::hashmap_to_metadata(v.metadata)) - }; - - Vector { - id: v.id, - values: v.vector, - sparse_values: None, - metadata, - } - }) - .collect(); - - let mut index = self.index.lock().await; - index - .upsert(&pinecone_vectors, &namespace) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - Ok(()) +impl std::fmt::Debug for PineconeProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PineconeProvider").finish() } } -#[async_trait] -impl DataInput for PineconeProvider { - async fn read(&self, _ctx: &Context) -> Result> { - // Vector stores are primarily write/search, not sequential read - let stream = futures::stream::empty(); - Ok(InputStream::new(Box::pin(stream))) - } +fn metadata_to_hashmap(metadata: Metadata) -> HashMap { + metadata + .fields + .into_iter() + .map(|(k, v)| (k, pinecone_value_to_json(v))) + .collect() } -impl std::fmt::Debug for PineconeProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PineconeProvider").finish() - } +pub(crate) fn hashmap_to_metadata(map: HashMap) -> Metadata { + let fields: BTreeMap = map + .into_iter() + .map(|(k, v)| (k, 
json_to_pinecone_value(v))) + .collect(); + + Metadata { fields } } -/// Convert Pinecone Value to serde_json::Value fn pinecone_value_to_json(value: PineconeValue) -> serde_json::Value { match value.kind { Some(Kind::NullValue(_)) => serde_json::Value::Null, @@ -232,7 +184,6 @@ fn pinecone_value_to_json(value: PineconeValue) -> serde_json::Value { } } -/// Convert serde_json::Value to Pinecone Value fn json_to_pinecone_value(value: serde_json::Value) -> PineconeValue { let kind = match value { serde_json::Value::Null => Some(Kind::NullValue(0)), diff --git a/crates/nvisy-dal/src/provider/pinecone/output.rs b/crates/nvisy-dal/src/provider/pinecone/output.rs new file mode 100644 index 0000000..0c9f5fa --- /dev/null +++ b/crates/nvisy-dal/src/provider/pinecone/output.rs @@ -0,0 +1,47 @@ +//! Pinecone DataOutput implementation. + +use async_trait::async_trait; +use pinecone_sdk::models::Vector; + +use super::{PineconeProvider, hashmap_to_metadata}; +use crate::core::DataOutput; +use crate::datatype::Embedding; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for PineconeProvider { + type Item = Embedding; + + async fn write(&self, items: Vec) -> Result<()> { + let namespace = self + .namespace() + .map(|ns| pinecone_sdk::models::Namespace::from(ns)) + .unwrap_or_default(); + + let pinecone_vectors: Vec = items + .into_iter() + .map(|v| { + let metadata = if v.metadata.is_empty() { + None + } else { + Some(hashmap_to_metadata(v.metadata)) + }; + + Vector { + id: v.id, + values: v.vector, + sparse_values: None, + metadata, + } + }) + .collect(); + + let mut index = self.index.lock().await; + index + .upsert(&pinecone_vectors, &namespace) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + Ok(()) + } +} diff --git a/crates/nvisy-dal/src/provider/postgres/input.rs b/crates/nvisy-dal/src/provider/postgres/input.rs new file mode 100644 index 0000000..1f095f5 --- /dev/null +++ b/crates/nvisy-dal/src/provider/postgres/input.rs @@ -0,0 +1,64 @@ +//! PostgreSQL DataInput implementation. 
+ +use std::collections::HashMap; + +use async_trait::async_trait; +use futures::StreamExt; + +use super::PostgresProvider; +use crate::core::{DataInput, InputStream, RelationalContext}; +use crate::datatype::Record; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataInput for PostgresProvider { + type Item = Record; + type Context = RelationalContext; + + async fn read(&self, ctx: &RelationalContext) -> Result> { + let prefix = ctx.table.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let key = entry.path().to_string(); + match op.read(&key).await { + Ok(data) => { + let value: serde_json::Value = + serde_json::from_slice(&data.to_bytes()) + .unwrap_or(serde_json::json!({})); + + let columns: HashMap = + if let serde_json::Value::Object(map) = value { + map.into_iter().collect() + } else { + let mut cols = HashMap::new(); + cols.insert("_key".to_string(), serde_json::json!(key)); + cols.insert("_value".to_string(), value); + cols + }; + + Some(Ok(Record::from_columns(columns))) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} diff --git a/crates/nvisy-dal/src/provider/postgres/mod.rs b/crates/nvisy-dal/src/provider/postgres/mod.rs index bb28fb5..eaeb080 100644 --- a/crates/nvisy-dal/src/provider/postgres/mod.rs +++ b/crates/nvisy-dal/src/provider/postgres/mod.rs @@ -1,24 +1,19 @@ -//! PostgreSQL provider via OpenDAL. +//! PostgreSQL provider. mod config; +mod input; +mod output; -use std::collections::HashMap; - -use async_trait::async_trait; pub use config::PostgresConfig; -use futures::StreamExt; + use opendal::{Operator, services}; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Record; use crate::error::{Error, Result}; /// PostgreSQL provider for relational data. 
#[derive(Clone)] pub struct PostgresProvider { operator: Operator, - #[allow(dead_code)] - config: PostgresConfig, } impl PostgresProvider { @@ -39,84 +34,7 @@ impl PostgresProvider { .map(|op| op.finish()) .map_err(|e| Error::connection(e.to_string()))?; - Ok(Self { - operator, - config: config.clone(), - }) - } -} - -#[async_trait] -impl DataInput for PostgresProvider { - async fn read(&self, ctx: &Context) -> Result> { - let prefix = ctx.target.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let key = entry.path().to_string(); - match op.read(&key).await { - Ok(data) => { - // Parse the value as JSON to get columns - let value: serde_json::Value = - serde_json::from_slice(&data.to_bytes()) - .unwrap_or(serde_json::json!({})); - - let columns: HashMap = - if let serde_json::Value::Object(map) = value { - map.into_iter().collect() - } else { - let mut cols = HashMap::new(); - cols.insert("_key".to_string(), serde_json::json!(key)); - cols.insert("_value".to_string(), value); - cols - }; - - Some(Ok(Record::from_columns(columns))) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} - -#[async_trait] -impl DataOutput for PostgresProvider { - async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { - for record in items { - // Use _key column as the key, or generate one - let key = record - .get("_key") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) - .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); - - let value = - serde_json::to_vec(&record.columns).map_err(|e| Error::provider(e.to_string()))?; - - self.operator - .write(&key, value) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) + Ok(Self { operator }) } } diff --git a/crates/nvisy-dal/src/provider/postgres/output.rs b/crates/nvisy-dal/src/provider/postgres/output.rs new file mode 100644 index 0000000..7382302 --- /dev/null +++ b/crates/nvisy-dal/src/provider/postgres/output.rs @@ -0,0 +1,32 @@ +//! PostgreSQL DataOutput implementation. + +use async_trait::async_trait; + +use super::PostgresProvider; +use crate::core::DataOutput; +use crate::datatype::Record; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for PostgresProvider { + type Item = Record; + + async fn write(&self, items: Vec) -> Result<()> { + for record in items { + let key = record + .get("_key") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); + + let value = + serde_json::to_vec(&record.columns).map_err(|e| Error::provider(e.to_string()))?; + + self.operator + .write(&key, value) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} diff --git a/crates/nvisy-dal/src/provider/qdrant/mod.rs b/crates/nvisy-dal/src/provider/qdrant/mod.rs index d14e03e..46df2e2 100644 --- a/crates/nvisy-dal/src/provider/qdrant/mod.rs +++ b/crates/nvisy-dal/src/provider/qdrant/mod.rs @@ -1,28 +1,25 @@ //! Qdrant vector store provider. 
mod config; +mod output; use std::collections::HashMap; -use async_trait::async_trait; pub use config::QdrantConfig; use qdrant_client::Qdrant; use qdrant_client::qdrant::vectors_config::Config as VectorsConfig; use qdrant_client::qdrant::with_payload_selector::SelectorOptions; use qdrant_client::qdrant::with_vectors_selector::SelectorOptions as VectorsSelectorOptions; use qdrant_client::qdrant::{ - Condition, CreateCollectionBuilder, Distance, Filter, PointId, PointStruct, - SearchPointsBuilder, UpsertPointsBuilder, VectorParamsBuilder, + Condition, CreateCollectionBuilder, Distance, Filter, PointId, SearchPointsBuilder, + VectorParamsBuilder, }; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Embedding; use crate::error::{Error, Result}; /// Qdrant provider for vector storage. pub struct QdrantProvider { client: Qdrant, - #[allow(dead_code)] config: QdrantConfig, } @@ -41,7 +38,7 @@ impl QdrantProvider { } /// Ensures a collection exists, creating it if necessary. - async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { + pub(crate) async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { let exists = self .client .collection_exists(name) @@ -64,30 +61,9 @@ impl QdrantProvider { Ok(()) } - /// Extracts vector data from Qdrant's VectorsOutput. - fn extract_vector(vectors: Option) -> Option> { - use qdrant_client::qdrant::vectors_output::VectorsOptions; - - vectors.and_then(|v| match v.vectors_options { - #[allow(deprecated)] - Some(VectorsOptions::Vector(vec)) => Some(vec.data), - _ => None, - }) - } - - /// Extracts point ID as a string. - fn extract_point_id(id: Option) -> Option { - use qdrant_client::qdrant::point_id::PointIdOptions; - - match id { - Some(PointId { - point_id_options: Some(id), - }) => match id { - PointIdOptions::Num(n) => Some(n.to_string()), - PointIdOptions::Uuid(s) => Some(s), - }, - _ => None, - } + /// Returns the configured collection name. + pub fn collection(&self) -> Option<&str> { + self.config.collection.as_deref() } /// Searches for similar vectors. 
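The relational inputs (MySQL and PostgreSQL above) read through `RelationalContext`, whose `cursor` and `tiebreaker` fields are opaque strings for keyset pagination. A hedged sketch of a paging loop; the `events` table name and page size are illustrative, and how the cursor maps onto columns is provider-specific:

```rust
use futures::StreamExt;
use nvisy_dal::{DataInput, RelationalContext};

/// Reads a single page of rows, resuming from an optional keyset cursor.
async fn read_page<P>(
    provider: &P,
    cursor: Option<&str>,
    tiebreaker: Option<&str>,
) -> nvisy_dal::Result<Vec<P::Item>>
where
    P: DataInput<Context = RelationalContext>,
{
    let mut ctx = RelationalContext::new()
        .with_table("events") // illustrative table name
        .with_limit(500);
    if let Some(c) = cursor {
        ctx = ctx.with_cursor(c);
    }
    if let Some(t) = tiebreaker {
        ctx = ctx.with_tiebreaker(t);
    }

    let mut stream = provider.read(&ctx).await?;
    let mut page = Vec::new();
    while let Some(item) = stream.next().await {
        page.push(item?);
    }
    Ok(page)
}
```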
@@ -126,8 +102,8 @@ impl QdrantProvider { .result .into_iter() .map(|point| { - let id = Self::extract_point_id(point.id).unwrap_or_default(); - let vector = Self::extract_vector(point.vectors); + let id = extract_point_id(point.id).unwrap_or_default(); + let vector = extract_vector(point.vectors); let metadata: HashMap = point .payload @@ -161,66 +137,37 @@ pub struct SearchResult { pub metadata: HashMap, } -#[async_trait] -impl DataOutput for QdrantProvider { - async fn write(&self, ctx: &Context, items: Vec) -> Result<()> { - if items.is_empty() { - return Ok(()); - } - - let collection = ctx - .target - .as_deref() - .ok_or_else(|| Error::invalid_input("Collection name required in context.target"))?; - - // Get dimensions from the first vector - let dimensions = items - .first() - .map(|v| v.vector.len()) - .ok_or_else(|| Error::invalid_input("No embeddings provided"))?; - - // Ensure collection exists - self.ensure_collection(collection, dimensions).await?; - - let points: Vec = items - .into_iter() - .map(|v| { - let payload: HashMap = v - .metadata - .into_iter() - .map(|(k, v)| (k, json_to_qdrant_value(v))) - .collect(); - - PointStruct::new(v.id, v.vector, payload) - }) - .collect(); - - self.client - .upsert_points(UpsertPointsBuilder::new(collection, points)) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - Ok(()) +impl std::fmt::Debug for QdrantProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("QdrantProvider").finish() } } -#[async_trait] -impl DataInput for QdrantProvider { - async fn read(&self, _ctx: &Context) -> Result> { - // Vector stores are primarily write/search, not sequential read - let stream = futures::stream::empty(); - Ok(InputStream::new(Box::pin(stream))) - } +fn extract_vector(vectors: Option) -> Option> { + use qdrant_client::qdrant::vectors_output::VectorsOptions; + + vectors.and_then(|v| match v.vectors_options { + #[allow(deprecated)] + Some(VectorsOptions::Vector(vec)) => Some(vec.data), + _ => None, + }) } -impl std::fmt::Debug for QdrantProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("QdrantProvider").finish() +fn extract_point_id(id: Option) -> Option { + use qdrant_client::qdrant::point_id::PointIdOptions; + + match id { + Some(PointId { + point_id_options: Some(id), + }) => match id { + PointIdOptions::Num(n) => Some(n.to_string()), + PointIdOptions::Uuid(s) => Some(s), + }, + _ => None, } } -/// Converts JSON value to Qdrant value. -fn json_to_qdrant_value(value: serde_json::Value) -> qdrant_client::qdrant::Value { +pub(crate) fn json_to_qdrant_value(value: serde_json::Value) -> qdrant_client::qdrant::Value { use qdrant_client::qdrant::value::Kind; let kind = match value { @@ -253,7 +200,6 @@ fn json_to_qdrant_value(value: serde_json::Value) -> qdrant_client::qdrant::Valu qdrant_client::qdrant::Value { kind: Some(kind) } } -/// Converts Qdrant value to JSON value. fn qdrant_value_to_json(value: qdrant_client::qdrant::Value) -> serde_json::Value { use qdrant_client::qdrant::value::Kind; @@ -280,7 +226,6 @@ fn qdrant_value_to_json(value: qdrant_client::qdrant::Value) -> serde_json::Valu } } -/// Parses a JSON filter into Qdrant conditions. 
fn parse_filter(filter: &serde_json::Value) -> Option> { if let serde_json::Value::Object(obj) = filter { let conditions: Vec = obj diff --git a/crates/nvisy-dal/src/provider/qdrant/output.rs b/crates/nvisy-dal/src/provider/qdrant/output.rs new file mode 100644 index 0000000..d3ffc4f --- /dev/null +++ b/crates/nvisy-dal/src/provider/qdrant/output.rs @@ -0,0 +1,53 @@ +//! Qdrant DataOutput implementation. + +use std::collections::HashMap; + +use async_trait::async_trait; +use qdrant_client::qdrant::{PointStruct, UpsertPointsBuilder}; + +use super::{QdrantProvider, json_to_qdrant_value}; +use crate::core::DataOutput; +use crate::datatype::Embedding; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for QdrantProvider { + type Item = Embedding; + + async fn write(&self, items: Vec) -> Result<()> { + if items.is_empty() { + return Ok(()); + } + + let collection = self + .collection() + .ok_or_else(|| Error::invalid_input("Collection name required in provider config"))?; + + let dimensions = items + .first() + .map(|v| v.vector.len()) + .ok_or_else(|| Error::invalid_input("No embeddings provided"))?; + + self.ensure_collection(collection, dimensions).await?; + + let points: Vec = items + .into_iter() + .map(|v| { + let payload: HashMap = v + .metadata + .into_iter() + .map(|(k, v)| (k, json_to_qdrant_value(v))) + .collect(); + + PointStruct::new(v.id, v.vector, payload) + }) + .collect(); + + self.client + .upsert_points(UpsertPointsBuilder::new(collection, points)) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + Ok(()) + } +} diff --git a/crates/nvisy-dal/src/provider/s3/input.rs b/crates/nvisy-dal/src/provider/s3/input.rs new file mode 100644 index 0000000..2c5655f --- /dev/null +++ b/crates/nvisy-dal/src/provider/s3/input.rs @@ -0,0 +1,58 @@ +//! S3 DataInput implementation. + +use async_trait::async_trait; +use futures::StreamExt; + +use super::S3Provider; +use crate::core::{DataInput, InputStream, ObjectContext}; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataInput for S3Provider { + type Item = Blob; + type Context = ObjectContext; + + async fn read(&self, ctx: &ObjectContext) -> Result> { + let prefix = ctx.prefix.as_deref().unwrap_or(""); + let limit = ctx.limit.unwrap_or(usize::MAX); + + let lister = self + .operator + .lister(prefix) + .await + .map_err(|e| Error::provider(e.to_string()))?; + + let operator = self.operator.clone(); + + let stream = lister.take(limit).filter_map(move |entry_result| { + let op = operator.clone(); + async move { + match entry_result { + Ok(entry) => { + let path = entry.path().to_string(); + if path.ends_with('/') { + return None; + } + + match op.read(&path).await { + Ok(data) => { + let mut blob = Blob::new(path.clone(), data.to_bytes()); + if let Ok(meta) = op.stat(&path).await + && let Some(ct) = meta.content_type() + { + blob = blob.with_content_type(ct); + } + Some(Ok(blob)) + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + Err(e) => Some(Err(Error::provider(e.to_string()))), + } + } + }); + + Ok(InputStream::new(Box::pin(stream))) + } +} diff --git a/crates/nvisy-dal/src/provider/s3/mod.rs b/crates/nvisy-dal/src/provider/s3/mod.rs index e05e757..d25e8c4 100644 --- a/crates/nvisy-dal/src/provider/s3/mod.rs +++ b/crates/nvisy-dal/src/provider/s3/mod.rs @@ -1,14 +1,13 @@ //! Amazon S3 provider. 
mod config; +mod input; +mod output; -use async_trait::async_trait; pub use config::S3Config; -use futures::StreamExt; + use opendal::{Operator, services}; -use crate::core::{Context, DataInput, DataOutput, InputStream}; -use crate::datatype::Blob; use crate::error::{Error, Result}; /// Amazon S3 provider for blob storage. @@ -48,65 +47,6 @@ impl S3Provider { } } -#[async_trait] -impl DataInput for S3Provider { - async fn read(&self, ctx: &Context) -> Result> { - let prefix = ctx.target.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let path = entry.path().to_string(); - if path.ends_with('/') { - return None; - } - - match op.read(&path).await { - Ok(data) => { - let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await - && let Some(ct) = meta.content_type() - { - blob = blob.with_content_type(ct); - } - Some(Ok(blob)) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} - -#[async_trait] -impl DataOutput for S3Provider { - async fn write(&self, _ctx: &Context, items: Vec) -> Result<()> { - for blob in items { - self.operator - .write(&blob.path, blob.data) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) - } -} - impl std::fmt::Debug for S3Provider { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("S3Provider").finish() diff --git a/crates/nvisy-dal/src/provider/s3/output.rs b/crates/nvisy-dal/src/provider/s3/output.rs new file mode 100644 index 0000000..c89988d --- /dev/null +++ b/crates/nvisy-dal/src/provider/s3/output.rs @@ -0,0 +1,23 @@ +//! S3 DataOutput implementation. 
+ +use async_trait::async_trait; + +use super::S3Provider; +use crate::core::DataOutput; +use crate::datatype::Blob; +use crate::error::{Error, Result}; + +#[async_trait] +impl DataOutput for S3Provider { + type Item = Blob; + + async fn write(&self, items: Vec) -> Result<()> { + for blob in items { + self.operator + .write(&blob.path, blob.data) + .await + .map_err(|e| Error::provider(e.to_string()))?; + } + Ok(()) + } +} diff --git a/crates/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml index 9eab6e2..f0b4bf8 100644 --- a/crates/nvisy-runtime/Cargo.toml +++ b/crates/nvisy-runtime/Cargo.toml @@ -44,6 +44,7 @@ async-trait = { workspace = true, features = [] } thiserror = { workspace = true, features = [] } derive_more = { workspace = true, features = ["debug", "display", "from", "into"] } derive_builder = { workspace = true, features = [] } +strum = { workspace = true, features = ["derive"] } # Data types uuid = { workspace = true, features = ["v7", "serde"] } diff --git a/crates/nvisy-runtime/src/engine/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs index a85f669..0a44ccb 100644 --- a/crates/nvisy-runtime/src/engine/compiler.rs +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -25,8 +25,8 @@ use crate::graph::{ ExtractProcessor, InputStream, OutputStream, PartitionProcessor, }; use crate::provider::{ - CompletionProviderParams, CredentialsRegistry, EmbeddingProviderParams, InputProviderParams, - IntoProvider, OutputProviderParams, + CompletionProviderParams, CredentialsRegistry, EmbeddingProviderParams, InputProvider, + InputProviderParams, IntoProvider, OutputProviderParams, }; /// Workflow compiler that transforms definitions into executable graphs. @@ -252,10 +252,9 @@ impl<'a> WorkflowCompiler<'a> { params: &InputProviderParams, ) -> Result { let creds = self.registry.get(params.credentials_id())?; - let provider = params.clone().into_provider(creds.clone()).await?; - let dal_ctx: nvisy_dal::core::Context = self.ctx.clone().into(); - let stream = provider.read_stream(&dal_ctx).await?; + + let stream = self.read_from_provider(&provider).await?; // Map the stream to our Result type use futures::StreamExt; @@ -264,6 +263,24 @@ impl<'a> WorkflowCompiler<'a> { Ok(InputStream::new(Box::pin(mapped))) } + /// Reads from an input provider using the appropriate context type. + async fn read_from_provider( + &self, + provider: &InputProvider, + ) -> Result>> + { + match provider { + InputProvider::S3(_) | InputProvider::Gcs(_) | InputProvider::Azblob(_) => { + let ctx = self.ctx.to_object_context(); + provider.read_object_stream(&ctx).await + } + InputProvider::Postgres(_) | InputProvider::Mysql(_) => { + let ctx = self.ctx.to_relational_context(); + provider.read_relational_stream(&ctx).await + } + } + } + /// Creates an output stream from an output definition. 
async fn create_output_stream(&self, output: &Output) -> Result { match output { @@ -288,10 +305,8 @@ impl<'a> WorkflowCompiler<'a> { params: &OutputProviderParams, ) -> Result { let creds = self.registry.get(params.credentials_id())?; - let provider = params.clone().into_provider(creds.clone()).await?; - let dal_ctx: nvisy_dal::core::Context = self.ctx.clone().into(); - let sink = provider.write_sink(&dal_ctx).await?; + let sink = provider.write_sink(); Ok(OutputStream::new(sink)) } diff --git a/crates/nvisy-runtime/src/engine/context.rs b/crates/nvisy-runtime/src/engine/context.rs index 28b746a..4743590 100644 --- a/crates/nvisy-runtime/src/engine/context.rs +++ b/crates/nvisy-runtime/src/engine/context.rs @@ -15,6 +15,8 @@ pub struct Context { pub target: Option, /// Cursor for pagination (provider-specific format). pub cursor: Option, + /// Tiebreaker for pagination conflicts. + pub tiebreaker: Option, /// Maximum number of items to read. pub limit: Option, } @@ -37,6 +39,12 @@ impl Context { self } + /// Sets the tiebreaker for pagination. + pub fn with_tiebreaker(mut self, tiebreaker: impl Into) -> Self { + self.tiebreaker = Some(tiebreaker.into()); + self + } + /// Sets the limit. pub fn with_limit(mut self, limit: usize) -> Self { self.limit = Some(limit); @@ -57,31 +65,38 @@ impl Context { pub fn limit(&self) -> Option { self.limit } -} -impl From for nvisy_dal::core::Context { - fn from(ctx: Context) -> Self { - let mut dal_ctx = nvisy_dal::core::Context::new(); - if let Some(target) = ctx.target { - dal_ctx = dal_ctx.with_target(target); + /// Converts to an ObjectContext for object storage providers. + pub fn to_object_context(&self) -> nvisy_dal::ObjectContext { + let mut ctx = nvisy_dal::ObjectContext::new(); + if let Some(ref prefix) = self.target { + ctx = ctx.with_prefix(prefix.clone()); } - if let Some(cursor) = ctx.cursor { - dal_ctx = dal_ctx.with_cursor(cursor); + if let Some(ref token) = self.cursor { + ctx = ctx.with_token(token.clone()); } - if let Some(limit) = ctx.limit { - dal_ctx = dal_ctx.with_limit(limit); + if let Some(limit) = self.limit { + ctx = ctx.with_limit(limit); } - dal_ctx + ctx } -} -impl From for Context { - fn from(ctx: nvisy_dal::core::Context) -> Self { - Self { - target: ctx.target, - cursor: ctx.cursor, - limit: ctx.limit, + /// Converts to a RelationalContext for relational database providers. + pub fn to_relational_context(&self) -> nvisy_dal::RelationalContext { + let mut ctx = nvisy_dal::RelationalContext::new(); + if let Some(ref table) = self.target { + ctx = ctx.with_table(table.clone()); + } + if let Some(ref cursor) = self.cursor { + ctx = ctx.with_cursor(cursor.clone()); + } + if let Some(ref tiebreaker) = self.tiebreaker { + ctx = ctx.with_tiebreaker(tiebreaker.clone()); + } + if let Some(limit) = self.limit { + ctx = ctx.with_limit(limit); } + ctx } } diff --git a/crates/nvisy-runtime/src/error.rs b/crates/nvisy-runtime/src/error.rs index 6d92249..190e9ce 100644 --- a/crates/nvisy-runtime/src/error.rs +++ b/crates/nvisy-runtime/src/error.rs @@ -51,7 +51,7 @@ pub enum Error { /// Storage operation failed. #[error("storage error: {0}")] - Storage(#[from] nvisy_dal::StorageError), + Storage(#[from] nvisy_dal::Error), /// Serialization/deserialization error. 
#[error("serialization error: {0}")] diff --git a/crates/nvisy-runtime/src/provider/inputs.rs b/crates/nvisy-runtime/src/provider/inputs.rs index edb2e52..d9c7839 100644 --- a/crates/nvisy-runtime/src/provider/inputs.rs +++ b/crates/nvisy-runtime/src/provider/inputs.rs @@ -1,11 +1,10 @@ //! Input provider types and implementations. use derive_more::From; -use nvisy_dal::core::Context; use nvisy_dal::provider::{ AzblobProvider, GcsProvider, MysqlProvider, PostgresProvider, S3Provider, }; -use nvisy_dal::{AnyDataValue, DataTypeId}; +use nvisy_dal::{AnyDataValue, DataTypeId, ObjectContext, RelationalContext}; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -113,30 +112,55 @@ impl InputProvider { } } - /// Reads data from the provider as a stream. - /// - /// Returns a boxed stream of type-erased values that can be processed incrementally. - pub async fn read_stream( + /// Reads data from the provider as a stream using object context. + pub async fn read_object_stream( &self, - ctx: &Context, + ctx: &ObjectContext, ) -> Result>> { match self { Self::S3(p) => read_stream!(p, ctx, Blob), Self::Gcs(p) => read_stream!(p, ctx, Blob), Self::Azblob(p) => read_stream!(p, ctx, Blob), + _ => Err(Error::Internal( + "Provider does not support ObjectContext".into(), + )), + } + } + + /// Reads data from the provider as a stream using relational context. + pub async fn read_relational_stream( + &self, + ctx: &RelationalContext, + ) -> Result>> { + match self { Self::Postgres(p) => read_stream!(p, ctx, Record), Self::Mysql(p) => read_stream!(p, ctx, Record), + _ => Err(Error::Internal( + "Provider does not support RelationalContext".into(), + )), } } - /// Reads data from the provider, returning type-erased values. - pub async fn read(&self, ctx: &Context) -> Result> { + /// Reads data from the provider using object context. + pub async fn read_object(&self, ctx: &ObjectContext) -> Result> { match self { Self::S3(p) => read_data!(p, ctx, Blob), Self::Gcs(p) => read_data!(p, ctx, Blob), Self::Azblob(p) => read_data!(p, ctx, Blob), + _ => Err(Error::Internal( + "Provider does not support ObjectContext".into(), + )), + } + } + + /// Reads data from the provider using relational context. + pub async fn read_relational(&self, ctx: &RelationalContext) -> Result> { + match self { Self::Postgres(p) => read_data!(p, ctx, Record), Self::Mysql(p) => read_data!(p, ctx, Record), + _ => Err(Error::Internal( + "Provider does not support RelationalContext".into(), + )), } } } diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs index a0a17e2..103c166 100644 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/mod.rs @@ -10,10 +10,6 @@ //! # Module Structure //! //! - [`backend`]: Individual provider implementations (credentials + params) -//! - `inputs`: Input provider types and read operations -//! - `outputs`: Output provider types and write operations -//! - `ai`: AI provider types (completion + embedding) -//! 
- `registry`: Credentials registry for workflow execution mod ai; pub mod backend; @@ -22,34 +18,25 @@ mod outputs; mod registry; pub mod runtime; -// Storage backend exports -// AI provider enum exports pub use ai::{AiCredentials, CompletionProviderParams, EmbeddingProviderParams}; -// AI provider exports -pub use backend::{ - AnthropicCompletionParams, AnthropicCredentials, CohereCompletionParams, CohereCredentials, - CohereEmbeddingParams, GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams, - OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams, PerplexityCompletionParams, - PerplexityCredentials, -}; -pub use backend::{ - AzblobCredentials, AzblobParams, GcsCredentials, GcsParams, IntoProvider, MysqlCredentials, - MysqlParams, PostgresCredentials, PostgresParams, S3Credentials, S3Params, -}; -// Vector database exports -pub use backend::{ - MilvusCredentials, MilvusParams, PgVectorCredentials, PgVectorParams, PineconeCredentials, - PineconeParams, QdrantCredentials, QdrantParams, +pub use backend::IntoProvider; +use backend::{ + AnthropicCredentials, AzblobCredentials, CohereCredentials, GcsCredentials, GeminiCredentials, + MilvusCredentials, MysqlCredentials, OpenAiCredentials, PerplexityCredentials, + PgVectorCredentials, PineconeCredentials, PostgresCredentials, QdrantCredentials, + S3Credentials, }; use derive_more::From; pub use inputs::{InputProvider, InputProviderParams}; pub use outputs::{OutputProvider, OutputProviderParams}; pub use registry::CredentialsRegistry; use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; /// Provider credentials (sensitive). -#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[derive(Debug, Clone, From, Serialize, Deserialize, IntoStaticStr)] #[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum ProviderCredentials { // Storage backends /// Amazon S3 credentials. @@ -88,25 +75,7 @@ pub enum ProviderCredentials { impl ProviderCredentials { /// Returns the provider kind as a string. - pub const fn kind(&self) -> &'static str { - match self { - // Storage backends - Self::S3(_) => "s3", - Self::Gcs(_) => "gcs", - Self::Azblob(_) => "azblob", - Self::Postgres(_) => "postgres", - Self::Mysql(_) => "mysql", - // Vector databases - Self::Qdrant(_) => "qdrant", - Self::Pinecone(_) => "pinecone", - Self::Milvus(_) => "milvus", - Self::PgVector(_) => "pgvector", - // AI providers - Self::OpenAi(_) => "openai", - Self::Anthropic(_) => "anthropic", - Self::Cohere(_) => "cohere", - Self::Gemini(_) => "gemini", - Self::Perplexity(_) => "perplexity", - } + pub fn kind(&self) -> &'static str { + self.into() } } diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs index 3996fd1..9d724c6 100644 --- a/crates/nvisy-runtime/src/provider/outputs.rs +++ b/crates/nvisy-runtime/src/provider/outputs.rs @@ -6,7 +6,6 @@ use std::task::{Context as TaskContext, Poll}; use derive_more::From; use futures::Sink; -use nvisy_dal::core::Context; use nvisy_dal::provider::{ AzblobProvider, GcsProvider, MilvusProvider, MysqlProvider, PgVectorProvider, PineconeProvider, PostgresProvider, QdrantProvider, S3Provider, @@ -163,23 +162,23 @@ impl OutputProvider { /// Creates a sink for streaming writes to the provider. /// /// The sink buffers items and writes them on flush/close. 
- pub async fn write_sink(self, ctx: &Context) -> Result { - let sink = ProviderSink::new(self, ctx.clone()); - Ok(Box::pin(sink)) + pub fn write_sink(self) -> DataSink { + let sink = ProviderSink::new(self); + Box::pin(sink) } /// Writes data to the provider, accepting type-erased values. - pub async fn write(&self, ctx: &Context, data: Vec) -> Result<()> { + pub async fn write(&self, data: Vec) -> Result<()> { match self { - Self::S3(p) => write_data!(p, ctx, data, Blob, into_blob), - Self::Gcs(p) => write_data!(p, ctx, data, Blob, into_blob), - Self::Azblob(p) => write_data!(p, ctx, data, Blob, into_blob), - Self::Postgres(p) => write_data!(p, ctx, data, Record, into_record), - Self::Mysql(p) => write_data!(p, ctx, data, Record, into_record), - Self::Qdrant(p) => write_data!(p, ctx, data, Embedding, into_embedding), - Self::Pinecone(p) => write_data!(p, ctx, data, Embedding, into_embedding), - Self::Milvus(p) => write_data!(p, ctx, data, Embedding, into_embedding), - Self::PgVector(p) => write_data!(p, ctx, data, Embedding, into_embedding), + Self::S3(p) => write_data!(p, data, Blob, into_blob), + Self::Gcs(p) => write_data!(p, data, Blob, into_blob), + Self::Azblob(p) => write_data!(p, data, Blob, into_blob), + Self::Postgres(p) => write_data!(p, data, Record, into_record), + Self::Mysql(p) => write_data!(p, data, Record, into_record), + Self::Qdrant(p) => write_data!(p, data, Embedding, into_embedding), + Self::Pinecone(p) => write_data!(p, data, Embedding, into_embedding), + Self::Milvus(p) => write_data!(p, data, Embedding, into_embedding), + Self::PgVector(p) => write_data!(p, data, Embedding, into_embedding), } } } @@ -187,16 +186,14 @@ impl OutputProvider { /// A sink that buffers items and writes them to an output provider. struct ProviderSink { provider: Arc, - ctx: Context, buffer: Arc>>, flush_future: Option> + Send>>>, } impl ProviderSink { - fn new(provider: OutputProvider, ctx: Context) -> Self { + fn new(provider: OutputProvider) -> Self { Self { provider: Arc::new(provider), - ctx, buffer: Arc::new(Mutex::new(Vec::new())), flush_future: None, } @@ -218,7 +215,6 @@ impl Sink for ProviderSink { item: AnyDataValue, ) -> std::result::Result<(), Self::Error> { let buffer = self.buffer.clone(); - // Use blocking lock since we're in a sync context if let Ok(mut guard) = buffer.try_lock() { guard.push(item); Ok(()) @@ -231,7 +227,6 @@ impl Sink for ProviderSink { mut self: Pin<&mut Self>, cx: &mut TaskContext<'_>, ) -> Poll> { - // If we have an in-progress flush, poll it if let Some(ref mut future) = self.flush_future { return match future.as_mut().poll(cx) { Poll::Ready(result) => { @@ -242,10 +237,8 @@ impl Sink for ProviderSink { }; } - // Take items from buffer and start write let buffer = self.buffer.clone(); let provider = self.provider.clone(); - let ctx = self.ctx.clone(); let future = Box::pin(async move { let items = { @@ -257,7 +250,7 @@ impl Sink for ProviderSink { return Ok(()); } - provider.write(&ctx, items).await + provider.write(items).await }); self.flush_future = Some(future); @@ -274,14 +267,14 @@ impl Sink for ProviderSink { /// Helper macro to write data to a provider from AnyDataValue. macro_rules! 
write_data { - ($provider:expr, $ctx:expr, $data:expr, $type:ident, $converter:ident) => {{ + ($provider:expr, $data:expr, $type:ident, $converter:ident) => {{ use nvisy_dal::core::DataOutput; use nvisy_dal::datatype::$type; let items: Vec<$type> = $data.into_iter().filter_map(|v| v.$converter()).collect(); $provider - .write($ctx, items) + .write(items) .await .map_err(|e| Error::Internal(e.to_string())) }}; From e0b354b21a0fe90b04e519f8d7a0c39287ba6cc1 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Fri, 23 Jan 2026 09:51:03 +0100 Subject: [PATCH 21/28] feat(rig): implement IntoProvider trait for CompletionProvider and EmbeddingProvider - Move IntoProvider trait to nvisy-core as shared abstraction - Implement From for nvisy_core::Error - Refactor CompletionProvider to use IntoProvider::create(params, credentials) - Refactor EmbeddingProvider to use IntoProvider::create(params, credentials) - Update runtime backend providers to use new create pattern - Provider methods now return nvisy_core::Result for consistency --- Cargo.lock | 3 + crates/nvisy-core/Cargo.toml | 3 + crates/nvisy-core/src/lib.rs | 2 + crates/nvisy-core/src/provider.rs | 40 +++++ crates/nvisy-dal/Cargo.toml | 3 + crates/nvisy-dal/src/core/mod.rs | 2 + crates/nvisy-dal/src/error.rs | 19 ++- crates/nvisy-dal/src/lib.rs | 5 +- .../nvisy-dal/src/provider/azblob/config.rs | 48 ++---- crates/nvisy-dal/src/provider/azblob/input.rs | 2 +- crates/nvisy-dal/src/provider/azblob/mod.rs | 28 ++-- crates/nvisy-dal/src/provider/gcs/config.rs | 39 ++--- crates/nvisy-dal/src/provider/gcs/input.rs | 2 +- crates/nvisy-dal/src/provider/gcs/mod.rs | 26 ++-- .../nvisy-dal/src/provider/milvus/config.rs | 79 +++------- crates/nvisy-dal/src/provider/milvus/mod.rs | 30 ++-- .../nvisy-dal/src/provider/milvus/output.rs | 4 +- crates/nvisy-dal/src/provider/mod.rs | 23 +-- crates/nvisy-dal/src/provider/mysql/config.rs | 43 ++---- crates/nvisy-dal/src/provider/mysql/input.rs | 2 +- crates/nvisy-dal/src/provider/mysql/mod.rs | 32 ++-- .../nvisy-dal/src/provider/pgvector/config.rs | 49 ++---- crates/nvisy-dal/src/provider/pgvector/mod.rs | 34 +++-- .../nvisy-dal/src/provider/pinecone/config.rs | 46 ++---- crates/nvisy-dal/src/provider/pinecone/mod.rs | 29 ++-- .../nvisy-dal/src/provider/pinecone/output.rs | 2 +- .../nvisy-dal/src/provider/postgres/config.rs | 43 ++---- .../nvisy-dal/src/provider/postgres/input.rs | 2 +- crates/nvisy-dal/src/provider/postgres/mod.rs | 33 ++-- .../nvisy-dal/src/provider/qdrant/config.rs | 48 ++---- crates/nvisy-dal/src/provider/qdrant/mod.rs | 32 ++-- .../nvisy-dal/src/provider/qdrant/output.rs | 4 +- crates/nvisy-dal/src/provider/s3/config.rs | 64 ++------ crates/nvisy-dal/src/provider/s3/input.rs | 2 +- crates/nvisy-dal/src/provider/s3/mod.rs | 36 ++--- crates/nvisy-rig/Cargo.toml | 1 + crates/nvisy-rig/src/chat/service.rs | 9 +- crates/nvisy-rig/src/error.rs | 22 +++ .../src/provider/completion/provider.rs | 55 ++++--- .../src/provider/embedding/provider.rs | 47 +++--- crates/nvisy-rig/src/rag/indexer/mod.rs | 6 +- crates/nvisy-rig/src/rag/searcher/mod.rs | 6 +- crates/nvisy-rig/src/service/config.rs | 4 +- crates/nvisy-rig/src/service/rig.rs | 6 +- crates/nvisy-runtime/src/definition/input.rs | 11 +- crates/nvisy-runtime/src/definition/output.rs | 11 +- crates/nvisy-runtime/src/engine/compiler.rs | 20 +-- crates/nvisy-runtime/src/engine/executor.rs | 3 +- crates/nvisy-runtime/src/graph/route/mod.rs | 1 - crates/nvisy-runtime/src/provider/ai.rs | 26 ++-- .../src/provider/backend/anthropic.rs | 9 +- 
.../src/provider/backend/azblob.rs | 55 ------- .../src/provider/backend/cohere.rs | 15 +- .../nvisy-runtime/src/provider/backend/gcs.rs | 43 ------ .../src/provider/backend/gemini.rs | 15 +- .../src/provider/backend/milvus.rs | 70 --------- .../nvisy-runtime/src/provider/backend/mod.rs | 74 +++++---- .../src/provider/backend/mysql.rs | 43 ------ .../src/provider/backend/openai.rs | 15 +- .../src/provider/backend/perplexity.rs | 9 +- .../src/provider/backend/pgvector.rs | 40 ----- .../src/provider/backend/pinecone.rs | 54 ------- .../src/provider/backend/postgres.rs | 43 ------ .../src/provider/backend/qdrant.rs | 51 ------- .../nvisy-runtime/src/provider/backend/s3.rs | 54 ------- crates/nvisy-runtime/src/provider/inputs.rs | 102 ++++++++----- crates/nvisy-runtime/src/provider/mod.rs | 8 +- crates/nvisy-runtime/src/provider/outputs.rs | 141 +++++++++++------- .../src/handler/response/pipelines.rs | 3 +- 69 files changed, 748 insertions(+), 1183 deletions(-) create mode 100644 crates/nvisy-core/src/provider.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/azblob.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/gcs.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/milvus.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/mysql.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/pgvector.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/pinecone.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/postgres.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/qdrant.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/s3.rs diff --git a/Cargo.lock b/Cargo.lock index c70099e..71002d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3234,6 +3234,7 @@ dependencies = [ name = "nvisy-core" version = "0.1.0" dependencies = [ + "async-trait", "jiff", "schemars 0.9.0", "serde", @@ -3255,6 +3256,7 @@ dependencies = [ "futures", "jiff", "milvus-sdk-rust", + "nvisy-core", "opendal", "pgvector", "pinecone-sdk", @@ -3329,6 +3331,7 @@ dependencies = [ "derive_more", "futures", "jiff", + "nvisy-core", "nvisy-nats", "nvisy-postgres", "rig-core", diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml index 4afb491..3ca4546 100644 --- a/crates/nvisy-core/Cargo.toml +++ b/crates/nvisy-core/Cargo.toml @@ -24,6 +24,9 @@ default = [] schema = ["dep:schemars"] [dependencies] +# Async +async-trait = { workspace = true } + # Error handling thiserror = { workspace = true } diff --git a/crates/nvisy-core/src/lib.rs b/crates/nvisy-core/src/lib.rs index 98d8fcd..6558bc5 100644 --- a/crates/nvisy-core/src/lib.rs +++ b/crates/nvisy-core/src/lib.rs @@ -3,6 +3,8 @@ #![doc = include_str!("../README.md")] mod error; +mod provider; pub mod types; pub use error::{BoxedError, Error, ErrorKind, Result}; +pub use provider::IntoProvider; diff --git a/crates/nvisy-core/src/provider.rs b/crates/nvisy-core/src/provider.rs new file mode 100644 index 0000000..6a7e4ce --- /dev/null +++ b/crates/nvisy-core/src/provider.rs @@ -0,0 +1,40 @@ +//! Provider creation trait. + +use crate::Result; + +/// Trait for creating a provider from parameters and credentials. +/// +/// This trait bridges non-sensitive parameters (like bucket name, table, model) +/// with sensitive credentials (like API keys, secrets) to construct +/// a fully configured provider instance. 
+/// +/// # Type Parameters +/// +/// - `Params`: Non-sensitive configuration (e.g., bucket name, model name) +/// - `Credentials`: Sensitive authentication data (e.g., API keys, secrets) +/// +/// # Example +/// +/// ```ignore +/// #[async_trait::async_trait] +/// impl IntoProvider for S3Provider { +/// type Params = S3Params; +/// type Credentials = S3Credentials; +/// +/// async fn create(params: Self::Params, credentials: Self::Credentials) -> Result { +/// // Build provider from params and credentials +/// } +/// } +/// ``` +#[async_trait::async_trait] +pub trait IntoProvider: Send { + /// Non-sensitive parameters (bucket, prefix, table, model, etc.). + type Params: Send; + /// Sensitive credentials (API keys, secrets, etc.). + type Credentials: Send; + + /// Creates a new provider from parameters and credentials. + async fn create(params: Self::Params, credentials: Self::Credentials) -> Result + where + Self: Sized; +} diff --git a/crates/nvisy-dal/Cargo.toml b/crates/nvisy-dal/Cargo.toml index 3c5b027..cce2611 100644 --- a/crates/nvisy-dal/Cargo.toml +++ b/crates/nvisy-dal/Cargo.toml @@ -21,6 +21,9 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] +# Internal crates +nvisy-core = { workspace = true, features = [] } + # Async runtime tokio = { workspace = true, features = ["rt", "sync"] } futures = { workspace = true, features = [] } diff --git a/crates/nvisy-dal/src/core/mod.rs b/crates/nvisy-dal/src/core/mod.rs index 499eaa5..2dccc7c 100644 --- a/crates/nvisy-dal/src/core/mod.rs +++ b/crates/nvisy-dal/src/core/mod.rs @@ -7,6 +7,8 @@ mod relational_context; mod vector_context; pub use input_stream::{InputStream, ItemStream}; +// Re-export IntoProvider from nvisy-core +pub use nvisy_core::IntoProvider; pub use object_context::ObjectContext; pub use output_stream::{ItemSink, OutputStream}; pub use relational_context::RelationalContext; diff --git a/crates/nvisy-dal/src/error.rs b/crates/nvisy-dal/src/error.rs index 6cd112e..5b4b9b0 100644 --- a/crates/nvisy-dal/src/error.rs +++ b/crates/nvisy-dal/src/error.rs @@ -1,7 +1,5 @@ //! Error types for data operations. -use thiserror::Error; - /// Boxed error type for dynamic error handling. pub type BoxError = Box; @@ -9,7 +7,7 @@ pub type BoxError = Box; pub type Result = std::result::Result; /// Error type for data operations. 
-#[derive(Debug, Error)] +#[derive(Debug, thiserror::Error)] #[error("{kind}: {message}")] pub struct Error { kind: ErrorKind, @@ -83,3 +81,18 @@ impl std::fmt::Display for ErrorKind { } } } + +impl From for nvisy_core::Error { + fn from(err: Error) -> Self { + let kind = match err.kind { + ErrorKind::Connection => nvisy_core::ErrorKind::NetworkError, + ErrorKind::NotFound => nvisy_core::ErrorKind::NotFound, + ErrorKind::InvalidInput => nvisy_core::ErrorKind::InvalidInput, + ErrorKind::Provider => nvisy_core::ErrorKind::ExternalError, + }; + + nvisy_core::Error::new(kind) + .with_message(&err.message) + .with_source(err) + } +} diff --git a/crates/nvisy-dal/src/lib.rs b/crates/nvisy-dal/src/lib.rs index f492c1a..7ccbf9b 100644 --- a/crates/nvisy-dal/src/lib.rs +++ b/crates/nvisy-dal/src/lib.rs @@ -13,8 +13,9 @@ pub mod provider; mod error; pub use core::{ - DataInput, DataOutput, InputStream, ItemSink, ItemStream, ObjectContext, OutputStream, - RelationalContext, VectorContext, + DataInput, DataOutput, InputStream, IntoProvider, ItemSink, ItemStream, ObjectContext, + OutputStream, RelationalContext, VectorContext, }; + pub use datatype::{AnyDataValue, DataTypeId}; pub use error::{BoxError, Error, ErrorKind, Result}; diff --git a/crates/nvisy-dal/src/provider/azblob/config.rs b/crates/nvisy-dal/src/provider/azblob/config.rs index ab4f1e9..61b13c9 100644 --- a/crates/nvisy-dal/src/provider/azblob/config.rs +++ b/crates/nvisy-dal/src/provider/azblob/config.rs @@ -1,52 +1,26 @@ -//! Azure Blob Storage configuration. +//! Azure Blob Storage configuration types. use serde::{Deserialize, Serialize}; -/// Azure Blob Storage configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct AzblobConfig { +/// Azure Blob Storage credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AzblobCredentials { /// Storage account name. pub account_name: String, - /// Container name. - pub container: String, /// Account key for authentication. #[serde(skip_serializing_if = "Option::is_none")] pub account_key: Option, /// SAS token for authentication. #[serde(skip_serializing_if = "Option::is_none")] pub sas_token: Option, +} + +/// Azure Blob Storage parameters (non-sensitive). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct AzblobParams { + /// Container name. + pub container: String, /// Path prefix within the container. #[serde(skip_serializing_if = "Option::is_none")] pub prefix: Option, } - -impl AzblobConfig { - /// Creates a new Azure Blob configuration. - pub fn new(account_name: impl Into, container: impl Into) -> Self { - Self { - account_name: account_name.into(), - container: container.into(), - account_key: None, - sas_token: None, - prefix: None, - } - } - - /// Sets the account key. - pub fn with_account_key(mut self, account_key: impl Into) -> Self { - self.account_key = Some(account_key.into()); - self - } - - /// Sets the SAS token. - pub fn with_sas_token(mut self, sas_token: impl Into) -> Self { - self.sas_token = Some(sas_token.into()); - self - } - - /// Sets the path prefix. 
- pub fn with_prefix(mut self, prefix: impl Into) -> Self { - self.prefix = Some(prefix.into()); - self - } -} diff --git a/crates/nvisy-dal/src/provider/azblob/input.rs b/crates/nvisy-dal/src/provider/azblob/input.rs index 4272019..2ca1b21 100644 --- a/crates/nvisy-dal/src/provider/azblob/input.rs +++ b/crates/nvisy-dal/src/provider/azblob/input.rs @@ -10,8 +10,8 @@ use crate::error::{Error, Result}; #[async_trait] impl DataInput for AzblobProvider { - type Item = Blob; type Context = ObjectContext; + type Item = Blob; async fn read(&self, ctx: &ObjectContext) -> Result> { let prefix = ctx.prefix.as_deref().unwrap_or(""); diff --git a/crates/nvisy-dal/src/provider/azblob/mod.rs b/crates/nvisy-dal/src/provider/azblob/mod.rs index 2ba7f87..2646b60 100644 --- a/crates/nvisy-dal/src/provider/azblob/mod.rs +++ b/crates/nvisy-dal/src/provider/azblob/mod.rs @@ -4,11 +4,11 @@ mod config; mod input; mod output; -pub use config::AzblobConfig; - +pub use config::{AzblobCredentials, AzblobParams}; use opendal::{Operator, services}; -use crate::error::{Error, Result}; +use crate::core::IntoProvider; +use crate::error::Error; /// Azure Blob Storage provider for blob storage. #[derive(Clone)] @@ -16,22 +16,28 @@ pub struct AzblobProvider { operator: Operator, } -impl AzblobProvider { - /// Creates a new Azure Blob provider. - pub fn new(config: &AzblobConfig) -> Result { +#[async_trait::async_trait] +impl IntoProvider for AzblobProvider { + type Credentials = AzblobCredentials; + type Params = AzblobParams; + + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { let mut builder = services::Azblob::default() - .account_name(&config.account_name) - .container(&config.container); + .account_name(&credentials.account_name) + .container(¶ms.container); - if let Some(ref account_key) = config.account_key { + if let Some(ref account_key) = credentials.account_key { builder = builder.account_key(account_key); } - if let Some(ref sas_token) = config.sas_token { + if let Some(ref sas_token) = credentials.sas_token { builder = builder.sas_token(sas_token); } - if let Some(ref prefix) = config.prefix { + if let Some(ref prefix) = params.prefix { builder = builder.root(prefix); } diff --git a/crates/nvisy-dal/src/provider/gcs/config.rs b/crates/nvisy-dal/src/provider/gcs/config.rs index 1dc2a00..edcabc6 100644 --- a/crates/nvisy-dal/src/provider/gcs/config.rs +++ b/crates/nvisy-dal/src/provider/gcs/config.rs @@ -1,39 +1,20 @@ -//! Google Cloud Storage configuration. +//! Google Cloud Storage configuration types. use serde::{Deserialize, Serialize}; -/// Google Cloud Storage configuration. +/// Google Cloud Storage credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct GcsCredentials { + /// Service account credentials JSON. + pub credentials_json: String, +} + +/// Google Cloud Storage parameters (non-sensitive). #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct GcsConfig { +pub struct GcsParams { /// Bucket name. pub bucket: String, - /// Service account credentials JSON. - #[serde(skip_serializing_if = "Option::is_none")] - pub credentials: Option, /// Path prefix within the bucket. #[serde(skip_serializing_if = "Option::is_none")] pub prefix: Option, } - -impl GcsConfig { - /// Creates a new GCS configuration. - pub fn new(bucket: impl Into) -> Self { - Self { - bucket: bucket.into(), - credentials: None, - prefix: None, - } - } - - /// Sets the credentials JSON. 
- pub fn with_credentials(mut self, credentials: impl Into) -> Self { - self.credentials = Some(credentials.into()); - self - } - - /// Sets the path prefix. - pub fn with_prefix(mut self, prefix: impl Into) -> Self { - self.prefix = Some(prefix.into()); - self - } -} diff --git a/crates/nvisy-dal/src/provider/gcs/input.rs b/crates/nvisy-dal/src/provider/gcs/input.rs index 7d23b57..c709bb8 100644 --- a/crates/nvisy-dal/src/provider/gcs/input.rs +++ b/crates/nvisy-dal/src/provider/gcs/input.rs @@ -10,8 +10,8 @@ use crate::error::{Error, Result}; #[async_trait] impl DataInput for GcsProvider { - type Item = Blob; type Context = ObjectContext; + type Item = Blob; async fn read(&self, ctx: &ObjectContext) -> Result> { let prefix = ctx.prefix.as_deref().unwrap_or(""); diff --git a/crates/nvisy-dal/src/provider/gcs/mod.rs b/crates/nvisy-dal/src/provider/gcs/mod.rs index 49e379b..742480e 100644 --- a/crates/nvisy-dal/src/provider/gcs/mod.rs +++ b/crates/nvisy-dal/src/provider/gcs/mod.rs @@ -4,11 +4,11 @@ mod config; mod input; mod output; -pub use config::GcsConfig; - +pub use config::{GcsCredentials, GcsParams}; use opendal::{Operator, services}; -use crate::error::{Error, Result}; +use crate::core::IntoProvider; +use crate::error::Error; /// Google Cloud Storage provider for blob storage. #[derive(Clone)] @@ -16,16 +16,20 @@ pub struct GcsProvider { operator: Operator, } -impl GcsProvider { - /// Creates a new GCS provider. - pub fn new(config: &GcsConfig) -> Result { - let mut builder = services::Gcs::default().bucket(&config.bucket); +#[async_trait::async_trait] +impl IntoProvider for GcsProvider { + type Credentials = GcsCredentials; + type Params = GcsParams; - if let Some(ref credentials) = config.credentials { - builder = builder.credential(credentials); - } + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let mut builder = services::Gcs::default() + .bucket(¶ms.bucket) + .credential(&credentials.credentials_json); - if let Some(ref prefix) = config.prefix { + if let Some(ref prefix) = params.prefix { builder = builder.root(prefix); } diff --git a/crates/nvisy-dal/src/provider/milvus/config.rs b/crates/nvisy-dal/src/provider/milvus/config.rs index 945478a..1b91a5f 100644 --- a/crates/nvisy-dal/src/provider/milvus/config.rs +++ b/crates/nvisy-dal/src/provider/milvus/config.rs @@ -1,14 +1,19 @@ -//! Milvus configuration. +//! Milvus configuration types. use serde::{Deserialize, Serialize}; -/// Milvus configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MilvusConfig { +/// Default Milvus port. +fn default_port() -> u16 { + 19530 +} + +/// Milvus credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MilvusCredentials { /// Milvus server host. pub host: String, /// Milvus server port. - #[serde(default = "default_milvus_port")] + #[serde(default = "default_port")] pub port: u16, /// Username for authentication. #[serde(skip_serializing_if = "Option::is_none")] @@ -16,67 +21,17 @@ pub struct MilvusConfig { /// Password for authentication. #[serde(skip_serializing_if = "Option::is_none")] pub password: Option, +} + +/// Milvus parameters (non-sensitive). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MilvusParams { + /// Collection name. + pub collection: String, /// Database name. #[serde(skip_serializing_if = "Option::is_none")] pub database: Option, - /// Default collection name. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub collection: Option<String>, /// Vector dimensions. #[serde(skip_serializing_if = "Option::is_none")] pub dimensions: Option<usize>, } - -impl MilvusConfig { - /// Creates a new Milvus configuration. - pub fn new(host: impl Into<String>) -> Self { - Self { - host: host.into(), - port: default_milvus_port(), - username: None, - password: None, - database: None, - collection: None, - dimensions: None, - } - } - - /// Sets the port. - pub fn with_port(mut self, port: u16) -> Self { - self.port = port; - self - } - - /// Sets the credentials. - pub fn with_credentials( - mut self, - username: impl Into<String>, - password: impl Into<String>, - ) -> Self { - self.username = Some(username.into()); - self.password = Some(password.into()); - self - } - - /// Sets the database name. - pub fn with_database(mut self, database: impl Into<String>) -> Self { - self.database = Some(database.into()); - self - } - - /// Sets the default collection. - pub fn with_collection(mut self, collection: impl Into<String>) -> Self { - self.collection = Some(collection.into()); - self - } - - /// Sets the vector dimensions. - pub fn with_dimensions(mut self, dimensions: usize) -> Self { - self.dimensions = Some(dimensions); - self - } -} - -fn default_milvus_port() -> u16 { - 19530 -} diff --git a/crates/nvisy-dal/src/provider/milvus/mod.rs b/crates/nvisy-dal/src/provider/milvus/mod.rs index 1ad257b..67c3dd8 100644 --- a/crates/nvisy-dal/src/provider/milvus/mod.rs +++ b/crates/nvisy-dal/src/provider/milvus/mod.rs @@ -6,39 +6,45 @@ mod output; use std::borrow::Cow; use std::collections::HashMap; -pub use config::MilvusConfig; +pub use config::{MilvusCredentials, MilvusParams}; use milvus::client::Client; use milvus::collection::SearchOption; use milvus::index::{IndexParams, IndexType, MetricType}; use milvus::schema::{CollectionSchemaBuilder, FieldSchema}; use milvus::value::Value; +use crate::core::IntoProvider; use crate::error::{Error, Result}; /// Milvus provider for vector storage. pub struct MilvusProvider { client: Client, - config: MilvusConfig, + params: MilvusParams, } -impl MilvusProvider { - /// Creates a new Milvus provider. - pub async fn new(config: &MilvusConfig) -> Result<Self> { - let url = format!("http://{}:{}", config.host, config.port); +#[async_trait::async_trait] +impl IntoProvider for MilvusProvider { + type Credentials = MilvusCredentials; + type Params = MilvusParams; + + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result<Self> { + let url = format!("http://{}:{}", credentials.host, credentials.port); let client = Client::new(url) .await .map_err(|e| Error::connection(e.to_string()))?; - Ok(Self { - client, - config: config.clone(), - }) + Ok(Self { client, params }) } +} +impl MilvusProvider { /// Returns the configured collection name. - pub fn collection(&self) -> Option<&str> { - self.config.collection.as_deref() + pub fn collection(&self) -> &str { + &self.params.collection } /// Ensures a collection exists, creating it if necessary.
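The hunks above replace the old `MilvusProvider::new(&MilvusConfig)` constructor with the shared `IntoProvider::create(params, credentials)` entry point. A minimal call-site sketch, assuming the `MilvusCredentials` and `MilvusParams` fields shown above (the host, collection name, and dimension values are placeholders):

```rust
use nvisy_core::IntoProvider;
use nvisy_dal::provider::{MilvusCredentials, MilvusParams, MilvusProvider};

async fn connect_milvus() -> nvisy_core::Result<MilvusProvider> {
    // Sensitive connection details live on the credentials type.
    let credentials = MilvusCredentials {
        host: "localhost".into(), // placeholder host
        port: 19530,
        username: None,
        password: None,
    };

    // Non-sensitive settings (collection, database, dimensions) are plain params.
    let params = MilvusParams {
        collection: "documents".into(), // placeholder collection name
        database: None,
        dimensions: Some(1536), // placeholder vector size
    };

    // The IntoProvider trait must be in scope for `create` to resolve.
    MilvusProvider::create(params, credentials).await
}
```

The same `Params`/`Credentials` split applies to every provider touched by this patch; only the concrete types differ.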
diff --git a/crates/nvisy-dal/src/provider/milvus/output.rs b/crates/nvisy-dal/src/provider/milvus/output.rs index 59ff992..b7315e1 100644 --- a/crates/nvisy-dal/src/provider/milvus/output.rs +++ b/crates/nvisy-dal/src/provider/milvus/output.rs @@ -19,9 +19,7 @@ impl DataOutput for MilvusProvider { return Ok(()); } - let collection = self - .collection() - .ok_or_else(|| Error::invalid_input("Collection name required in provider config"))?; + let collection = self.collection(); let dim = items.first().map(|v| v.vector.len()).unwrap_or(0); diff --git a/crates/nvisy-dal/src/provider/mod.rs b/crates/nvisy-dal/src/provider/mod.rs index 9771422..f2b01cc 100644 --- a/crates/nvisy-dal/src/provider/mod.rs +++ b/crates/nvisy-dal/src/provider/mod.rs @@ -10,12 +10,17 @@ mod postgres; mod qdrant; mod s3; -pub use azblob::{AzblobConfig, AzblobProvider}; -pub use gcs::{GcsConfig, GcsProvider}; -pub use milvus::{MilvusConfig, MilvusProvider}; -pub use mysql::{MysqlConfig, MysqlProvider}; -pub use pgvector::{DistanceMetric, IndexType, PgVectorConfig, PgVectorProvider}; -pub use pinecone::{PineconeConfig, PineconeProvider}; -pub use postgres::{PostgresConfig, PostgresProvider}; -pub use qdrant::{QdrantConfig, QdrantProvider}; -pub use s3::{S3Config, S3Provider}; +// Object storage providers +pub use azblob::{AzblobCredentials, AzblobParams, AzblobProvider}; +pub use gcs::{GcsCredentials, GcsParams, GcsProvider}; +// Vector database providers +pub use milvus::{MilvusCredentials, MilvusParams, MilvusProvider}; +// Relational database providers +pub use mysql::{MysqlCredentials, MysqlParams, MysqlProvider}; +pub use pgvector::{ + DistanceMetric, IndexType, PgVectorCredentials, PgVectorParams, PgVectorProvider, +}; +pub use pinecone::{PineconeCredentials, PineconeParams, PineconeProvider}; +pub use postgres::{PostgresCredentials, PostgresParams, PostgresProvider}; +pub use qdrant::{QdrantCredentials, QdrantParams, QdrantProvider}; +pub use s3::{S3Credentials, S3Params, S3Provider}; diff --git a/crates/nvisy-dal/src/provider/mysql/config.rs b/crates/nvisy-dal/src/provider/mysql/config.rs index ecba6bc..92c9e12 100644 --- a/crates/nvisy-dal/src/provider/mysql/config.rs +++ b/crates/nvisy-dal/src/provider/mysql/config.rs @@ -1,39 +1,20 @@ -//! MySQL configuration. +//! MySQL configuration types. use serde::{Deserialize, Serialize}; -/// MySQL configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MysqlConfig { +/// MySQL credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MysqlCredentials { /// Connection string (e.g., "mysql://user:pass@host:3306/db"). pub connection_string: String, - /// Default table name. - #[serde(skip_serializing_if = "Option::is_none")] - pub table: Option, - /// Default database. - #[serde(skip_serializing_if = "Option::is_none")] - pub database: Option, } -impl MysqlConfig { - /// Creates a new MySQL configuration. - pub fn new(connection_string: impl Into) -> Self { - Self { - connection_string: connection_string.into(), - table: None, - database: None, - } - } - - /// Sets the default table. - pub fn with_table(mut self, table: impl Into) -> Self { - self.table = Some(table.into()); - self - } - - /// Sets the default database. - pub fn with_database(mut self, database: impl Into) -> Self { - self.database = Some(database.into()); - self - } +/// MySQL parameters (non-sensitive). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct MysqlParams { + /// Table name. 
+ pub table: String, + /// Database name. + #[serde(skip_serializing_if = "Option::is_none")] + pub database: Option, } diff --git a/crates/nvisy-dal/src/provider/mysql/input.rs b/crates/nvisy-dal/src/provider/mysql/input.rs index 9cc2d75..a058128 100644 --- a/crates/nvisy-dal/src/provider/mysql/input.rs +++ b/crates/nvisy-dal/src/provider/mysql/input.rs @@ -12,8 +12,8 @@ use crate::error::{Error, Result}; #[async_trait] impl DataInput for MysqlProvider { - type Item = Record; type Context = RelationalContext; + type Item = Record; async fn read(&self, ctx: &RelationalContext) -> Result> { let prefix = ctx.table.as_deref().unwrap_or(""); diff --git a/crates/nvisy-dal/src/provider/mysql/mod.rs b/crates/nvisy-dal/src/provider/mysql/mod.rs index b1b8343..8a3ea77 100644 --- a/crates/nvisy-dal/src/provider/mysql/mod.rs +++ b/crates/nvisy-dal/src/provider/mysql/mod.rs @@ -4,11 +4,11 @@ mod config; mod input; mod output; -pub use config::MysqlConfig; - +pub use config::{MysqlCredentials, MysqlParams}; use opendal::{Operator, services}; -use crate::error::{Error, Result}; +use crate::core::IntoProvider; +use crate::error::Error; /// MySQL provider for relational data. #[derive(Clone)] @@ -16,17 +16,21 @@ pub struct MysqlProvider { operator: Operator, } -impl MysqlProvider { - /// Creates a new MySQL provider. - pub fn new(config: &MysqlConfig) -> Result { - let mut builder = services::Mysql::default().connection_string(&config.connection_string); - - if let Some(ref table) = config.table { - builder = builder.table(table); - } - - if let Some(ref root) = config.database { - builder = builder.root(root); +#[async_trait::async_trait] +impl IntoProvider for MysqlProvider { + type Credentials = MysqlCredentials; + type Params = MysqlParams; + + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let mut builder = services::Mysql::default() + .connection_string(&credentials.connection_string) + .table(¶ms.table); + + if let Some(ref database) = params.database { + builder = builder.root(database); } let operator = Operator::new(builder) diff --git a/crates/nvisy-dal/src/provider/pgvector/config.rs b/crates/nvisy-dal/src/provider/pgvector/config.rs index 48edfb6..5d6b7bb 100644 --- a/crates/nvisy-dal/src/provider/pgvector/config.rs +++ b/crates/nvisy-dal/src/provider/pgvector/config.rs @@ -1,14 +1,18 @@ -//! pgvector configuration. +//! pgvector configuration types. use serde::{Deserialize, Serialize}; -/// PostgreSQL pgvector configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PgVectorConfig { +/// pgvector credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PgVectorCredentials { /// PostgreSQL connection URL. pub connection_url: String, +} + +/// pgvector parameters (non-sensitive). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PgVectorParams { /// Table name for vectors. - #[serde(default = "default_pgvector_table")] pub table: String, /// Vector dimensions. pub dimensions: usize, @@ -20,41 +24,6 @@ pub struct PgVectorConfig { pub index_type: IndexType, } -impl PgVectorConfig { - /// Creates a new pgvector configuration. - pub fn new(connection_url: impl Into, dimensions: usize) -> Self { - Self { - connection_url: connection_url.into(), - table: default_pgvector_table(), - dimensions, - distance_metric: DistanceMetric::default(), - index_type: IndexType::default(), - } - } - - /// Sets the table name. 
- pub fn with_table(mut self, table: impl Into) -> Self { - self.table = table.into(); - self - } - - /// Sets the distance metric. - pub fn with_distance_metric(mut self, metric: DistanceMetric) -> Self { - self.distance_metric = metric; - self - } - - /// Sets the index type. - pub fn with_index_type(mut self, index_type: IndexType) -> Self { - self.index_type = index_type; - self - } -} - -fn default_pgvector_table() -> String { - "vectors".to_string() -} - /// Distance metric for pgvector. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/crates/nvisy-dal/src/provider/pgvector/mod.rs b/crates/nvisy-dal/src/provider/pgvector/mod.rs index 0254a8a..389a927 100644 --- a/crates/nvisy-dal/src/provider/pgvector/mod.rs +++ b/crates/nvisy-dal/src/provider/pgvector/mod.rs @@ -5,26 +5,33 @@ mod output; use std::collections::HashMap; -pub use config::{DistanceMetric, IndexType, PgVectorConfig}; +pub use config::{DistanceMetric, IndexType, PgVectorCredentials, PgVectorParams}; use diesel::prelude::*; use diesel::sql_types::{Float, Integer, Text}; use diesel_async::pooled_connection::AsyncDieselConnectionManager; use diesel_async::pooled_connection::deadpool::Pool; use diesel_async::{AsyncPgConnection, RunQueryDsl}; +use crate::core::IntoProvider; use crate::error::{Error, Result}; /// pgvector provider for vector storage using PostgreSQL. pub struct PgVectorProvider { pool: Pool, - config: PgVectorConfig, + params: PgVectorParams, } -impl PgVectorProvider { - /// Creates a new pgvector provider. - pub async fn new(config: &PgVectorConfig) -> Result { +#[async_trait::async_trait] +impl IntoProvider for PgVectorProvider { + type Credentials = PgVectorCredentials; + type Params = PgVectorParams; + + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { let manager = - AsyncDieselConnectionManager::::new(&config.connection_url); + AsyncDieselConnectionManager::::new(&credentials.connection_url); let pool = Pool::builder(manager) .build() @@ -44,15 +51,14 @@ impl PgVectorProvider { })?; } - Ok(Self { - pool, - config: config.clone(), - }) + Ok(Self { pool, params }) } +} +impl PgVectorProvider { /// Returns the configured table name. pub fn table(&self) -> &str { - &self.config.table + &self.params.table } pub(crate) async fn get_conn( @@ -65,7 +71,7 @@ impl PgVectorProvider { } pub(crate) fn distance_operator(&self) -> &'static str { - self.config.distance_metric.operator() + self.params.distance_metric.operator() } /// Ensures a collection (table) exists, creating it if necessary. 
@@ -92,7 +98,7 @@ impl PgVectorProvider { let index_name = format!("{}_vector_idx", name); let operator = self.distance_operator(); - let create_index = match self.config.index_type { + let create_index = match self.params.index_type { IndexType::IvfFlat => { format!( r#" @@ -149,7 +155,7 @@ impl PgVectorProvider { "" }; - let score_expr = match self.config.distance_metric { + let score_expr = match self.params.distance_metric { DistanceMetric::L2 => format!("vector {} $1::vector", operator), DistanceMetric::InnerProduct => format!("-(vector {} $1::vector)", operator), DistanceMetric::Cosine => format!("1 - (vector {} $1::vector)", operator), diff --git a/crates/nvisy-dal/src/provider/pinecone/config.rs b/crates/nvisy-dal/src/provider/pinecone/config.rs index c5e9cdd..1d58997 100644 --- a/crates/nvisy-dal/src/provider/pinecone/config.rs +++ b/crates/nvisy-dal/src/provider/pinecone/config.rs @@ -1,49 +1,23 @@ -//! Pinecone configuration. +//! Pinecone configuration types. use serde::{Deserialize, Serialize}; -/// Pinecone configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PineconeConfig { +/// Pinecone credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PineconeCredentials { /// Pinecone API key. pub api_key: String, - /// Environment (e.g., "us-east-1-aws"). - pub environment: String, +} + +/// Pinecone parameters (non-sensitive). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PineconeParams { /// Index name. pub index: String, - /// Namespace (optional). + /// Namespace. #[serde(skip_serializing_if = "Option::is_none")] pub namespace: Option, /// Vector dimensions. #[serde(skip_serializing_if = "Option::is_none")] pub dimensions: Option, } - -impl PineconeConfig { - /// Creates a new Pinecone configuration. - pub fn new( - api_key: impl Into, - environment: impl Into, - index: impl Into, - ) -> Self { - Self { - api_key: api_key.into(), - environment: environment.into(), - index: index.into(), - namespace: None, - dimensions: None, - } - } - - /// Sets the namespace. - pub fn with_namespace(mut self, namespace: impl Into) -> Self { - self.namespace = Some(namespace.into()); - self - } - - /// Sets the vector dimensions. - pub fn with_dimensions(mut self, dimensions: usize) -> Self { - self.dimensions = Some(dimensions); - self - } -} diff --git a/crates/nvisy-dal/src/provider/pinecone/mod.rs b/crates/nvisy-dal/src/provider/pinecone/mod.rs index 88fd6ba..4963838 100644 --- a/crates/nvisy-dal/src/provider/pinecone/mod.rs +++ b/crates/nvisy-dal/src/provider/pinecone/mod.rs @@ -5,25 +5,32 @@ mod output; use std::collections::{BTreeMap, HashMap}; -pub use config::PineconeConfig; +pub use config::{PineconeCredentials, PineconeParams}; use pinecone_sdk::models::{Kind, Metadata, Namespace, Value as PineconeValue}; use pinecone_sdk::pinecone::PineconeClientConfig; use pinecone_sdk::pinecone::data::Index; use tokio::sync::Mutex; +use crate::core::IntoProvider; use crate::error::{Error, Result}; /// Pinecone provider for vector storage. pub struct PineconeProvider { index: Mutex, - config: PineconeConfig, + params: PineconeParams, } -impl PineconeProvider { - /// Creates a new Pinecone provider. 
- pub async fn new(config: &PineconeConfig) -> Result { +#[async_trait::async_trait] +impl IntoProvider for PineconeProvider { + type Credentials = PineconeCredentials; + type Params = PineconeParams; + + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { let client_config = PineconeClientConfig { - api_key: Some(config.api_key.clone()), + api_key: Some(credentials.api_key), ..Default::default() }; @@ -32,7 +39,7 @@ impl PineconeProvider { .map_err(|e| Error::connection(e.to_string()))?; let index_description = client - .describe_index(&config.index) + .describe_index(¶ms.index) .await .map_err(|e| Error::connection(format!("Failed to describe index: {}", e)))?; @@ -45,13 +52,15 @@ impl PineconeProvider { Ok(Self { index: Mutex::new(index), - config: config.clone(), + params, }) } +} +impl PineconeProvider { pub(crate) fn get_namespace(&self, collection: &str) -> Namespace { if collection.is_empty() { - self.config + self.params .namespace .as_ref() .map(|ns| Namespace::from(ns.as_str())) @@ -63,7 +72,7 @@ impl PineconeProvider { /// Returns the configured namespace. pub fn namespace(&self) -> Option<&str> { - self.config.namespace.as_deref() + self.params.namespace.as_deref() } /// Searches for similar vectors. diff --git a/crates/nvisy-dal/src/provider/pinecone/output.rs b/crates/nvisy-dal/src/provider/pinecone/output.rs index 0c9f5fa..39629e1 100644 --- a/crates/nvisy-dal/src/provider/pinecone/output.rs +++ b/crates/nvisy-dal/src/provider/pinecone/output.rs @@ -15,7 +15,7 @@ impl DataOutput for PineconeProvider { async fn write(&self, items: Vec) -> Result<()> { let namespace = self .namespace() - .map(|ns| pinecone_sdk::models::Namespace::from(ns)) + .map(pinecone_sdk::models::Namespace::from) .unwrap_or_default(); let pinecone_vectors: Vec = items diff --git a/crates/nvisy-dal/src/provider/postgres/config.rs b/crates/nvisy-dal/src/provider/postgres/config.rs index 2541c76..33b12aa 100644 --- a/crates/nvisy-dal/src/provider/postgres/config.rs +++ b/crates/nvisy-dal/src/provider/postgres/config.rs @@ -1,39 +1,20 @@ -//! PostgreSQL configuration. +//! PostgreSQL configuration types. use serde::{Deserialize, Serialize}; -/// PostgreSQL configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PostgresConfig { +/// PostgreSQL credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PostgresCredentials { /// Connection string (e.g., "postgresql://user:pass@host:5432/db"). pub connection_string: String, - /// Default table name. - #[serde(skip_serializing_if = "Option::is_none")] - pub table: Option, - /// Default schema. - #[serde(skip_serializing_if = "Option::is_none")] - pub schema: Option, } -impl PostgresConfig { - /// Creates a new PostgreSQL configuration. - pub fn new(connection_string: impl Into) -> Self { - Self { - connection_string: connection_string.into(), - table: None, - schema: None, - } - } - - /// Sets the default table. - pub fn with_table(mut self, table: impl Into) -> Self { - self.table = Some(table.into()); - self - } - - /// Sets the default schema. - pub fn with_schema(mut self, schema: impl Into) -> Self { - self.schema = Some(schema.into()); - self - } +/// PostgreSQL parameters (non-sensitive). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct PostgresParams { + /// Table name. + pub table: String, + /// Schema name. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub schema: Option, } diff --git a/crates/nvisy-dal/src/provider/postgres/input.rs b/crates/nvisy-dal/src/provider/postgres/input.rs index 1f095f5..a7f549b 100644 --- a/crates/nvisy-dal/src/provider/postgres/input.rs +++ b/crates/nvisy-dal/src/provider/postgres/input.rs @@ -12,8 +12,8 @@ use crate::error::{Error, Result}; #[async_trait] impl DataInput for PostgresProvider { - type Item = Record; type Context = RelationalContext; + type Item = Record; async fn read(&self, ctx: &RelationalContext) -> Result> { let prefix = ctx.table.as_deref().unwrap_or(""); diff --git a/crates/nvisy-dal/src/provider/postgres/mod.rs b/crates/nvisy-dal/src/provider/postgres/mod.rs index eaeb080..1ded3e6 100644 --- a/crates/nvisy-dal/src/provider/postgres/mod.rs +++ b/crates/nvisy-dal/src/provider/postgres/mod.rs @@ -4,11 +4,11 @@ mod config; mod input; mod output; -pub use config::PostgresConfig; - +pub use config::{PostgresCredentials, PostgresParams}; use opendal::{Operator, services}; -use crate::error::{Error, Result}; +use crate::core::IntoProvider; +use crate::error::Error; /// PostgreSQL provider for relational data. #[derive(Clone)] @@ -16,18 +16,21 @@ pub struct PostgresProvider { operator: Operator, } -impl PostgresProvider { - /// Creates a new PostgreSQL provider. - pub fn new(config: &PostgresConfig) -> Result { - let mut builder = - services::Postgresql::default().connection_string(&config.connection_string); - - if let Some(ref table) = config.table { - builder = builder.table(table); - } - - if let Some(ref root) = config.schema { - builder = builder.root(root); +#[async_trait::async_trait] +impl IntoProvider for PostgresProvider { + type Credentials = PostgresCredentials; + type Params = PostgresParams; + + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let mut builder = services::Postgresql::default() + .connection_string(&credentials.connection_string) + .table(¶ms.table); + + if let Some(ref schema) = params.schema { + builder = builder.root(schema); } let operator = Operator::new(builder) diff --git a/crates/nvisy-dal/src/provider/qdrant/config.rs b/crates/nvisy-dal/src/provider/qdrant/config.rs index c0e33f2..176d5fb 100644 --- a/crates/nvisy-dal/src/provider/qdrant/config.rs +++ b/crates/nvisy-dal/src/provider/qdrant/config.rs @@ -1,49 +1,23 @@ -//! Qdrant configuration. +//! Qdrant configuration types. use serde::{Deserialize, Serialize}; -/// Qdrant configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct QdrantConfig { +/// Qdrant credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct QdrantCredentials { /// Qdrant server URL (e.g., "http://localhost:6334"). pub url: String, /// API key for authentication. #[serde(skip_serializing_if = "Option::is_none")] pub api_key: Option, - /// Default collection name. - #[serde(skip_serializing_if = "Option::is_none")] - pub collection: Option, +} + +/// Qdrant parameters (non-sensitive). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct QdrantParams { + /// Collection name. + pub collection: String, /// Vector dimensions. #[serde(skip_serializing_if = "Option::is_none")] pub dimensions: Option, } - -impl QdrantConfig { - /// Creates a new Qdrant configuration. - pub fn new(url: impl Into) -> Self { - Self { - url: url.into(), - api_key: None, - collection: None, - dimensions: None, - } - } - - /// Sets the API key. 
- pub fn with_api_key(mut self, api_key: impl Into) -> Self { - self.api_key = Some(api_key.into()); - self - } - - /// Sets the default collection. - pub fn with_collection(mut self, collection: impl Into) -> Self { - self.collection = Some(collection.into()); - self - } - - /// Sets the vector dimensions. - pub fn with_dimensions(mut self, dimensions: usize) -> Self { - self.dimensions = Some(dimensions); - self - } -} diff --git a/crates/nvisy-dal/src/provider/qdrant/mod.rs b/crates/nvisy-dal/src/provider/qdrant/mod.rs index 46df2e2..c295b87 100644 --- a/crates/nvisy-dal/src/provider/qdrant/mod.rs +++ b/crates/nvisy-dal/src/provider/qdrant/mod.rs @@ -5,7 +5,7 @@ mod output; use std::collections::HashMap; -pub use config::QdrantConfig; +pub use config::{QdrantCredentials, QdrantParams}; use qdrant_client::Qdrant; use qdrant_client::qdrant::vectors_config::Config as VectorsConfig; use qdrant_client::qdrant::with_payload_selector::SelectorOptions; @@ -15,28 +15,34 @@ use qdrant_client::qdrant::{ VectorParamsBuilder, }; +use crate::core::IntoProvider; use crate::error::{Error, Result}; /// Qdrant provider for vector storage. pub struct QdrantProvider { client: Qdrant, - config: QdrantConfig, + params: QdrantParams, } -impl QdrantProvider { - /// Creates a new Qdrant provider. - pub async fn new(config: &QdrantConfig) -> Result { - let client = Qdrant::from_url(&config.url) - .api_key(config.api_key.clone()) +#[async_trait::async_trait] +impl IntoProvider for QdrantProvider { + type Credentials = QdrantCredentials; + type Params = QdrantParams; + + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let client = Qdrant::from_url(&credentials.url) + .api_key(credentials.api_key) .build() .map_err(|e| Error::connection(e.to_string()))?; - Ok(Self { - client, - config: config.clone(), - }) + Ok(Self { client, params }) } +} +impl QdrantProvider { /// Ensures a collection exists, creating it if necessary. pub(crate) async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { let exists = self @@ -62,8 +68,8 @@ impl QdrantProvider { } /// Returns the configured collection name. - pub fn collection(&self) -> Option<&str> { - self.config.collection.as_deref() + pub fn collection(&self) -> &str { + &self.params.collection } /// Searches for similar vectors. diff --git a/crates/nvisy-dal/src/provider/qdrant/output.rs b/crates/nvisy-dal/src/provider/qdrant/output.rs index d3ffc4f..6319c86 100644 --- a/crates/nvisy-dal/src/provider/qdrant/output.rs +++ b/crates/nvisy-dal/src/provider/qdrant/output.rs @@ -19,9 +19,7 @@ impl DataOutput for QdrantProvider { return Ok(()); } - let collection = self - .collection() - .ok_or_else(|| Error::invalid_input("Collection name required in provider config"))?; + let collection = self.collection(); let dimensions = items .first() diff --git a/crates/nvisy-dal/src/provider/s3/config.rs b/crates/nvisy-dal/src/provider/s3/config.rs index b938ee5..81a8e03 100644 --- a/crates/nvisy-dal/src/provider/s3/config.rs +++ b/crates/nvisy-dal/src/provider/s3/config.rs @@ -1,61 +1,27 @@ -//! Amazon S3 configuration. +//! Amazon S3 configuration types. use serde::{Deserialize, Serialize}; -/// Amazon S3 configuration. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct S3Config { - /// Bucket name. - pub bucket: String, +/// Amazon S3 credentials (sensitive). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct S3Credentials { /// AWS region. 
pub region: String, - /// Custom endpoint URL (for S3-compatible storage like MinIO, R2). - #[serde(skip_serializing_if = "Option::is_none")] - pub endpoint: Option, /// Access key ID. - #[serde(skip_serializing_if = "Option::is_none")] - pub access_key_id: Option, + pub access_key_id: String, /// Secret access key. + pub secret_access_key: String, + /// Custom endpoint URL (for S3-compatible storage like MinIO, R2). #[serde(skip_serializing_if = "Option::is_none")] - pub secret_access_key: Option, + pub endpoint: Option, +} + +/// Amazon S3 parameters (non-sensitive). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct S3Params { + /// Bucket name. + pub bucket: String, /// Path prefix within the bucket. #[serde(skip_serializing_if = "Option::is_none")] pub prefix: Option, } - -impl S3Config { - /// Creates a new S3 configuration. - pub fn new(bucket: impl Into, region: impl Into) -> Self { - Self { - bucket: bucket.into(), - region: region.into(), - endpoint: None, - access_key_id: None, - secret_access_key: None, - prefix: None, - } - } - - /// Sets the custom endpoint (for S3-compatible storage). - pub fn with_endpoint(mut self, endpoint: impl Into) -> Self { - self.endpoint = Some(endpoint.into()); - self - } - - /// Sets the access credentials. - pub fn with_credentials( - mut self, - access_key_id: impl Into, - secret_access_key: impl Into, - ) -> Self { - self.access_key_id = Some(access_key_id.into()); - self.secret_access_key = Some(secret_access_key.into()); - self - } - - /// Sets the path prefix. - pub fn with_prefix(mut self, prefix: impl Into) -> Self { - self.prefix = Some(prefix.into()); - self - } -} diff --git a/crates/nvisy-dal/src/provider/s3/input.rs b/crates/nvisy-dal/src/provider/s3/input.rs index 2c5655f..dcfb8a1 100644 --- a/crates/nvisy-dal/src/provider/s3/input.rs +++ b/crates/nvisy-dal/src/provider/s3/input.rs @@ -10,8 +10,8 @@ use crate::error::{Error, Result}; #[async_trait] impl DataInput for S3Provider { - type Item = Blob; type Context = ObjectContext; + type Item = Blob; async fn read(&self, ctx: &ObjectContext) -> Result> { let prefix = ctx.prefix.as_deref().unwrap_or(""); diff --git a/crates/nvisy-dal/src/provider/s3/mod.rs b/crates/nvisy-dal/src/provider/s3/mod.rs index d25e8c4..30557d5 100644 --- a/crates/nvisy-dal/src/provider/s3/mod.rs +++ b/crates/nvisy-dal/src/provider/s3/mod.rs @@ -4,11 +4,11 @@ mod config; mod input; mod output; -pub use config::S3Config; - +pub use config::{S3Credentials, S3Params}; use opendal::{Operator, services}; -use crate::error::{Error, Result}; +use crate::core::IntoProvider; +use crate::error::Error; /// Amazon S3 provider for blob storage. #[derive(Clone)] @@ -16,26 +16,26 @@ pub struct S3Provider { operator: Operator, } -impl S3Provider { - /// Creates a new S3 provider. 
- pub fn new(config: &S3Config) -> Result { +#[async_trait::async_trait] +impl IntoProvider for S3Provider { + type Credentials = S3Credentials; + type Params = S3Params; + + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { let mut builder = services::S3::default() - .bucket(&config.bucket) - .region(&config.region); + .bucket(¶ms.bucket) + .region(&credentials.region) + .access_key_id(&credentials.access_key_id) + .secret_access_key(&credentials.secret_access_key); - if let Some(ref endpoint) = config.endpoint { + if let Some(ref endpoint) = credentials.endpoint { builder = builder.endpoint(endpoint); } - if let Some(ref access_key_id) = config.access_key_id { - builder = builder.access_key_id(access_key_id); - } - - if let Some(ref secret_access_key) = config.secret_access_key { - builder = builder.secret_access_key(secret_access_key); - } - - if let Some(ref prefix) = config.prefix { + if let Some(ref prefix) = params.prefix { builder = builder.root(prefix); } diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index de71b8f..17d5808 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -27,6 +27,7 @@ ollama = [] [dependencies] # Internal crates +nvisy-core = { path = "../nvisy-core" } nvisy-nats = { workspace = true } nvisy-postgres = { workspace = true } diff --git a/crates/nvisy-rig/src/chat/service.rs b/crates/nvisy-rig/src/chat/service.rs index 3985ea1..880ab90 100644 --- a/crates/nvisy-rig/src/chat/service.rs +++ b/crates/nvisy-rig/src/chat/service.rs @@ -6,11 +6,11 @@ use nvisy_nats::NatsClient; use uuid::Uuid; use super::ChatStream; -use crate::Result; use crate::provider::{CompletionModel, EmbeddingProvider}; use crate::session::{CreateSession, Session, SessionStore}; use crate::tool::ToolRegistry; use crate::tool::edit::ApplyResult; +use crate::{Error, Result}; /// Inner state for [`ChatService`]. struct ChatServiceInner { @@ -137,7 +137,12 @@ impl ChatService { /// Generates embeddings for text. pub async fn embed(&self, text: &str) -> Result> { - let embedding = self.inner.embedding_provider.embed_text(text).await?; + let embedding = self + .inner + .embedding_provider + .embed_text(text) + .await + .map_err(|e| Error::provider("embedding", e.to_string()))?; Ok(embedding.vec) } diff --git a/crates/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs index b203572..ed4a0b4 100644 --- a/crates/nvisy-rig/src/error.rs +++ b/crates/nvisy-rig/src/error.rs @@ -78,3 +78,25 @@ impl Error { matches!(self, Self::Provider { .. 
}) } } + +impl From for nvisy_core::Error { + fn from(err: Error) -> Self { + let (kind, message) = match &err { + Error::Provider { provider, message } => ( + nvisy_core::ErrorKind::ExternalError, + format!("{}: {}", provider, message), + ), + Error::Session(msg) => (nvisy_core::ErrorKind::InvalidInput, msg.clone()), + Error::Retrieval(msg) => (nvisy_core::ErrorKind::ExternalError, msg.clone()), + Error::Embedding(_) => (nvisy_core::ErrorKind::ExternalError, err.to_string()), + Error::Completion(_) => (nvisy_core::ErrorKind::ExternalError, err.to_string()), + Error::Prompt(_) => (nvisy_core::ErrorKind::InvalidInput, err.to_string()), + Error::Config(msg) => (nvisy_core::ErrorKind::Configuration, msg.clone()), + Error::Parse(msg) => (nvisy_core::ErrorKind::Serialization, msg.clone()), + }; + + nvisy_core::Error::new(kind) + .with_message(message) + .with_source(err) + } +} diff --git a/crates/nvisy-rig/src/provider/completion/provider.rs b/crates/nvisy-rig/src/provider/completion/provider.rs index f3ad81b..e1b3d74 100644 --- a/crates/nvisy-rig/src/provider/completion/provider.rs +++ b/crates/nvisy-rig/src/provider/completion/provider.rs @@ -2,6 +2,7 @@ use std::sync::Arc; +use nvisy_core::IntoProvider; #[cfg(feature = "ollama")] use rig::client::Nothing; use rig::completion::{AssistantContent, CompletionError, CompletionModel as RigCompletionModel}; @@ -14,7 +15,7 @@ use rig::providers::{anthropic, cohere, gemini, openai, perplexity}; use super::credentials::CompletionCredentials; use super::model::{AnthropicModel, CompletionModel}; -use crate::{Error, Result}; +use crate::Error; /// Completion provider that wraps different rig completion model implementations. /// @@ -50,17 +51,18 @@ pub(crate) enum CompletionService { }, } -impl CompletionProvider { - /// Returns a reference to the inner provider. - pub(crate) fn inner(&self) -> &CompletionService { - &self.0 - } +#[async_trait::async_trait] +impl IntoProvider for CompletionProvider { + type Credentials = CompletionCredentials; + type Params = CompletionModel; - /// Creates a new completion provider from credentials and model. - pub fn new(credentials: &CompletionCredentials, model: &CompletionModel) -> Result { - let inner = match (credentials, model) { + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let inner = match (credentials, params) { (CompletionCredentials::OpenAi { api_key }, CompletionModel::OpenAi(m)) => { - let client = openai::Client::new(api_key) + let client = openai::Client::new(&api_key) .map_err(|e| Error::provider("openai", e.to_string()))? 
.completions_api(); CompletionService::OpenAi { @@ -69,7 +71,7 @@ impl CompletionProvider { } } (CompletionCredentials::Anthropic { api_key }, CompletionModel::Anthropic(m)) => { - let client = anthropic::Client::new(api_key) + let client = anthropic::Client::new(&api_key) .map_err(|e| Error::provider("anthropic", e.to_string()))?; CompletionService::Anthropic { model: client.completion_model(m.as_ref()), @@ -77,7 +79,7 @@ impl CompletionProvider { } } (CompletionCredentials::Cohere { api_key }, CompletionModel::Cohere(m)) => { - let client = cohere::Client::new(api_key) + let client = cohere::Client::new(&api_key) .map_err(|e| Error::provider("cohere", e.to_string()))?; CompletionService::Cohere { model: client.completion_model(m.as_ref()), @@ -85,7 +87,7 @@ impl CompletionProvider { } } (CompletionCredentials::Gemini { api_key }, CompletionModel::Gemini(m)) => { - let client = gemini::Client::new(api_key) + let client = gemini::Client::new(&api_key) .map_err(|e| Error::provider("gemini", e.to_string()))?; CompletionService::Gemini { model: client.completion_model(m.as_ref()), @@ -93,7 +95,7 @@ impl CompletionProvider { } } (CompletionCredentials::Perplexity { api_key }, CompletionModel::Perplexity(m)) => { - let client = perplexity::Client::new(api_key) + let client = perplexity::Client::new(&api_key) .map_err(|e| Error::provider("perplexity", e.to_string()))?; CompletionService::Perplexity { model: client.completion_model(m.as_ref()), @@ -104,7 +106,7 @@ impl CompletionProvider { (CompletionCredentials::Ollama { base_url }, CompletionModel::Ollama(model_name)) => { let client = ollama::Client::builder() .api_key(Nothing) - .base_url(base_url) + .base_url(&base_url) .build() .map_err(|e| Error::provider("ollama", e.to_string()))?; CompletionService::Ollama { @@ -113,14 +115,21 @@ impl CompletionProvider { } } #[allow(unreachable_patterns)] - _ => return Err(Error::config("mismatched credentials and model provider")), + _ => return Err(Error::config("mismatched credentials and model provider").into()), }; Ok(Self(Arc::new(inner))) } +} + +impl CompletionProvider { + /// Returns a reference to the inner provider. + pub(crate) fn inner(&self) -> &CompletionService { + &self.0 + } /// Creates an Ollama completion provider (convenience for local development). #[cfg(feature = "ollama")] - pub fn ollama(base_url: &str, model_name: &str) -> Result { + pub fn ollama(base_url: &str, model_name: &str) -> nvisy_core::Result { let client = ollama::Client::builder() .api_key(Nothing) .base_url(base_url) @@ -133,7 +142,7 @@ impl CompletionProvider { } /// Creates an Anthropic completion provider with a specific model. - pub fn anthropic(api_key: &str, model: AnthropicModel) -> Result { + pub fn anthropic(api_key: &str, model: AnthropicModel) -> nvisy_core::Result { let client = anthropic::Client::new(api_key) .map_err(|e| Error::provider("anthropic", e.to_string()))?; Ok(Self(Arc::new(CompletionService::Anthropic { @@ -169,9 +178,15 @@ impl CompletionProvider { } /// Sends a completion request with the given prompt and chat history. 
- pub async fn complete(&self, prompt: &str, chat_history: Vec) -> Result { + pub async fn complete( + &self, + prompt: &str, + chat_history: Vec, + ) -> nvisy_core::Result { let model_name = self.model_name().to_string(); - let map_err = |e: CompletionError| Error::provider(&model_name, e.to_string()); + let map_err = |e: CompletionError| { + nvisy_core::Error::from(Error::provider(&model_name, e.to_string())) + }; match self.0.as_ref() { CompletionService::OpenAi { model, .. } => model diff --git a/crates/nvisy-rig/src/provider/embedding/provider.rs b/crates/nvisy-rig/src/provider/embedding/provider.rs index b379705..bb4c4d4 100644 --- a/crates/nvisy-rig/src/provider/embedding/provider.rs +++ b/crates/nvisy-rig/src/provider/embedding/provider.rs @@ -2,6 +2,7 @@ use std::sync::Arc; +use nvisy_core::IntoProvider; #[cfg(feature = "ollama")] use rig::client::Nothing; use rig::embeddings::{Embedding, EmbeddingModel as RigEmbeddingModel}; @@ -14,7 +15,7 @@ use super::credentials::EmbeddingCredentials; use super::model::EmbeddingModel; #[cfg(feature = "ollama")] use super::model::OllamaEmbeddingModel; -use crate::{Error, Result}; +use crate::Error; /// Default maximum documents per embedding request. /// @@ -48,17 +49,18 @@ pub(crate) enum EmbeddingService { }, } -impl EmbeddingProvider { - /// Returns a reference to the inner provider. - pub(crate) fn inner(&self) -> &EmbeddingService { - &self.0 - } +#[async_trait::async_trait] +impl IntoProvider for EmbeddingProvider { + type Credentials = EmbeddingCredentials; + type Params = EmbeddingModel; - /// Creates a new embedding provider from credentials and model. - pub fn new(credentials: &EmbeddingCredentials, model: &EmbeddingModel) -> Result { - let inner = match (credentials, model) { + async fn create( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let inner = match (credentials, params) { (EmbeddingCredentials::OpenAi { api_key }, EmbeddingModel::OpenAi(m)) => { - let client = openai::Client::new(api_key) + let client = openai::Client::new(&api_key) .map_err(|e| Error::provider("openai", e.to_string()))?; EmbeddingService::OpenAi { model: client.embedding_model_with_ndims(m.as_ref(), m.dimensions()), @@ -66,7 +68,7 @@ impl EmbeddingProvider { } } (EmbeddingCredentials::Cohere { api_key }, EmbeddingModel::Cohere(m)) => { - let client = cohere::Client::new(api_key) + let client = cohere::Client::new(&api_key) .map_err(|e| Error::provider("cohere", e.to_string()))?; EmbeddingService::Cohere { model: client.embedding_model_with_ndims( @@ -78,7 +80,7 @@ impl EmbeddingProvider { } } (EmbeddingCredentials::Gemini { api_key }, EmbeddingModel::Gemini(m)) => { - let client = gemini::Client::new(api_key) + let client = gemini::Client::new(&api_key) .map_err(|e| Error::provider("gemini", e.to_string()))?; EmbeddingService::Gemini { model: client.embedding_model_with_ndims(m.as_ref(), m.dimensions()), @@ -89,7 +91,7 @@ impl EmbeddingProvider { (EmbeddingCredentials::Ollama { base_url }, EmbeddingModel::Ollama(m)) => { let client = ollama::Client::builder() .api_key(Nothing) - .base_url(base_url) + .base_url(&base_url) .build() .map_err(|e| Error::provider("ollama", e.to_string()))?; EmbeddingService::Ollama { @@ -99,14 +101,21 @@ impl EmbeddingProvider { } } #[allow(unreachable_patterns)] - _ => return Err(Error::config("mismatched credentials and model provider")), + _ => return Err(Error::config("mismatched credentials and model provider").into()), }; Ok(Self(Arc::new(inner))) } +} + +impl EmbeddingProvider { + 
/// Returns a reference to the inner provider. + pub(crate) fn inner(&self) -> &EmbeddingService { + &self.0 + } /// Creates an Ollama embedding provider (convenience for local development). #[cfg(feature = "ollama")] - pub fn ollama(base_url: &str, model: OllamaEmbeddingModel) -> Result { + pub fn ollama(base_url: &str, model: OllamaEmbeddingModel) -> nvisy_core::Result { let client = ollama::Client::builder() .api_key(Nothing) .base_url(base_url) @@ -144,10 +153,10 @@ impl EmbeddingProvider { /// Embed a single text document. /// /// This is a convenience method that delegates to the trait implementation. - pub async fn embed_text(&self, text: &str) -> Result { + pub async fn embed_text(&self, text: &str) -> nvisy_core::Result { RigEmbeddingModel::embed_text(self, text) .await - .map_err(|e| Error::provider(self.provider_name(), e.to_string())) + .map_err(|e| Error::provider(self.provider_name(), e.to_string()).into()) } /// Embed multiple text documents. @@ -156,10 +165,10 @@ impl EmbeddingProvider { pub async fn embed_texts( &self, texts: impl IntoIterator + Send, - ) -> Result> { + ) -> nvisy_core::Result> { RigEmbeddingModel::embed_texts(self, texts) .await - .map_err(|e| Error::provider(self.provider_name(), e.to_string())) + .map_err(|e| Error::provider(self.provider_name(), e.to_string()).into()) } } diff --git a/crates/nvisy-rig/src/rag/indexer/mod.rs b/crates/nvisy-rig/src/rag/indexer/mod.rs index 081c5a1..e5e2a00 100644 --- a/crates/nvisy-rig/src/rag/indexer/mod.rs +++ b/crates/nvisy-rig/src/rag/indexer/mod.rs @@ -79,7 +79,11 @@ impl Indexer { let chunk_count = texts.len(); tracing::debug!(chunk_count, "embedding chunks"); - let embeddings = self.provider.embed_texts(texts).await?; + let embeddings = self + .provider + .embed_texts(texts) + .await + .map_err(|e| Error::provider("embedding", e.to_string()))?; if embeddings.len() != chunk_count { return Err(Error::config(format!( diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs index a90ef00..976e5b6 100644 --- a/crates/nvisy-rig/src/rag/searcher/mod.rs +++ b/crates/nvisy-rig/src/rag/searcher/mod.rs @@ -60,7 +60,11 @@ impl Searcher { /// Searches for relevant chunks without loading content. pub async fn query(&self, query: &str, limit: u32) -> Result> { - let embedding = self.provider.embed_text(query).await?; + let embedding = self + .provider + .embed_text(query) + .await + .map_err(|e| Error::provider("embedding", e.to_string()))?; let query_vector: Vector = embedding .vec diff --git a/crates/nvisy-rig/src/service/config.rs b/crates/nvisy-rig/src/service/config.rs index 1fc7407..4f1dbed 100644 --- a/crates/nvisy-rig/src/service/config.rs +++ b/crates/nvisy-rig/src/service/config.rs @@ -4,8 +4,6 @@ use clap::Args; use serde::{Deserialize, Serialize}; -#[cfg(feature = "ollama")] -use crate::Result; #[cfg(feature = "ollama")] use crate::provider::{EmbeddingProvider, OllamaEmbeddingModel}; @@ -62,7 +60,7 @@ impl Default for RigConfig { #[cfg(feature = "ollama")] impl RigConfig { /// Creates an Ollama embedding provider from this configuration. 
- pub(crate) fn embedding_provider(&self) -> Result { + pub(crate) fn embedding_provider(&self) -> nvisy_core::Result { let model = OllamaEmbeddingModel::new( &self.ollama_embedding_model, self.ollama_embedding_dimensions, diff --git a/crates/nvisy-rig/src/service/rig.rs b/crates/nvisy-rig/src/service/rig.rs index e251709..bde2b47 100644 --- a/crates/nvisy-rig/src/service/rig.rs +++ b/crates/nvisy-rig/src/service/rig.rs @@ -6,9 +6,9 @@ use nvisy_nats::NatsClient; use nvisy_postgres::PgClient; use super::RigConfig; -use crate::Result; use crate::chat::ChatService; use crate::rag::{RagConfig, RagService}; +use crate::{Error, Result}; /// Inner state for [`RigService`]. struct RigServiceInner { @@ -25,7 +25,9 @@ pub struct RigService { impl RigService { /// Creates a new RigService from configuration. pub async fn new(config: RigConfig, db: PgClient, nats: NatsClient) -> Result { - let embedding_provider = config.embedding_provider()?; + let embedding_provider = config + .embedding_provider() + .map_err(|e| Error::config(e.to_string()))?; let rag_config = RagConfig::default(); let rag = RagService::new(rag_config, embedding_provider.clone(), db, nats.clone()).await?; diff --git a/crates/nvisy-runtime/src/definition/input.rs b/crates/nvisy-runtime/src/definition/input.rs index 121352e..692b850 100644 --- a/crates/nvisy-runtime/src/definition/input.rs +++ b/crates/nvisy-runtime/src/definition/input.rs @@ -3,13 +3,14 @@ use serde::{Deserialize, Serialize}; use super::route::CacheSlot; -use crate::provider::InputProviderParams; +use crate::provider::InputProviderConfig; /// Input provider definition for workflow nodes. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct InputProvider { - /// Provider parameters (contains credentials_id). - pub provider: InputProviderParams, + /// Provider configuration (credentials_id + params). + #[serde(flatten)] + pub provider: InputProviderConfig, } /// Input node definition - source of data for the workflow. @@ -23,8 +24,8 @@ pub enum Input { } impl Input { - /// Creates a new input from a provider. - pub fn from_provider(provider: InputProviderParams) -> Self { + /// Creates a new input from a provider configuration. + pub fn from_provider(provider: InputProviderConfig) -> Self { Self::Provider(InputProvider { provider }) } diff --git a/crates/nvisy-runtime/src/definition/output.rs b/crates/nvisy-runtime/src/definition/output.rs index a41c38a..bd33b8c 100644 --- a/crates/nvisy-runtime/src/definition/output.rs +++ b/crates/nvisy-runtime/src/definition/output.rs @@ -3,13 +3,14 @@ use serde::{Deserialize, Serialize}; use super::route::CacheSlot; -use crate::provider::OutputProviderParams; +use crate::provider::OutputProviderConfig; /// Output provider definition for workflow nodes. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct OutputProvider { - /// Provider parameters (contains credentials_id). - pub provider: OutputProviderParams, + /// Provider configuration (credentials_id + params). + #[serde(flatten)] + pub provider: OutputProviderConfig, } /// Output node definition - destination for workflow data. @@ -23,8 +24,8 @@ pub enum Output { } impl Output { - /// Creates a new output from a provider. - pub fn from_provider(provider: OutputProviderParams) -> Self { + /// Creates a new output from a provider configuration. 
+ pub fn from_provider(provider: OutputProviderConfig) -> Self { Self::Provider(OutputProvider { provider }) } diff --git a/crates/nvisy-runtime/src/engine/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs index 0a44ccb..28ba07b 100644 --- a/crates/nvisy-runtime/src/engine/compiler.rs +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -12,11 +12,11 @@ use std::collections::HashMap; -use super::context::Context; use nvisy_rig::agent::Agents; use nvisy_rig::provider::CompletionProvider; use petgraph::graph::{DiGraph, NodeIndex}; +use super::context::Context; use crate::definition::{Input, NodeId, NodeKind, Output, Workflow}; use crate::error::{Error, Result}; use crate::graph::{ @@ -26,7 +26,7 @@ use crate::graph::{ }; use crate::provider::{ CompletionProviderParams, CredentialsRegistry, EmbeddingProviderParams, InputProvider, - InputProviderParams, IntoProvider, OutputProviderParams, + InputProviderConfig, OutputProviderConfig, }; /// Workflow compiler that transforms definitions into executable graphs. @@ -246,13 +246,13 @@ impl<'a> WorkflowCompiler<'a> { } } - /// Creates an input stream from provider parameters. + /// Creates an input stream from provider configuration. async fn create_provider_input_stream( &self, - params: &InputProviderParams, + config: &InputProviderConfig, ) -> Result { - let creds = self.registry.get(params.credentials_id())?; - let provider = params.clone().into_provider(creds.clone()).await?; + let creds = self.registry.get(config.credentials_id)?; + let provider = config.params.clone().into_provider(creds.clone()).await?; let stream = self.read_from_provider(&provider).await?; @@ -299,13 +299,13 @@ impl<'a> WorkflowCompiler<'a> { } } - /// Creates an output stream from provider parameters. + /// Creates an output stream from provider configuration. 
async fn create_provider_output_stream( &self, - params: &OutputProviderParams, + config: &OutputProviderConfig, ) -> Result { - let creds = self.registry.get(params.credentials_id())?; - let provider = params.clone().into_provider(creds.clone()).await?; + let creds = self.registry.get(config.credentials_id)?; + let provider = config.params.clone().into_provider(creds.clone()).await?; let sink = provider.write_sink(); Ok(OutputStream::new(sink)) diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index a94f877..a172143 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -2,13 +2,12 @@ use std::sync::Arc; -use super::context::Context; use futures::{SinkExt, StreamExt}; use tokio::sync::Semaphore; use super::EngineConfig; use super::compiler::WorkflowCompiler; -use super::context::ExecutionContext; +use super::context::{Context, ExecutionContext}; use crate::definition::{NodeId, Workflow}; use crate::error::{Error, Result}; use crate::graph::{CompiledGraph, CompiledNode, InputStream, OutputStream, Process}; diff --git a/crates/nvisy-runtime/src/graph/route/mod.rs b/crates/nvisy-runtime/src/graph/route/mod.rs index 985ac5c..02a4390 100644 --- a/crates/nvisy-runtime/src/graph/route/mod.rs +++ b/crates/nvisy-runtime/src/graph/route/mod.rs @@ -5,7 +5,6 @@ mod language; pub use file_category::FileCategoryEvaluator; pub use language::LanguageEvaluator; - use nvisy_dal::AnyDataValue; use crate::definition::SwitchDef; diff --git a/crates/nvisy-runtime/src/provider/ai.rs b/crates/nvisy-runtime/src/provider/ai.rs index cee8ffe..451062b 100644 --- a/crates/nvisy-runtime/src/provider/ai.rs +++ b/crates/nvisy-runtime/src/provider/ai.rs @@ -9,7 +9,7 @@ use super::ProviderCredentials; use super::backend::{ AnthropicCompletionParams, AnthropicCredentials, CohereCompletionParams, CohereCredentials, CohereEmbeddingParams, GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams, - IntoProvider, OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams, + IntoAiProvider as _, OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams, PerplexityCompletionParams, PerplexityCredentials, }; use crate::error::{Error, Result}; @@ -54,12 +54,12 @@ impl CompletionProviderParams { } } -#[async_trait::async_trait] -impl IntoProvider for CompletionProviderParams { - type Credentials = ProviderCredentials; - type Output = CompletionProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { +impl CompletionProviderParams { + /// Creates a completion provider from these params and credentials. + pub async fn into_provider( + self, + credentials: ProviderCredentials, + ) -> Result { match (self, credentials) { (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c).await, (Self::Anthropic(p), ProviderCredentials::Anthropic(c)) => p.into_provider(c).await, @@ -116,12 +116,12 @@ impl EmbeddingProviderParams { } } -#[async_trait::async_trait] -impl IntoProvider for EmbeddingProviderParams { - type Credentials = ProviderCredentials; - type Output = EmbeddingProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { +impl EmbeddingProviderParams { + /// Creates an embedding provider from these params and credentials. 
+ pub async fn into_provider( + self, + credentials: ProviderCredentials, + ) -> Result { match (self, credentials) { (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c).await, (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c).await, diff --git a/crates/nvisy-runtime/src/provider/backend/anthropic.rs b/crates/nvisy-runtime/src/provider/backend/anthropic.rs index 663c04a..d664df9 100644 --- a/crates/nvisy-runtime/src/provider/backend/anthropic.rs +++ b/crates/nvisy-runtime/src/provider/backend/anthropic.rs @@ -1,10 +1,11 @@ //! Anthropic provider. +use nvisy_core::IntoProvider; use nvisy_rig::provider::{AnthropicModel, CompletionProvider}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::IntoProvider; +use super::IntoAiProvider; use crate::error::{Error, Result}; /// Anthropic credentials. @@ -34,7 +35,7 @@ impl AnthropicCompletionParams { } #[async_trait::async_trait] -impl IntoProvider for AnthropicCompletionParams { +impl IntoAiProvider for AnthropicCompletionParams { type Credentials = AnthropicCredentials; type Output = CompletionProvider; @@ -43,6 +44,8 @@ impl IntoProvider for AnthropicCompletionParams { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::Anthropic(self.model); - CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) + CompletionProvider::create(model, rig_creds) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/azblob.rs b/crates/nvisy-runtime/src/provider/backend/azblob.rs deleted file mode 100644 index 19d2636..0000000 --- a/crates/nvisy-runtime/src/provider/backend/azblob.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! Azure Blob Storage provider. - -use nvisy_dal::provider::{AzblobConfig, AzblobProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// Azure Blob Storage credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AzblobCredentials { - /// Storage account name. - pub account_name: String, - /// Account key for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub account_key: Option, - /// SAS token for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub sas_token: Option, -} - -/// Azure Blob Storage parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct AzblobParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Container name. - pub container: String, - /// Path prefix within the container. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} - -#[async_trait::async_trait] -impl IntoProvider for AzblobParams { - type Credentials = AzblobCredentials; - type Output = AzblobProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let mut config = AzblobConfig::new(credentials.account_name, self.container); - - if let Some(account_key) = credentials.account_key { - config = config.with_account_key(account_key); - } - if let Some(sas_token) = credentials.sas_token { - config = config.with_sas_token(sas_token); - } - if let Some(prefix) = self.prefix { - config = config.with_prefix(prefix); - } - - AzblobProvider::new(&config).map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/cohere.rs b/crates/nvisy-runtime/src/provider/backend/cohere.rs index 85a0c09..2b23528 100644 --- a/crates/nvisy-runtime/src/provider/backend/cohere.rs +++ b/crates/nvisy-runtime/src/provider/backend/cohere.rs @@ -1,12 +1,13 @@ //! Cohere provider. +use nvisy_core::IntoProvider; use nvisy_rig::provider::{ CohereCompletionModel, CohereEmbeddingModel, CompletionProvider, EmbeddingProvider, }; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::IntoProvider; +use super::IntoAiProvider; use crate::error::{Error, Result}; /// Cohere credentials. @@ -36,7 +37,7 @@ impl CohereCompletionParams { } #[async_trait::async_trait] -impl IntoProvider for CohereCompletionParams { +impl IntoAiProvider for CohereCompletionParams { type Credentials = CohereCredentials; type Output = CompletionProvider; @@ -45,7 +46,9 @@ impl IntoProvider for CohereCompletionParams { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::Cohere(self.model); - CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) + CompletionProvider::create(model, rig_creds) + .await + .map_err(|e| Error::Internal(e.to_string())) } } @@ -69,7 +72,7 @@ impl CohereEmbeddingParams { } #[async_trait::async_trait] -impl IntoProvider for CohereEmbeddingParams { +impl IntoAiProvider for CohereEmbeddingParams { type Credentials = CohereCredentials; type Output = EmbeddingProvider; @@ -78,6 +81,8 @@ impl IntoProvider for CohereEmbeddingParams { api_key: credentials.api_key, }; let model = nvisy_rig::provider::EmbeddingModel::Cohere(self.model); - EmbeddingProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) + EmbeddingProvider::create(model, rig_creds) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/gcs.rs b/crates/nvisy-runtime/src/provider/backend/gcs.rs deleted file mode 100644 index b83b45e..0000000 --- a/crates/nvisy-runtime/src/provider/backend/gcs.rs +++ /dev/null @@ -1,43 +0,0 @@ -//! Google Cloud Storage provider. - -use nvisy_dal::provider::{GcsConfig, GcsProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// Google Cloud Storage credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GcsCredentials { - /// Service account credentials JSON. - pub credentials_json: String, -} - -/// Google Cloud Storage parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct GcsParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Bucket name. - pub bucket: String, - /// Path prefix within the bucket. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} - -#[async_trait::async_trait] -impl IntoProvider for GcsParams { - type Credentials = GcsCredentials; - type Output = GcsProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let mut config = GcsConfig::new(self.bucket).with_credentials(credentials.credentials_json); - - if let Some(prefix) = self.prefix { - config = config.with_prefix(prefix); - } - - GcsProvider::new(&config).map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/gemini.rs b/crates/nvisy-runtime/src/provider/backend/gemini.rs index 8c38450..5ce401a 100644 --- a/crates/nvisy-runtime/src/provider/backend/gemini.rs +++ b/crates/nvisy-runtime/src/provider/backend/gemini.rs @@ -1,12 +1,13 @@ //! Google Gemini provider. +use nvisy_core::IntoProvider; use nvisy_rig::provider::{ CompletionProvider, EmbeddingProvider, GeminiCompletionModel, GeminiEmbeddingModel, }; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::IntoProvider; +use super::IntoAiProvider; use crate::error::{Error, Result}; /// Gemini credentials. @@ -36,7 +37,7 @@ impl GeminiCompletionParams { } #[async_trait::async_trait] -impl IntoProvider for GeminiCompletionParams { +impl IntoAiProvider for GeminiCompletionParams { type Credentials = GeminiCredentials; type Output = CompletionProvider; @@ -45,7 +46,9 @@ impl IntoProvider for GeminiCompletionParams { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::Gemini(self.model); - CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) + CompletionProvider::create(model, rig_creds) + .await + .map_err(|e| Error::Internal(e.to_string())) } } @@ -69,7 +72,7 @@ impl GeminiEmbeddingParams { } #[async_trait::async_trait] -impl IntoProvider for GeminiEmbeddingParams { +impl IntoAiProvider for GeminiEmbeddingParams { type Credentials = GeminiCredentials; type Output = EmbeddingProvider; @@ -78,6 +81,8 @@ impl IntoProvider for GeminiEmbeddingParams { api_key: credentials.api_key, }; let model = nvisy_rig::provider::EmbeddingModel::Gemini(self.model); - EmbeddingProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) + EmbeddingProvider::create(model, rig_creds) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/milvus.rs b/crates/nvisy-runtime/src/provider/backend/milvus.rs deleted file mode 100644 index 9893339..0000000 --- a/crates/nvisy-runtime/src/provider/backend/milvus.rs +++ /dev/null @@ -1,70 +0,0 @@ -//! Milvus vector database provider. - -use nvisy_dal::provider::{MilvusConfig, MilvusProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// Default Milvus port. -fn default_milvus_port() -> u16 { - 19530 -} - -/// Milvus credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MilvusCredentials { - /// Milvus server host. - pub host: String, - /// Milvus server port. - #[serde(default = "default_milvus_port")] - pub port: u16, - /// Username for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub username: Option, - /// Password for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub password: Option, -} - -/// Milvus parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MilvusParams { - /// Reference to stored credentials. 
- pub credentials_id: Uuid, - /// Collection name. - pub collection: String, - /// Database name. - #[serde(skip_serializing_if = "Option::is_none")] - pub database: Option, - /// Vector dimensions. - #[serde(skip_serializing_if = "Option::is_none")] - pub dimensions: Option, -} - -#[async_trait::async_trait] -impl IntoProvider for MilvusParams { - type Credentials = MilvusCredentials; - type Output = MilvusProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let mut config = MilvusConfig::new(credentials.host) - .with_port(credentials.port) - .with_collection(self.collection); - - if let Some((username, password)) = credentials.username.zip(credentials.password) { - config = config.with_credentials(username, password); - } - if let Some(database) = self.database { - config = config.with_database(database); - } - if let Some(dimensions) = self.dimensions { - config = config.with_dimensions(dimensions); - } - - MilvusProvider::new(&config) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/mod.rs b/crates/nvisy-runtime/src/provider/backend/mod.rs index 09512b5..8b0a376 100644 --- a/crates/nvisy-runtime/src/provider/backend/mod.rs +++ b/crates/nvisy-runtime/src/provider/backend/mod.rs @@ -1,21 +1,22 @@ //! Backend provider implementations. //! -//! Each provider file contains credentials and params for a specific backend: +//! Storage and vector database providers are re-exported from `nvisy_dal`. +//! AI providers are defined locally in this module. //! -//! ## Storage backends +//! ## Storage backends (from nvisy_dal) //! - `s3` - Amazon S3 //! - `gcs` - Google Cloud Storage //! - `azblob` - Azure Blob Storage //! - `postgres` - PostgreSQL //! - `mysql` - MySQL //! -//! ## Vector databases +//! ## Vector databases (from nvisy_dal) //! - `qdrant` - Qdrant vector database //! - `pinecone` - Pinecone vector database //! - `milvus` - Milvus vector database //! - `pgvector` - pgvector (PostgreSQL extension) //! -//! ## AI providers +//! ## AI providers (local) //! - `openai` - OpenAI (completion + embedding) //! - `anthropic` - Anthropic (completion only) //! 
- `cohere` - Cohere (completion + embedding) @@ -24,47 +25,58 @@ use crate::error::Result; -// Storage backends -mod azblob; -mod gcs; -mod mysql; -mod postgres; -mod s3; - -// Vector databases -mod milvus; -mod pgvector; -mod pinecone; -mod qdrant; - -// AI providers +// AI providers (local implementations) mod anthropic; mod cohere; mod gemini; mod openai; mod perplexity; -// Storage backend exports +// Re-export storage backend types from nvisy_dal // AI provider exports pub use anthropic::{AnthropicCompletionParams, AnthropicCredentials}; -pub use azblob::{AzblobCredentials, AzblobParams}; pub use cohere::{CohereCompletionParams, CohereCredentials, CohereEmbeddingParams}; -pub use gcs::{GcsCredentials, GcsParams}; pub use gemini::{GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams}; -// Vector database exports -pub use milvus::{MilvusCredentials, MilvusParams}; -pub use mysql::{MysqlCredentials, MysqlParams}; +pub use nvisy_dal::provider::{ + // Object storage + AzblobCredentials, + AzblobParams, + AzblobProvider, + GcsCredentials, + GcsParams, + GcsProvider, + // Vector databases + MilvusCredentials, + MilvusParams, + MilvusProvider, + // Relational databases + MysqlCredentials, + MysqlParams, + MysqlProvider, + PgVectorCredentials, + PgVectorParams, + PgVectorProvider, + PineconeCredentials, + PineconeParams, + PineconeProvider, + PostgresCredentials, + PostgresParams, + PostgresProvider, + QdrantCredentials, + QdrantParams, + QdrantProvider, + S3Credentials, + S3Params, + S3Provider, +}; pub use openai::{OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams}; pub use perplexity::{PerplexityCompletionParams, PerplexityCredentials}; -pub use pgvector::{PgVectorCredentials, PgVectorParams}; -pub use pinecone::{PineconeCredentials, PineconeParams}; -pub use postgres::{PostgresCredentials, PostgresParams}; -pub use qdrant::{QdrantCredentials, QdrantParams}; -pub use s3::{S3Credentials, S3Params}; -/// Trait for provider parameters that can be combined with credentials to create a provider. +/// Trait for AI provider parameters that can be combined with credentials to create a provider. +/// +/// This is distinct from `nvisy_dal::IntoProvider` which is for storage/vector providers. #[async_trait::async_trait] -pub trait IntoProvider { +pub trait IntoAiProvider { /// The credentials type required by this provider. type Credentials: Send; /// The output type (provider instance). diff --git a/crates/nvisy-runtime/src/provider/backend/mysql.rs b/crates/nvisy-runtime/src/provider/backend/mysql.rs deleted file mode 100644 index c2686f5..0000000 --- a/crates/nvisy-runtime/src/provider/backend/mysql.rs +++ /dev/null @@ -1,43 +0,0 @@ -//! MySQL provider. - -use nvisy_dal::provider::{MysqlConfig, MysqlProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// MySQL credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MysqlCredentials { - /// Connection string (e.g., "mysql://user:pass@host:3306/db"). - pub connection_string: String, -} - -/// MySQL parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MysqlParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Table name. - pub table: String, - /// Database name. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub database: Option, -} - -#[async_trait::async_trait] -impl IntoProvider for MysqlParams { - type Credentials = MysqlCredentials; - type Output = MysqlProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let mut config = MysqlConfig::new(credentials.connection_string).with_table(self.table); - - if let Some(database) = self.database { - config = config.with_database(database); - } - - MysqlProvider::new(&config).map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/openai.rs b/crates/nvisy-runtime/src/provider/backend/openai.rs index 36dc484..f6c4be3 100644 --- a/crates/nvisy-runtime/src/provider/backend/openai.rs +++ b/crates/nvisy-runtime/src/provider/backend/openai.rs @@ -1,12 +1,13 @@ //! OpenAI provider. +use nvisy_core::IntoProvider; use nvisy_rig::provider::{ CompletionProvider, EmbeddingProvider, OpenAiCompletionModel, OpenAiEmbeddingModel, }; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::IntoProvider; +use super::IntoAiProvider; use crate::error::{Error, Result}; /// OpenAI credentials. @@ -36,7 +37,7 @@ impl OpenAiCompletionParams { } #[async_trait::async_trait] -impl IntoProvider for OpenAiCompletionParams { +impl IntoAiProvider for OpenAiCompletionParams { type Credentials = OpenAiCredentials; type Output = CompletionProvider; @@ -45,7 +46,9 @@ impl IntoProvider for OpenAiCompletionParams { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::OpenAi(self.model); - CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) + CompletionProvider::create(model, rig_creds) + .await + .map_err(|e| Error::Internal(e.to_string())) } } @@ -69,7 +72,7 @@ impl OpenAiEmbeddingParams { } #[async_trait::async_trait] -impl IntoProvider for OpenAiEmbeddingParams { +impl IntoAiProvider for OpenAiEmbeddingParams { type Credentials = OpenAiCredentials; type Output = EmbeddingProvider; @@ -78,6 +81,8 @@ impl IntoProvider for OpenAiEmbeddingParams { api_key: credentials.api_key, }; let model = nvisy_rig::provider::EmbeddingModel::OpenAi(self.model); - EmbeddingProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) + EmbeddingProvider::create(model, rig_creds) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/perplexity.rs b/crates/nvisy-runtime/src/provider/backend/perplexity.rs index 03106a1..ec06e4e 100644 --- a/crates/nvisy-runtime/src/provider/backend/perplexity.rs +++ b/crates/nvisy-runtime/src/provider/backend/perplexity.rs @@ -1,10 +1,11 @@ //! Perplexity provider. +use nvisy_core::IntoProvider; use nvisy_rig::provider::{CompletionProvider, PerplexityModel}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use super::IntoProvider; +use super::IntoAiProvider; use crate::error::{Error, Result}; /// Perplexity credentials. 
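Note (not part of the patch): the AI backends above now implement the locally defined `IntoAiProvider` trait instead of the old `IntoProvider`, pairing non-sensitive params with workspace credentials to build a rig provider. A minimal sketch of the intended call site follows, assuming the trait and types are in scope via the `provider::backend` module and that the values are resolved from the workflow definition and the `CredentialsRegistry`; the module paths and function name here are illustrative only.

```rust
// Illustrative sketch, not part of the patch.
use nvisy_rig::provider::CompletionProvider;

use super::{IntoAiProvider, PerplexityCompletionParams, PerplexityCredentials};
use crate::error::Result;

async fn build_perplexity_completion(
    params: PerplexityCompletionParams,
    credentials: PerplexityCredentials,
) -> Result<CompletionProvider> {
    // `into_provider` consumes the params, pairs them with the workspace
    // credentials, and yields the concrete provider
    // (`type Output = CompletionProvider` in the impl above).
    params.into_provider(credentials).await
}
```
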
@@ -34,7 +35,7 @@ impl PerplexityCompletionParams { } #[async_trait::async_trait] -impl IntoProvider for PerplexityCompletionParams { +impl IntoAiProvider for PerplexityCompletionParams { type Credentials = PerplexityCredentials; type Output = CompletionProvider; @@ -43,6 +44,8 @@ impl IntoProvider for PerplexityCompletionParams { api_key: credentials.api_key, }; let model = nvisy_rig::provider::CompletionModel::Perplexity(self.model); - CompletionProvider::new(&rig_creds, &model).map_err(|e| Error::Internal(e.to_string())) + CompletionProvider::create(model, rig_creds) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/pgvector.rs b/crates/nvisy-runtime/src/provider/backend/pgvector.rs deleted file mode 100644 index 8bce688..0000000 --- a/crates/nvisy-runtime/src/provider/backend/pgvector.rs +++ /dev/null @@ -1,40 +0,0 @@ -//! pgvector (PostgreSQL extension) provider. - -use nvisy_dal::provider::{PgVectorConfig, PgVectorProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// pgvector credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PgVectorCredentials { - /// PostgreSQL connection URL. - pub connection_url: String, -} - -/// pgvector parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PgVectorParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Table name. - pub table: String, - /// Vector dimensions. - pub dimensions: usize, -} - -#[async_trait::async_trait] -impl IntoProvider for PgVectorParams { - type Credentials = PgVectorCredentials; - type Output = PgVectorProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let config = - PgVectorConfig::new(credentials.connection_url, self.dimensions).with_table(self.table); - PgVectorProvider::new(&config) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/pinecone.rs b/crates/nvisy-runtime/src/provider/backend/pinecone.rs deleted file mode 100644 index f9ab4dd..0000000 --- a/crates/nvisy-runtime/src/provider/backend/pinecone.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Pinecone vector database provider. - -use nvisy_dal::provider::{PineconeConfig, PineconeProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// Pinecone credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PineconeCredentials { - /// Pinecone API key. - pub api_key: String, - /// Environment (e.g., "us-east-1-aws"). - pub environment: String, -} - -/// Pinecone parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PineconeParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Index name. - pub index: String, - /// Namespace. - #[serde(skip_serializing_if = "Option::is_none")] - pub namespace: Option, - /// Vector dimensions. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub dimensions: Option, -} - -#[async_trait::async_trait] -impl IntoProvider for PineconeParams { - type Credentials = PineconeCredentials; - type Output = PineconeProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let mut config = - PineconeConfig::new(credentials.api_key, credentials.environment, self.index); - - if let Some(namespace) = self.namespace { - config = config.with_namespace(namespace); - } - if let Some(dimensions) = self.dimensions { - config = config.with_dimensions(dimensions); - } - - PineconeProvider::new(&config) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/postgres.rs b/crates/nvisy-runtime/src/provider/backend/postgres.rs deleted file mode 100644 index 22f6290..0000000 --- a/crates/nvisy-runtime/src/provider/backend/postgres.rs +++ /dev/null @@ -1,43 +0,0 @@ -//! PostgreSQL provider. - -use nvisy_dal::provider::{PostgresConfig, PostgresProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// PostgreSQL credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PostgresCredentials { - /// Connection string (e.g., "postgresql://user:pass@host:5432/db"). - pub connection_string: String, -} - -/// PostgreSQL parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PostgresParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Table name. - pub table: String, - /// Schema name. - #[serde(skip_serializing_if = "Option::is_none")] - pub schema: Option, -} - -#[async_trait::async_trait] -impl IntoProvider for PostgresParams { - type Credentials = PostgresCredentials; - type Output = PostgresProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let mut config = PostgresConfig::new(credentials.connection_string).with_table(self.table); - - if let Some(schema) = self.schema { - config = config.with_schema(schema); - } - - PostgresProvider::new(&config).map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/qdrant.rs b/crates/nvisy-runtime/src/provider/backend/qdrant.rs deleted file mode 100644 index 7e83784..0000000 --- a/crates/nvisy-runtime/src/provider/backend/qdrant.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Qdrant vector database provider. - -use nvisy_dal::provider::{QdrantConfig, QdrantProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// Qdrant credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct QdrantCredentials { - /// Qdrant server URL. - pub url: String, - /// API key for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub api_key: Option, -} - -/// Qdrant parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct QdrantParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Collection name. - pub collection: String, - /// Vector dimensions. 
- #[serde(skip_serializing_if = "Option::is_none")] - pub dimensions: Option, -} - -#[async_trait::async_trait] -impl IntoProvider for QdrantParams { - type Credentials = QdrantCredentials; - type Output = QdrantProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let mut config = QdrantConfig::new(credentials.url).with_collection(self.collection); - - if let Some(api_key) = credentials.api_key { - config = config.with_api_key(api_key); - } - if let Some(dimensions) = self.dimensions { - config = config.with_dimensions(dimensions); - } - - QdrantProvider::new(&config) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/s3.rs b/crates/nvisy-runtime/src/provider/backend/s3.rs deleted file mode 100644 index 765d880..0000000 --- a/crates/nvisy-runtime/src/provider/backend/s3.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Amazon S3 provider. - -use nvisy_dal::provider::{S3Config, S3Provider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoProvider; -use crate::error::{Error, Result}; - -/// Amazon S3 credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct S3Credentials { - /// AWS region. - pub region: String, - /// Access key ID. - pub access_key_id: String, - /// Secret access key. - pub secret_access_key: String, - /// Custom endpoint URL (for S3-compatible storage like MinIO, R2). - #[serde(skip_serializing_if = "Option::is_none")] - pub endpoint: Option, -} - -/// Amazon S3 parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct S3Params { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Bucket name. - pub bucket: String, - /// Path prefix within the bucket. - #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} - -#[async_trait::async_trait] -impl IntoProvider for S3Params { - type Credentials = S3Credentials; - type Output = S3Provider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let mut config = S3Config::new(self.bucket, credentials.region) - .with_credentials(credentials.access_key_id, credentials.secret_access_key); - - if let Some(endpoint) = credentials.endpoint { - config = config.with_endpoint(endpoint); - } - if let Some(prefix) = self.prefix { - config = config.with_prefix(prefix); - } - - S3Provider::new(&config).map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/inputs.rs b/crates/nvisy-runtime/src/provider/inputs.rs index d9c7839..d41c229 100644 --- a/crates/nvisy-runtime/src/provider/inputs.rs +++ b/crates/nvisy-runtime/src/provider/inputs.rs @@ -1,19 +1,53 @@ //! Input provider types and implementations. use derive_more::From; +use nvisy_dal::core::IntoProvider as DalIntoProvider; use nvisy_dal::provider::{ - AzblobProvider, GcsProvider, MysqlProvider, PostgresProvider, S3Provider, + AzblobParams, AzblobProvider, GcsParams, GcsProvider, MysqlParams, MysqlProvider, + PostgresParams, PostgresProvider, S3Params, S3Provider, }; use nvisy_dal::{AnyDataValue, DataTypeId, ObjectContext, RelationalContext}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use super::ProviderCredentials; -use super::backend::{ - AzblobParams, GcsParams, IntoProvider, MysqlParams, PostgresParams, S3Params, -}; use crate::error::{Error, Result}; +/// Input provider configuration (credentials reference + params). 
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct InputProviderConfig { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Provider-specific parameters. + #[serde(flatten)] + pub params: InputProviderParams, +} + +impl InputProviderConfig { + /// Creates a new input provider configuration. + pub fn new(credentials_id: Uuid, params: InputProviderParams) -> Self { + Self { + credentials_id, + params, + } + } + + /// Returns the provider kind as a string. + pub const fn kind(&self) -> &'static str { + self.params.kind() + } + + /// Returns the output data type for this provider. + pub const fn output_type(&self) -> DataTypeId { + self.params.output_type() + } + + /// Creates an input provider from this configuration and credentials. + pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { + self.params.into_provider(credentials).await + } +} + /// Input provider parameters (storage backends only, no vector DBs). #[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] @@ -31,17 +65,6 @@ pub enum InputProviderParams { } impl InputProviderParams { - /// Returns the credentials ID for this provider. - pub fn credentials_id(&self) -> Uuid { - match self { - Self::S3(p) => p.credentials_id, - Self::Gcs(p) => p.credentials_id, - Self::Azblob(p) => p.credentials_id, - Self::Postgres(p) => p.credentials_id, - Self::Mysql(p) => p.credentials_id, - } - } - /// Returns the provider kind as a string. pub const fn kind(&self) -> &'static str { match self { @@ -60,30 +83,35 @@ impl InputProviderParams { Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, } } -} - -#[async_trait::async_trait] -impl IntoProvider for InputProviderParams { - type Credentials = ProviderCredentials; - type Output = InputProvider; - async fn into_provider(self, credentials: Self::Credentials) -> Result { + /// Creates an input provider from these params and credentials. 
+ pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { match (self, credentials) { - (Self::S3(p), ProviderCredentials::S3(c)) => { - Ok(InputProvider::S3(p.into_provider(c).await?)) - } - (Self::Gcs(p), ProviderCredentials::Gcs(c)) => { - Ok(InputProvider::Gcs(p.into_provider(c).await?)) - } - (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { - Ok(InputProvider::Azblob(p.into_provider(c).await?)) - } - (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { - Ok(InputProvider::Postgres(p.into_provider(c).await?)) - } - (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { - Ok(InputProvider::Mysql(p.into_provider(c).await?)) - } + (Self::S3(p), ProviderCredentials::S3(c)) => Ok(InputProvider::S3( + S3Provider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Gcs(p), ProviderCredentials::Gcs(c)) => Ok(InputProvider::Gcs( + GcsProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Azblob(p), ProviderCredentials::Azblob(c)) => Ok(InputProvider::Azblob( + AzblobProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Postgres(p), ProviderCredentials::Postgres(c)) => Ok(InputProvider::Postgres( + PostgresProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Mysql(p), ProviderCredentials::Mysql(c)) => Ok(InputProvider::Mysql( + MysqlProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), (params, creds) => Err(Error::Internal(format!( "credentials type mismatch: expected '{}', got '{}'", params.kind(), diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs index 103c166..f3935d8 100644 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/mod.rs @@ -3,13 +3,14 @@ //! This module separates provider configuration into: //! - [`ProviderCredentials`]: Sensitive credentials (stored per workspace) //! - [`AiCredentials`]: AI provider credentials (stored per workspace) +//! - [`InputProviderConfig`] / [`OutputProviderConfig`]: Config with credentials reference + params //! - [`InputProviderParams`] / [`OutputProviderParams`]: Non-sensitive parameters (part of node definition) //! - [`CompletionProviderParams`] / [`EmbeddingProviderParams`]: AI provider parameters //! - [`CredentialsRegistry`]: In-memory registry for credentials lookup //! //! # Module Structure //! -//! - [`backend`]: Individual provider implementations (credentials + params) +//! 
- [`backend`]: Re-exports from nvisy_dal + local AI provider implementations mod ai; pub mod backend; @@ -19,7 +20,6 @@ mod registry; pub mod runtime; pub use ai::{AiCredentials, CompletionProviderParams, EmbeddingProviderParams}; -pub use backend::IntoProvider; use backend::{ AnthropicCredentials, AzblobCredentials, CohereCredentials, GcsCredentials, GeminiCredentials, MilvusCredentials, MysqlCredentials, OpenAiCredentials, PerplexityCredentials, @@ -27,8 +27,8 @@ use backend::{ S3Credentials, }; use derive_more::From; -pub use inputs::{InputProvider, InputProviderParams}; -pub use outputs::{OutputProvider, OutputProviderParams}; +pub use inputs::{InputProvider, InputProviderConfig, InputProviderParams}; +pub use outputs::{OutputProvider, OutputProviderConfig, OutputProviderParams}; pub use registry::CredentialsRegistry; use serde::{Deserialize, Serialize}; use strum::IntoStaticStr; diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs index 9d724c6..ea5f9cc 100644 --- a/crates/nvisy-runtime/src/provider/outputs.rs +++ b/crates/nvisy-runtime/src/provider/outputs.rs @@ -6,9 +6,11 @@ use std::task::{Context as TaskContext, Poll}; use derive_more::From; use futures::Sink; +use nvisy_dal::core::IntoProvider as DalIntoProvider; use nvisy_dal::provider::{ - AzblobProvider, GcsProvider, MilvusProvider, MysqlProvider, PgVectorProvider, PineconeProvider, - PostgresProvider, QdrantProvider, S3Provider, + AzblobParams, AzblobProvider, GcsParams, GcsProvider, MilvusParams, MilvusProvider, + MysqlParams, MysqlProvider, PgVectorParams, PgVectorProvider, PineconeParams, PineconeProvider, + PostgresParams, PostgresProvider, QdrantParams, QdrantProvider, S3Params, S3Provider, }; use nvisy_dal::{AnyDataValue, DataTypeId}; use serde::{Deserialize, Serialize}; @@ -16,13 +18,44 @@ use tokio::sync::Mutex; use uuid::Uuid; use super::ProviderCredentials; -use super::backend::{ - AzblobParams, GcsParams, IntoProvider, MilvusParams, MysqlParams, PgVectorParams, - PineconeParams, PostgresParams, QdrantParams, S3Params, -}; use crate::error::{Error, Result}; use crate::graph::DataSink; +/// Output provider configuration (credentials reference + params). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct OutputProviderConfig { + /// Reference to stored credentials. + pub credentials_id: Uuid, + /// Provider-specific parameters. + #[serde(flatten)] + pub params: OutputProviderParams, +} + +impl OutputProviderConfig { + /// Creates a new output provider configuration. + pub fn new(credentials_id: Uuid, params: OutputProviderParams) -> Self { + Self { + credentials_id, + params, + } + } + + /// Returns the provider kind as a string. + pub const fn kind(&self) -> &'static str { + self.params.kind() + } + + /// Returns the output data type for this provider. + pub const fn output_type(&self) -> DataTypeId { + self.params.output_type() + } + + /// Creates an output provider from this configuration and credentials. + pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { + self.params.into_provider(credentials).await + } +} + /// Output provider parameters (storage backends + vector DBs). #[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] @@ -48,21 +81,6 @@ pub enum OutputProviderParams { } impl OutputProviderParams { - /// Returns the credentials ID for this provider. 
- pub fn credentials_id(&self) -> Uuid { - match self { - Self::S3(p) => p.credentials_id, - Self::Gcs(p) => p.credentials_id, - Self::Azblob(p) => p.credentials_id, - Self::Postgres(p) => p.credentials_id, - Self::Mysql(p) => p.credentials_id, - Self::Qdrant(p) => p.credentials_id, - Self::Pinecone(p) => p.credentials_id, - Self::Milvus(p) => p.credentials_id, - Self::PgVector(p) => p.credentials_id, - } - } - /// Returns the provider kind as a string. pub const fn kind(&self) -> &'static str { match self { @@ -88,42 +106,55 @@ impl OutputProviderParams { } } } -} -#[async_trait::async_trait] -impl IntoProvider for OutputProviderParams { - type Credentials = ProviderCredentials; - type Output = OutputProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { + /// Creates an output provider from these params and credentials. + pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { match (self, credentials) { - (Self::S3(p), ProviderCredentials::S3(c)) => { - Ok(OutputProvider::S3(p.into_provider(c).await?)) - } - (Self::Gcs(p), ProviderCredentials::Gcs(c)) => { - Ok(OutputProvider::Gcs(p.into_provider(c).await?)) - } - (Self::Azblob(p), ProviderCredentials::Azblob(c)) => { - Ok(OutputProvider::Azblob(p.into_provider(c).await?)) - } - (Self::Postgres(p), ProviderCredentials::Postgres(c)) => { - Ok(OutputProvider::Postgres(p.into_provider(c).await?)) - } - (Self::Mysql(p), ProviderCredentials::Mysql(c)) => { - Ok(OutputProvider::Mysql(p.into_provider(c).await?)) - } - (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => { - Ok(OutputProvider::Qdrant(p.into_provider(c).await?)) - } - (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => { - Ok(OutputProvider::Pinecone(p.into_provider(c).await?)) - } - (Self::Milvus(p), ProviderCredentials::Milvus(c)) => { - Ok(OutputProvider::Milvus(p.into_provider(c).await?)) - } - (Self::PgVector(p), ProviderCredentials::PgVector(c)) => { - Ok(OutputProvider::PgVector(p.into_provider(c).await?)) - } + (Self::S3(p), ProviderCredentials::S3(c)) => Ok(OutputProvider::S3( + S3Provider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Gcs(p), ProviderCredentials::Gcs(c)) => Ok(OutputProvider::Gcs( + GcsProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Azblob(p), ProviderCredentials::Azblob(c)) => Ok(OutputProvider::Azblob( + AzblobProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Postgres(p), ProviderCredentials::Postgres(c)) => Ok(OutputProvider::Postgres( + PostgresProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Mysql(p), ProviderCredentials::Mysql(c)) => Ok(OutputProvider::Mysql( + MysqlProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => Ok(OutputProvider::Qdrant( + QdrantProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => Ok(OutputProvider::Pinecone( + PineconeProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::Milvus(p), ProviderCredentials::Milvus(c)) => Ok(OutputProvider::Milvus( + MilvusProvider::create(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + )), + (Self::PgVector(p), ProviderCredentials::PgVector(c)) => Ok(OutputProvider::PgVector( + PgVectorProvider::create(p, c) + .await + 
.map_err(|e| Error::Internal(e.to_string()))?, + )), (params, creds) => Err(Error::Internal(format!( "credentials type mismatch: expected '{}', got '{}'", params.kind(), diff --git a/crates/nvisy-server/src/handler/response/pipelines.rs b/crates/nvisy-server/src/handler/response/pipelines.rs index 86a460d..c60bd5a 100644 --- a/crates/nvisy-server/src/handler/response/pipelines.rs +++ b/crates/nvisy-server/src/handler/response/pipelines.rs @@ -39,8 +39,7 @@ pub struct Pipeline { impl Pipeline { /// Creates a new instance of [`Pipeline`] from the database model. pub fn from_model(pipeline: model::Pipeline) -> Self { - let definition: Workflow = - serde_json::from_value(pipeline.definition).unwrap_or_default(); + let definition: Workflow = serde_json::from_value(pipeline.definition).unwrap_or_default(); Self { pipeline_id: pipeline.id, workspace_id: pipeline.workspace_id, From e69b297716b4b91068f9c7a35373ac2b1133a4f2 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Fri, 23 Jan 2026 10:19:46 +0100 Subject: [PATCH 22/28] refactor(rig): add shared credential types and remove runtime backend module - Add ApiKeyCredentials and OllamaCredentials shared types in nvisy-rig - Update CompletionCredentials and EmbeddingCredentials to use shared types - Remove IntoAiProvider trait and backend module from nvisy-runtime - Add strum derives for kind() method on nvisy_core::Error - Implement IntoProvider trait in nvisy-core for provider abstraction - Add From and From for nvisy_core::Error --- Cargo.lock | 1 + crates/nvisy-core/Cargo.toml | 3 + crates/nvisy-core/src/error.rs | 79 +------ crates/nvisy-core/src/lib.rs | 2 +- crates/nvisy-core/src/provider.rs | 4 +- crates/nvisy-dal/src/core/mod.rs | 2 +- crates/nvisy-dal/src/lib.rs | 2 +- crates/nvisy-dal/src/provider/azblob/mod.rs | 6 +- crates/nvisy-dal/src/provider/gcs/mod.rs | 6 +- crates/nvisy-dal/src/provider/milvus/mod.rs | 6 +- crates/nvisy-dal/src/provider/mysql/mod.rs | 6 +- crates/nvisy-dal/src/provider/pgvector/mod.rs | 6 +- crates/nvisy-dal/src/provider/pinecone/mod.rs | 6 +- crates/nvisy-dal/src/provider/postgres/mod.rs | 6 +- crates/nvisy-dal/src/provider/qdrant/mod.rs | 6 +- crates/nvisy-dal/src/provider/s3/mod.rs | 6 +- .../src/provider/completion/credentials.rs | 25 ++- .../src/provider/completion/provider.rs | 30 +-- crates/nvisy-rig/src/provider/credentials.rs | 17 ++ .../src/provider/embedding/credentials.rs | 21 +- .../src/provider/embedding/provider.rs | 22 +- crates/nvisy-rig/src/provider/mod.rs | 2 + crates/nvisy-runtime/src/engine/compiler.rs | 14 +- crates/nvisy-runtime/src/provider/ai.rs | 203 +++++++++--------- .../src/provider/backend/anthropic.rs | 51 ----- .../src/provider/backend/cohere.rs | 88 -------- .../src/provider/backend/gemini.rs | 88 -------- .../nvisy-runtime/src/provider/backend/mod.rs | 87 -------- .../src/provider/backend/openai.rs | 88 -------- .../src/provider/backend/perplexity.rs | 51 ----- crates/nvisy-runtime/src/provider/inputs.rs | 28 ++- crates/nvisy-runtime/src/provider/mod.rs | 67 +++--- crates/nvisy-runtime/src/provider/outputs.rs | 40 ++-- 33 files changed, 301 insertions(+), 768 deletions(-) create mode 100644 crates/nvisy-rig/src/provider/credentials.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/anthropic.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/cohere.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/gemini.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/mod.rs delete mode 100644 
crates/nvisy-runtime/src/provider/backend/openai.rs delete mode 100644 crates/nvisy-runtime/src/provider/backend/perplexity.rs diff --git a/Cargo.lock b/Cargo.lock index 71002d9..dbbf9a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3239,6 +3239,7 @@ dependencies = [ "schemars 0.9.0", "serde", "serde_json", + "strum 0.27.2", "thiserror 2.0.18", "tracing", ] diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml index 3ca4546..2a5272c 100644 --- a/crates/nvisy-core/Cargo.toml +++ b/crates/nvisy-core/Cargo.toml @@ -30,6 +30,9 @@ async-trait = { workspace = true } # Error handling thiserror = { workspace = true } +# Derive macros +strum = { workspace = true } + # Observability tracing = { workspace = true } diff --git a/crates/nvisy-core/src/error.rs b/crates/nvisy-core/src/error.rs index c8df0cc..805dfb2 100644 --- a/crates/nvisy-core/src/error.rs +++ b/crates/nvisy-core/src/error.rs @@ -1,7 +1,6 @@ //! Common error type definitions. -use std::time::Duration; - +use strum::{AsRefStr, IntoStaticStr}; use thiserror::Error; /// Type alias for boxed dynamic errors that can be sent across threads. @@ -15,7 +14,8 @@ pub type BoxedError = Box; pub type Result = std::result::Result; /// Categories of errors that can occur in nvisy-core operations. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, AsRefStr, IntoStaticStr)] +#[strum(serialize_all = "snake_case")] pub enum ErrorKind { /// Input validation failed. InvalidInput, @@ -145,74 +145,13 @@ impl Error { Self::new(ErrorKind::Unknown) } - /// Returns true if this is a client error (4xx equivalent). - pub fn is_client_error(&self) -> bool { - matches!( - self.kind, - ErrorKind::InvalidInput - | ErrorKind::Authentication - | ErrorKind::Authorization - | ErrorKind::NotFound - | ErrorKind::RateLimited - ) - } - - /// Returns true if this is a server error (5xx equivalent). - pub fn is_server_error(&self) -> bool { - matches!( - self.kind, - ErrorKind::ServiceUnavailable - | ErrorKind::InternalError - | ErrorKind::ExternalError - | ErrorKind::Configuration - | ErrorKind::Timeout - | ErrorKind::Serialization - | ErrorKind::Unknown - ) - } - - /// Returns true if this error is potentially retryable. - pub fn is_retryable(&self) -> bool { - matches!( - self.kind, - ErrorKind::NetworkError - | ErrorKind::RateLimited - | ErrorKind::ServiceUnavailable - | ErrorKind::Timeout - ) - } - - /// Returns the recommended retry delay for this error. - pub fn retry_delay(&self) -> Option { - match self.kind { - ErrorKind::RateLimited => Some(Duration::from_secs(60)), - ErrorKind::ServiceUnavailable => Some(Duration::from_secs(30)), - ErrorKind::NetworkError => Some(Duration::from_secs(5)), - ErrorKind::Timeout => Some(Duration::from_secs(10)), - _ => None, - } - } - - /// Returns true if this is an authentication error. - pub fn is_auth_error(&self) -> bool { - matches!( - self.kind, - ErrorKind::Authentication | ErrorKind::Authorization - ) - } - - /// Returns true if this is a rate limiting error. - pub fn is_rate_limit_error(&self) -> bool { - matches!(self.kind, ErrorKind::RateLimited) - } - - /// Returns true if this is a timeout error. - pub fn is_timeout_error(&self) -> bool { - matches!(self.kind, ErrorKind::Timeout) + /// Returns the error kind. + pub fn kind(&self) -> ErrorKind { + self.kind } - /// Returns true if this is a network error. - pub fn is_network_error(&self) -> bool { - matches!(self.kind, ErrorKind::NetworkError) + /// Returns the error kind as a string. 
+ pub fn kind_str(&self) -> &'static str { + self.kind.into() } } diff --git a/crates/nvisy-core/src/lib.rs b/crates/nvisy-core/src/lib.rs index 6558bc5..ca397d5 100644 --- a/crates/nvisy-core/src/lib.rs +++ b/crates/nvisy-core/src/lib.rs @@ -7,4 +7,4 @@ mod provider; pub mod types; pub use error::{BoxedError, Error, ErrorKind, Result}; -pub use provider::IntoProvider; +pub use provider::Provider; diff --git a/crates/nvisy-core/src/provider.rs b/crates/nvisy-core/src/provider.rs index 6a7e4ce..a700896 100644 --- a/crates/nvisy-core/src/provider.rs +++ b/crates/nvisy-core/src/provider.rs @@ -27,14 +27,14 @@ use crate::Result; /// } /// ``` #[async_trait::async_trait] -pub trait IntoProvider: Send { +pub trait Provider: Send { /// Non-sensitive parameters (bucket, prefix, table, model, etc.). type Params: Send; /// Sensitive credentials (API keys, secrets, etc.). type Credentials: Send; /// Creates a new provider from parameters and credentials. - async fn create(params: Self::Params, credentials: Self::Credentials) -> Result + async fn connect(params: Self::Params, credentials: Self::Credentials) -> Result where Self: Sized; } diff --git a/crates/nvisy-dal/src/core/mod.rs b/crates/nvisy-dal/src/core/mod.rs index 2dccc7c..3703504 100644 --- a/crates/nvisy-dal/src/core/mod.rs +++ b/crates/nvisy-dal/src/core/mod.rs @@ -8,7 +8,7 @@ mod vector_context; pub use input_stream::{InputStream, ItemStream}; // Re-export IntoProvider from nvisy-core -pub use nvisy_core::IntoProvider; +pub use nvisy_core::Provider; pub use object_context::ObjectContext; pub use output_stream::{ItemSink, OutputStream}; pub use relational_context::RelationalContext; diff --git a/crates/nvisy-dal/src/lib.rs b/crates/nvisy-dal/src/lib.rs index 7ccbf9b..3905303 100644 --- a/crates/nvisy-dal/src/lib.rs +++ b/crates/nvisy-dal/src/lib.rs @@ -13,7 +13,7 @@ pub mod provider; mod error; pub use core::{ - DataInput, DataOutput, InputStream, IntoProvider, ItemSink, ItemStream, ObjectContext, + DataInput, DataOutput, InputStream, Provider, ItemSink, ItemStream, ObjectContext, OutputStream, RelationalContext, VectorContext, }; diff --git a/crates/nvisy-dal/src/provider/azblob/mod.rs b/crates/nvisy-dal/src/provider/azblob/mod.rs index 2646b60..2c8d215 100644 --- a/crates/nvisy-dal/src/provider/azblob/mod.rs +++ b/crates/nvisy-dal/src/provider/azblob/mod.rs @@ -7,7 +7,7 @@ mod output; pub use config::{AzblobCredentials, AzblobParams}; use opendal::{Operator, services}; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::Error; /// Azure Blob Storage provider for blob storage. @@ -17,11 +17,11 @@ pub struct AzblobProvider { } #[async_trait::async_trait] -impl IntoProvider for AzblobProvider { +impl Provider for AzblobProvider { type Credentials = AzblobCredentials; type Params = AzblobParams; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-dal/src/provider/gcs/mod.rs b/crates/nvisy-dal/src/provider/gcs/mod.rs index 742480e..04f1ac4 100644 --- a/crates/nvisy-dal/src/provider/gcs/mod.rs +++ b/crates/nvisy-dal/src/provider/gcs/mod.rs @@ -7,7 +7,7 @@ mod output; pub use config::{GcsCredentials, GcsParams}; use opendal::{Operator, services}; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::Error; /// Google Cloud Storage provider for blob storage. 
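// Illustrative sketch (not part of this patch): what a backend looks like against the
// renamed `nvisy_core::Provider` trait (`create` -> `connect`) introduced in this commit.
// `DummyProvider`, `DummyParams`, and `DummyCredentials` are hypothetical stand-ins;
// only the trait shape (associated `Params`/`Credentials` plus `async fn connect`)
// mirrors crates/nvisy-core/src/provider.rs above.

struct DummyParams {
    bucket: String,
}

struct DummyCredentials {
    api_key: String,
}

struct DummyProvider {
    bucket: String,
}

#[async_trait::async_trait]
impl nvisy_core::Provider for DummyProvider {
    type Params = DummyParams;
    type Credentials = DummyCredentials;

    async fn connect(
        params: Self::Params,
        credentials: Self::Credentials,
    ) -> nvisy_core::Result<Self> {
        // A real backend would build its client here (e.g. an opendal Operator);
        // the sketch only keeps the params-plus-credentials constructor shape.
        let _ = credentials.api_key;
        Ok(Self {
            bucket: params.bucket,
        })
    }
}

// Callers then connect through the shared trait, keeping non-sensitive params and
// workspace-stored credentials separate until the last moment:
//
//     let provider = DummyProvider::connect(params, credentials).await?;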
@@ -17,11 +17,11 @@ pub struct GcsProvider { } #[async_trait::async_trait] -impl IntoProvider for GcsProvider { +impl Provider for GcsProvider { type Credentials = GcsCredentials; type Params = GcsParams; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-dal/src/provider/milvus/mod.rs b/crates/nvisy-dal/src/provider/milvus/mod.rs index 67c3dd8..e9a7fa6 100644 --- a/crates/nvisy-dal/src/provider/milvus/mod.rs +++ b/crates/nvisy-dal/src/provider/milvus/mod.rs @@ -13,7 +13,7 @@ use milvus::index::{IndexParams, IndexType, MetricType}; use milvus::schema::{CollectionSchemaBuilder, FieldSchema}; use milvus::value::Value; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::{Error, Result}; /// Milvus provider for vector storage. @@ -23,11 +23,11 @@ pub struct MilvusProvider { } #[async_trait::async_trait] -impl IntoProvider for MilvusProvider { +impl Provider for MilvusProvider { type Credentials = MilvusCredentials; type Params = MilvusParams; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-dal/src/provider/mysql/mod.rs b/crates/nvisy-dal/src/provider/mysql/mod.rs index 8a3ea77..e5cfa8f 100644 --- a/crates/nvisy-dal/src/provider/mysql/mod.rs +++ b/crates/nvisy-dal/src/provider/mysql/mod.rs @@ -7,7 +7,7 @@ mod output; pub use config::{MysqlCredentials, MysqlParams}; use opendal::{Operator, services}; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::Error; /// MySQL provider for relational data. @@ -17,11 +17,11 @@ pub struct MysqlProvider { } #[async_trait::async_trait] -impl IntoProvider for MysqlProvider { +impl Provider for MysqlProvider { type Credentials = MysqlCredentials; type Params = MysqlParams; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-dal/src/provider/pgvector/mod.rs b/crates/nvisy-dal/src/provider/pgvector/mod.rs index 389a927..28ced98 100644 --- a/crates/nvisy-dal/src/provider/pgvector/mod.rs +++ b/crates/nvisy-dal/src/provider/pgvector/mod.rs @@ -12,7 +12,7 @@ use diesel_async::pooled_connection::AsyncDieselConnectionManager; use diesel_async::pooled_connection::deadpool::Pool; use diesel_async::{AsyncPgConnection, RunQueryDsl}; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::{Error, Result}; /// pgvector provider for vector storage using PostgreSQL. @@ -22,11 +22,11 @@ pub struct PgVectorProvider { } #[async_trait::async_trait] -impl IntoProvider for PgVectorProvider { +impl Provider for PgVectorProvider { type Credentials = PgVectorCredentials; type Params = PgVectorParams; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-dal/src/provider/pinecone/mod.rs b/crates/nvisy-dal/src/provider/pinecone/mod.rs index 4963838..010c648 100644 --- a/crates/nvisy-dal/src/provider/pinecone/mod.rs +++ b/crates/nvisy-dal/src/provider/pinecone/mod.rs @@ -11,7 +11,7 @@ use pinecone_sdk::pinecone::PineconeClientConfig; use pinecone_sdk::pinecone::data::Index; use tokio::sync::Mutex; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::{Error, Result}; /// Pinecone provider for vector storage. 
@@ -21,11 +21,11 @@ pub struct PineconeProvider { } #[async_trait::async_trait] -impl IntoProvider for PineconeProvider { +impl Provider for PineconeProvider { type Credentials = PineconeCredentials; type Params = PineconeParams; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-dal/src/provider/postgres/mod.rs b/crates/nvisy-dal/src/provider/postgres/mod.rs index 1ded3e6..afc319c 100644 --- a/crates/nvisy-dal/src/provider/postgres/mod.rs +++ b/crates/nvisy-dal/src/provider/postgres/mod.rs @@ -7,7 +7,7 @@ mod output; pub use config::{PostgresCredentials, PostgresParams}; use opendal::{Operator, services}; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::Error; /// PostgreSQL provider for relational data. @@ -17,11 +17,11 @@ pub struct PostgresProvider { } #[async_trait::async_trait] -impl IntoProvider for PostgresProvider { +impl Provider for PostgresProvider { type Credentials = PostgresCredentials; type Params = PostgresParams; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-dal/src/provider/qdrant/mod.rs b/crates/nvisy-dal/src/provider/qdrant/mod.rs index c295b87..7e8405f 100644 --- a/crates/nvisy-dal/src/provider/qdrant/mod.rs +++ b/crates/nvisy-dal/src/provider/qdrant/mod.rs @@ -15,7 +15,7 @@ use qdrant_client::qdrant::{ VectorParamsBuilder, }; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::{Error, Result}; /// Qdrant provider for vector storage. @@ -25,11 +25,11 @@ pub struct QdrantProvider { } #[async_trait::async_trait] -impl IntoProvider for QdrantProvider { +impl Provider for QdrantProvider { type Credentials = QdrantCredentials; type Params = QdrantParams; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-dal/src/provider/s3/mod.rs b/crates/nvisy-dal/src/provider/s3/mod.rs index 30557d5..0ac9eed 100644 --- a/crates/nvisy-dal/src/provider/s3/mod.rs +++ b/crates/nvisy-dal/src/provider/s3/mod.rs @@ -7,7 +7,7 @@ mod output; pub use config::{S3Credentials, S3Params}; use opendal::{Operator, services}; -use crate::core::IntoProvider; +use crate::core::Provider; use crate::error::Error; /// Amazon S3 provider for blob storage. @@ -17,11 +17,11 @@ pub struct S3Provider { } #[async_trait::async_trait] -impl IntoProvider for S3Provider { +impl Provider for S3Provider { type Credentials = S3Credentials; type Params = S3Params; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { diff --git a/crates/nvisy-rig/src/provider/completion/credentials.rs b/crates/nvisy-rig/src/provider/completion/credentials.rs index 795dea5..7b0bcbd 100644 --- a/crates/nvisy-rig/src/provider/completion/credentials.rs +++ b/crates/nvisy-rig/src/provider/completion/credentials.rs @@ -1,21 +1,32 @@ //! Completion provider credentials. use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; + +pub use super::super::credentials::{ApiKeyCredentials, OllamaCredentials}; /// Credentials for completion providers. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, IntoStaticStr)] #[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum CompletionCredentials { /// OpenAI credentials. 
- OpenAi { api_key: String }, + OpenAi(ApiKeyCredentials), /// Anthropic credentials. - Anthropic { api_key: String }, + Anthropic(ApiKeyCredentials), /// Cohere credentials. - Cohere { api_key: String }, + Cohere(ApiKeyCredentials), /// Google Gemini credentials. - Gemini { api_key: String }, + Gemini(ApiKeyCredentials), /// Perplexity credentials. - Perplexity { api_key: String }, + Perplexity(ApiKeyCredentials), /// Ollama credentials (local, no API key required). - Ollama { base_url: String }, + Ollama(OllamaCredentials), +} + +impl CompletionCredentials { + /// Returns the provider kind as a string. + pub fn kind(&self) -> &'static str { + self.into() + } } diff --git a/crates/nvisy-rig/src/provider/completion/provider.rs b/crates/nvisy-rig/src/provider/completion/provider.rs index e1b3d74..946e25d 100644 --- a/crates/nvisy-rig/src/provider/completion/provider.rs +++ b/crates/nvisy-rig/src/provider/completion/provider.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use nvisy_core::IntoProvider; +use nvisy_core::Provider; #[cfg(feature = "ollama")] use rig::client::Nothing; use rig::completion::{AssistantContent, CompletionError, CompletionModel as RigCompletionModel}; @@ -52,17 +52,17 @@ pub(crate) enum CompletionService { } #[async_trait::async_trait] -impl IntoProvider for CompletionProvider { +impl Provider for CompletionProvider { type Credentials = CompletionCredentials; type Params = CompletionModel; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { let inner = match (credentials, params) { - (CompletionCredentials::OpenAi { api_key }, CompletionModel::OpenAi(m)) => { - let client = openai::Client::new(&api_key) + (CompletionCredentials::OpenAi(c), CompletionModel::OpenAi(m)) => { + let client = openai::Client::new(&c.api_key) .map_err(|e| Error::provider("openai", e.to_string()))? 
.completions_api(); CompletionService::OpenAi { @@ -70,32 +70,32 @@ impl IntoProvider for CompletionProvider { model_name: m.as_ref().to_string(), } } - (CompletionCredentials::Anthropic { api_key }, CompletionModel::Anthropic(m)) => { - let client = anthropic::Client::new(&api_key) + (CompletionCredentials::Anthropic(c), CompletionModel::Anthropic(m)) => { + let client = anthropic::Client::new(&c.api_key) .map_err(|e| Error::provider("anthropic", e.to_string()))?; CompletionService::Anthropic { model: client.completion_model(m.as_ref()), model_name: m.as_ref().to_string(), } } - (CompletionCredentials::Cohere { api_key }, CompletionModel::Cohere(m)) => { - let client = cohere::Client::new(&api_key) + (CompletionCredentials::Cohere(c), CompletionModel::Cohere(m)) => { + let client = cohere::Client::new(&c.api_key) .map_err(|e| Error::provider("cohere", e.to_string()))?; CompletionService::Cohere { model: client.completion_model(m.as_ref()), model_name: m.as_ref().to_string(), } } - (CompletionCredentials::Gemini { api_key }, CompletionModel::Gemini(m)) => { - let client = gemini::Client::new(&api_key) + (CompletionCredentials::Gemini(c), CompletionModel::Gemini(m)) => { + let client = gemini::Client::new(&c.api_key) .map_err(|e| Error::provider("gemini", e.to_string()))?; CompletionService::Gemini { model: client.completion_model(m.as_ref()), model_name: m.as_ref().to_string(), } } - (CompletionCredentials::Perplexity { api_key }, CompletionModel::Perplexity(m)) => { - let client = perplexity::Client::new(&api_key) + (CompletionCredentials::Perplexity(c), CompletionModel::Perplexity(m)) => { + let client = perplexity::Client::new(&c.api_key) .map_err(|e| Error::provider("perplexity", e.to_string()))?; CompletionService::Perplexity { model: client.completion_model(m.as_ref()), @@ -103,10 +103,10 @@ impl IntoProvider for CompletionProvider { } } #[cfg(feature = "ollama")] - (CompletionCredentials::Ollama { base_url }, CompletionModel::Ollama(model_name)) => { + (CompletionCredentials::Ollama(c), CompletionModel::Ollama(model_name)) => { let client = ollama::Client::builder() .api_key(Nothing) - .base_url(&base_url) + .base_url(&c.base_url) .build() .map_err(|e| Error::provider("ollama", e.to_string()))?; CompletionService::Ollama { diff --git a/crates/nvisy-rig/src/provider/credentials.rs b/crates/nvisy-rig/src/provider/credentials.rs new file mode 100644 index 0000000..a5b0ca8 --- /dev/null +++ b/crates/nvisy-rig/src/provider/credentials.rs @@ -0,0 +1,17 @@ +//! Shared credential types for AI providers. + +use serde::{Deserialize, Serialize}; + +/// API key credentials for AI providers. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ApiKeyCredentials { + /// API key. + pub api_key: String, +} + +/// Ollama credentials (local deployment, no API key required). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OllamaCredentials { + /// Base URL for the Ollama server. + pub base_url: String, +} diff --git a/crates/nvisy-rig/src/provider/embedding/credentials.rs b/crates/nvisy-rig/src/provider/embedding/credentials.rs index dc1fd53..5c3c41a 100644 --- a/crates/nvisy-rig/src/provider/embedding/credentials.rs +++ b/crates/nvisy-rig/src/provider/embedding/credentials.rs @@ -1,18 +1,29 @@ //! Embedding provider credentials. use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; + +pub use super::super::credentials::{ApiKeyCredentials, OllamaCredentials}; /// Credentials for embedding providers. 
-#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, IntoStaticStr)] #[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum EmbeddingCredentials { /// OpenAI credentials. - OpenAi { api_key: String }, + OpenAi(ApiKeyCredentials), /// Cohere credentials. - Cohere { api_key: String }, + Cohere(ApiKeyCredentials), /// Google Gemini credentials. - Gemini { api_key: String }, + Gemini(ApiKeyCredentials), /// Ollama credentials. #[cfg(feature = "ollama")] - Ollama { base_url: String }, + Ollama(OllamaCredentials), +} + +impl EmbeddingCredentials { + /// Returns the provider kind as a string. + pub fn kind(&self) -> &'static str { + self.into() + } } diff --git a/crates/nvisy-rig/src/provider/embedding/provider.rs b/crates/nvisy-rig/src/provider/embedding/provider.rs index bb4c4d4..877bffd 100644 --- a/crates/nvisy-rig/src/provider/embedding/provider.rs +++ b/crates/nvisy-rig/src/provider/embedding/provider.rs @@ -2,7 +2,7 @@ use std::sync::Arc; -use nvisy_core::IntoProvider; +use nvisy_core::Provider; #[cfg(feature = "ollama")] use rig::client::Nothing; use rig::embeddings::{Embedding, EmbeddingModel as RigEmbeddingModel}; @@ -50,25 +50,25 @@ pub(crate) enum EmbeddingService { } #[async_trait::async_trait] -impl IntoProvider for EmbeddingProvider { +impl Provider for EmbeddingProvider { type Credentials = EmbeddingCredentials; type Params = EmbeddingModel; - async fn create( + async fn connect( params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { let inner = match (credentials, params) { - (EmbeddingCredentials::OpenAi { api_key }, EmbeddingModel::OpenAi(m)) => { - let client = openai::Client::new(&api_key) + (EmbeddingCredentials::OpenAi(c), EmbeddingModel::OpenAi(m)) => { + let client = openai::Client::new(&c.api_key) .map_err(|e| Error::provider("openai", e.to_string()))?; EmbeddingService::OpenAi { model: client.embedding_model_with_ndims(m.as_ref(), m.dimensions()), model_name: m.as_ref().to_string(), } } - (EmbeddingCredentials::Cohere { api_key }, EmbeddingModel::Cohere(m)) => { - let client = cohere::Client::new(&api_key) + (EmbeddingCredentials::Cohere(c), EmbeddingModel::Cohere(m)) => { + let client = cohere::Client::new(&c.api_key) .map_err(|e| Error::provider("cohere", e.to_string()))?; EmbeddingService::Cohere { model: client.embedding_model_with_ndims( @@ -79,8 +79,8 @@ impl IntoProvider for EmbeddingProvider { model_name: m.as_ref().to_string(), } } - (EmbeddingCredentials::Gemini { api_key }, EmbeddingModel::Gemini(m)) => { - let client = gemini::Client::new(&api_key) + (EmbeddingCredentials::Gemini(c), EmbeddingModel::Gemini(m)) => { + let client = gemini::Client::new(&c.api_key) .map_err(|e| Error::provider("gemini", e.to_string()))?; EmbeddingService::Gemini { model: client.embedding_model_with_ndims(m.as_ref(), m.dimensions()), @@ -88,10 +88,10 @@ impl IntoProvider for EmbeddingProvider { } } #[cfg(feature = "ollama")] - (EmbeddingCredentials::Ollama { base_url }, EmbeddingModel::Ollama(m)) => { + (EmbeddingCredentials::Ollama(c), EmbeddingModel::Ollama(m)) => { let client = ollama::Client::builder() .api_key(Nothing) - .base_url(&base_url) + .base_url(&c.base_url) .build() .map_err(|e| Error::provider("ollama", e.to_string()))?; EmbeddingService::Ollama { diff --git a/crates/nvisy-rig/src/provider/mod.rs b/crates/nvisy-rig/src/provider/mod.rs index a80012d..97945d8 100644 --- a/crates/nvisy-rig/src/provider/mod.rs +++ b/crates/nvisy-rig/src/provider/mod.rs 
@@ -1,6 +1,7 @@ //! Multi-provider management for AI inference. mod completion; +mod credentials; mod embedding; pub mod splitting; @@ -8,6 +9,7 @@ pub use completion::{ AnthropicModel, CohereCompletionModel, CompletionCredentials, CompletionModel, CompletionProvider, GeminiCompletionModel, OpenAiCompletionModel, PerplexityModel, }; +pub use credentials::{ApiKeyCredentials, OllamaCredentials}; #[cfg(feature = "ollama")] pub use embedding::OllamaEmbeddingModel; pub use embedding::{ diff --git a/crates/nvisy-runtime/src/engine/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs index 28ba07b..9d36e29 100644 --- a/crates/nvisy-runtime/src/engine/compiler.rs +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -377,8 +377,11 @@ impl<'a> WorkflowCompiler<'a> { &self, params: &EmbeddingProviderParams, ) -> Result { - let creds = self.registry.get(params.credentials_id())?; - params.clone().into_provider(creds.clone()).await + let creds = self.registry.get(params.credentials_id())?.clone(); + params + .clone() + .into_provider(creds.into_embedding_credentials()?) + .await } /// Creates agents from completion provider parameters. @@ -392,8 +395,11 @@ impl<'a> WorkflowCompiler<'a> { &self, params: &CompletionProviderParams, ) -> Result { - let creds = self.registry.get(params.credentials_id())?; - params.clone().into_provider(creds.clone()).await + let creds = self.registry.get(params.credentials_id())?.clone(); + params + .clone() + .into_provider(creds.into_completion_credentials()?) + .await } /// Builds the petgraph from compiled nodes and resolved edges. diff --git a/crates/nvisy-runtime/src/provider/ai.rs b/crates/nvisy-runtime/src/provider/ai.rs index 451062b..7b492c5 100644 --- a/crates/nvisy-runtime/src/provider/ai.rs +++ b/crates/nvisy-runtime/src/provider/ai.rs @@ -1,165 +1,156 @@ //! AI provider types and implementations. +//! +//! Re-exports types from nvisy_rig and provides wrapper enums for provider params. use derive_more::From; -use nvisy_rig::provider::{CompletionProvider, EmbeddingProvider}; +use nvisy_core::Provider; +use nvisy_rig::provider::{ + AnthropicModel, CohereCompletionModel, CohereEmbeddingModel, CompletionCredentials, + CompletionModel, CompletionProvider, EmbeddingCredentials, EmbeddingModel, EmbeddingProvider, + GeminiCompletionModel, GeminiEmbeddingModel, OpenAiCompletionModel, OpenAiEmbeddingModel, + PerplexityModel, +}; use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; use uuid::Uuid; -use super::ProviderCredentials; -use super::backend::{ - AnthropicCompletionParams, AnthropicCredentials, CohereCompletionParams, CohereCredentials, - CohereEmbeddingParams, GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams, - IntoAiProvider as _, OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams, - PerplexityCompletionParams, PerplexityCredentials, -}; use crate::error::{Error, Result}; -/// Completion provider parameters. -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +// ============================================================================= +// Completion Provider Params +// ============================================================================= + +/// Completion provider parameters with credentials reference. +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize, IntoStaticStr)] #[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum CompletionProviderParams { /// OpenAI completion. 
- OpenAi(OpenAiCompletionParams), + OpenAi { + credentials_id: Uuid, + model: OpenAiCompletionModel, + }, /// Anthropic completion. - Anthropic(AnthropicCompletionParams), + Anthropic { + credentials_id: Uuid, + model: AnthropicModel, + }, /// Cohere completion. - Cohere(CohereCompletionParams), + Cohere { + credentials_id: Uuid, + model: CohereCompletionModel, + }, /// Google Gemini completion. - Gemini(GeminiCompletionParams), + Gemini { + credentials_id: Uuid, + model: GeminiCompletionModel, + }, /// Perplexity completion. - Perplexity(PerplexityCompletionParams), + Perplexity { + credentials_id: Uuid, + model: PerplexityModel, + }, } impl CompletionProviderParams { - /// Returns the credentials ID for this provider. + /// Returns the credentials ID. pub fn credentials_id(&self) -> Uuid { match self { - Self::OpenAi(p) => p.credentials_id, - Self::Anthropic(p) => p.credentials_id, - Self::Cohere(p) => p.credentials_id, - Self::Gemini(p) => p.credentials_id, - Self::Perplexity(p) => p.credentials_id, + Self::OpenAi { credentials_id, .. } + | Self::Anthropic { credentials_id, .. } + | Self::Cohere { credentials_id, .. } + | Self::Gemini { credentials_id, .. } + | Self::Perplexity { credentials_id, .. } => *credentials_id, } } /// Returns the provider kind as a string. - pub const fn kind(&self) -> &'static str { - match self { - Self::OpenAi(_) => "openai", - Self::Anthropic(_) => "anthropic", - Self::Cohere(_) => "cohere", - Self::Gemini(_) => "gemini", - Self::Perplexity(_) => "perplexity", - } + pub fn kind(&self) -> &'static str { + self.into() } -} -impl CompletionProviderParams { - /// Creates a completion provider from these params and credentials. + /// Creates a completion provider from params and credentials. pub async fn into_provider( self, - credentials: ProviderCredentials, + credentials: CompletionCredentials, ) -> Result { - match (self, credentials) { - (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c).await, - (Self::Anthropic(p), ProviderCredentials::Anthropic(c)) => p.into_provider(c).await, - (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c).await, - (Self::Gemini(p), ProviderCredentials::Gemini(c)) => p.into_provider(c).await, - (Self::Perplexity(p), ProviderCredentials::Perplexity(c)) => p.into_provider(c).await, - (params, creds) => Err(Error::Internal(format!( - "credentials type mismatch: expected '{}', got '{}'", - params.kind(), - creds.kind() - ))), - } + let model = match self { + Self::OpenAi { model, .. } => CompletionModel::OpenAi(model), + Self::Anthropic { model, .. } => CompletionModel::Anthropic(model), + Self::Cohere { model, .. } => CompletionModel::Cohere(model), + Self::Gemini { model, .. } => CompletionModel::Gemini(model), + Self::Perplexity { model, .. } => CompletionModel::Perplexity(model), + }; + + CompletionProvider::connect(model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) } } -/// Embedding provider parameters. -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +// ============================================================================= +// Embedding Provider Params +// ============================================================================= + +/// Embedding provider parameters with credentials reference. +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize, IntoStaticStr)] #[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum EmbeddingProviderParams { /// OpenAI embedding. 
- OpenAi(OpenAiEmbeddingParams), + OpenAi { + credentials_id: Uuid, + model: OpenAiEmbeddingModel, + }, /// Cohere embedding. - Cohere(CohereEmbeddingParams), + Cohere { + credentials_id: Uuid, + model: CohereEmbeddingModel, + }, /// Google Gemini embedding. - Gemini(GeminiEmbeddingParams), + Gemini { + credentials_id: Uuid, + model: GeminiEmbeddingModel, + }, } impl EmbeddingProviderParams { - /// Returns the credentials ID for this provider. + /// Returns the credentials ID. pub fn credentials_id(&self) -> Uuid { match self { - Self::OpenAi(p) => p.credentials_id, - Self::Cohere(p) => p.credentials_id, - Self::Gemini(p) => p.credentials_id, + Self::OpenAi { credentials_id, .. } + | Self::Cohere { credentials_id, .. } + | Self::Gemini { credentials_id, .. } => *credentials_id, } } /// Returns the provider kind as a string. - pub const fn kind(&self) -> &'static str { - match self { - Self::OpenAi(_) => "openai", - Self::Cohere(_) => "cohere", - Self::Gemini(_) => "gemini", - } + pub fn kind(&self) -> &'static str { + self.into() } - /// Returns the embedding dimensions for this provider's model. + /// Returns the embedding dimensions for this model. pub fn dimensions(&self) -> usize { match self { - Self::OpenAi(p) => p.model.dimensions(), - Self::Cohere(p) => p.model.dimensions(), - Self::Gemini(p) => p.model.dimensions(), + Self::OpenAi { model, .. } => model.dimensions(), + Self::Cohere { model, .. } => model.dimensions(), + Self::Gemini { model, .. } => model.dimensions(), } } -} -impl EmbeddingProviderParams { - /// Creates an embedding provider from these params and credentials. + /// Creates an embedding provider from params and credentials. pub async fn into_provider( self, - credentials: ProviderCredentials, + credentials: EmbeddingCredentials, ) -> Result { - match (self, credentials) { - (Self::OpenAi(p), ProviderCredentials::OpenAi(c)) => p.into_provider(c).await, - (Self::Cohere(p), ProviderCredentials::Cohere(c)) => p.into_provider(c).await, - (Self::Gemini(p), ProviderCredentials::Gemini(c)) => p.into_provider(c).await, - (params, creds) => Err(Error::Internal(format!( - "credentials type mismatch: expected '{}', got '{}'", - params.kind(), - creds.kind() - ))), - } - } -} + let model = match self { + Self::OpenAi { model, .. } => EmbeddingModel::OpenAi(model), + Self::Cohere { model, .. } => EmbeddingModel::Cohere(model), + Self::Gemini { model, .. } => EmbeddingModel::Gemini(model), + }; -/// AI provider credentials (sensitive). -#[derive(Debug, Clone, From, Serialize, Deserialize)] -#[serde(tag = "provider", rename_all = "snake_case")] -pub enum AiCredentials { - /// OpenAI credentials. - OpenAi(OpenAiCredentials), - /// Anthropic credentials. - Anthropic(AnthropicCredentials), - /// Cohere credentials. - Cohere(CohereCredentials), - /// Gemini credentials. - Gemini(GeminiCredentials), - /// Perplexity credentials. - Perplexity(PerplexityCredentials), -} - -impl AiCredentials { - /// Returns the provider kind as a string. 
- pub const fn kind(&self) -> &'static str { - match self { - Self::OpenAi(_) => "openai", - Self::Anthropic(_) => "anthropic", - Self::Cohere(_) => "cohere", - Self::Gemini(_) => "gemini", - Self::Perplexity(_) => "perplexity", - } + EmbeddingProvider::connect(model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) } } diff --git a/crates/nvisy-runtime/src/provider/backend/anthropic.rs b/crates/nvisy-runtime/src/provider/backend/anthropic.rs deleted file mode 100644 index d664df9..0000000 --- a/crates/nvisy-runtime/src/provider/backend/anthropic.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Anthropic provider. - -use nvisy_core::IntoProvider; -use nvisy_rig::provider::{AnthropicModel, CompletionProvider}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoAiProvider; -use crate::error::{Error, Result}; - -/// Anthropic credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AnthropicCredentials { - /// API key. - pub api_key: String, -} - -/// Anthropic completion parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct AnthropicCompletionParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Model to use. - pub model: AnthropicModel, -} - -impl AnthropicCompletionParams { - /// Creates a new Anthropic completion params. - pub fn new(credentials_id: Uuid, model: AnthropicModel) -> Self { - Self { - credentials_id, - model, - } - } -} - -#[async_trait::async_trait] -impl IntoAiProvider for AnthropicCompletionParams { - type Credentials = AnthropicCredentials; - type Output = CompletionProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let rig_creds = nvisy_rig::provider::CompletionCredentials::Anthropic { - api_key: credentials.api_key, - }; - let model = nvisy_rig::provider::CompletionModel::Anthropic(self.model); - CompletionProvider::create(model, rig_creds) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/cohere.rs b/crates/nvisy-runtime/src/provider/backend/cohere.rs deleted file mode 100644 index 2b23528..0000000 --- a/crates/nvisy-runtime/src/provider/backend/cohere.rs +++ /dev/null @@ -1,88 +0,0 @@ -//! Cohere provider. - -use nvisy_core::IntoProvider; -use nvisy_rig::provider::{ - CohereCompletionModel, CohereEmbeddingModel, CompletionProvider, EmbeddingProvider, -}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoAiProvider; -use crate::error::{Error, Result}; - -/// Cohere credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CohereCredentials { - /// API key. - pub api_key: String, -} - -/// Cohere completion parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct CohereCompletionParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Model to use. - pub model: CohereCompletionModel, -} - -impl CohereCompletionParams { - /// Creates a new Cohere completion params. 
- pub fn new(credentials_id: Uuid, model: CohereCompletionModel) -> Self { - Self { - credentials_id, - model, - } - } -} - -#[async_trait::async_trait] -impl IntoAiProvider for CohereCompletionParams { - type Credentials = CohereCredentials; - type Output = CompletionProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let rig_creds = nvisy_rig::provider::CompletionCredentials::Cohere { - api_key: credentials.api_key, - }; - let model = nvisy_rig::provider::CompletionModel::Cohere(self.model); - CompletionProvider::create(model, rig_creds) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} - -/// Cohere embedding parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct CohereEmbeddingParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Model to use. - pub model: CohereEmbeddingModel, -} - -impl CohereEmbeddingParams { - /// Creates a new Cohere embedding params. - pub fn new(credentials_id: Uuid, model: CohereEmbeddingModel) -> Self { - Self { - credentials_id, - model, - } - } -} - -#[async_trait::async_trait] -impl IntoAiProvider for CohereEmbeddingParams { - type Credentials = CohereCredentials; - type Output = EmbeddingProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let rig_creds = nvisy_rig::provider::EmbeddingCredentials::Cohere { - api_key: credentials.api_key, - }; - let model = nvisy_rig::provider::EmbeddingModel::Cohere(self.model); - EmbeddingProvider::create(model, rig_creds) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/gemini.rs b/crates/nvisy-runtime/src/provider/backend/gemini.rs deleted file mode 100644 index 5ce401a..0000000 --- a/crates/nvisy-runtime/src/provider/backend/gemini.rs +++ /dev/null @@ -1,88 +0,0 @@ -//! Google Gemini provider. - -use nvisy_core::IntoProvider; -use nvisy_rig::provider::{ - CompletionProvider, EmbeddingProvider, GeminiCompletionModel, GeminiEmbeddingModel, -}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoAiProvider; -use crate::error::{Error, Result}; - -/// Gemini credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GeminiCredentials { - /// API key. - pub api_key: String, -} - -/// Gemini completion parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct GeminiCompletionParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Model to use. - pub model: GeminiCompletionModel, -} - -impl GeminiCompletionParams { - /// Creates a new Gemini completion params. - pub fn new(credentials_id: Uuid, model: GeminiCompletionModel) -> Self { - Self { - credentials_id, - model, - } - } -} - -#[async_trait::async_trait] -impl IntoAiProvider for GeminiCompletionParams { - type Credentials = GeminiCredentials; - type Output = CompletionProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let rig_creds = nvisy_rig::provider::CompletionCredentials::Gemini { - api_key: credentials.api_key, - }; - let model = nvisy_rig::provider::CompletionModel::Gemini(self.model); - CompletionProvider::create(model, rig_creds) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} - -/// Gemini embedding parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct GeminiEmbeddingParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Model to use. 
- pub model: GeminiEmbeddingModel, -} - -impl GeminiEmbeddingParams { - /// Creates a new Gemini embedding params. - pub fn new(credentials_id: Uuid, model: GeminiEmbeddingModel) -> Self { - Self { - credentials_id, - model, - } - } -} - -#[async_trait::async_trait] -impl IntoAiProvider for GeminiEmbeddingParams { - type Credentials = GeminiCredentials; - type Output = EmbeddingProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let rig_creds = nvisy_rig::provider::EmbeddingCredentials::Gemini { - api_key: credentials.api_key, - }; - let model = nvisy_rig::provider::EmbeddingModel::Gemini(self.model); - EmbeddingProvider::create(model, rig_creds) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/mod.rs b/crates/nvisy-runtime/src/provider/backend/mod.rs deleted file mode 100644 index 8b0a376..0000000 --- a/crates/nvisy-runtime/src/provider/backend/mod.rs +++ /dev/null @@ -1,87 +0,0 @@ -//! Backend provider implementations. -//! -//! Storage and vector database providers are re-exported from `nvisy_dal`. -//! AI providers are defined locally in this module. -//! -//! ## Storage backends (from nvisy_dal) -//! - `s3` - Amazon S3 -//! - `gcs` - Google Cloud Storage -//! - `azblob` - Azure Blob Storage -//! - `postgres` - PostgreSQL -//! - `mysql` - MySQL -//! -//! ## Vector databases (from nvisy_dal) -//! - `qdrant` - Qdrant vector database -//! - `pinecone` - Pinecone vector database -//! - `milvus` - Milvus vector database -//! - `pgvector` - pgvector (PostgreSQL extension) -//! -//! ## AI providers (local) -//! - `openai` - OpenAI (completion + embedding) -//! - `anthropic` - Anthropic (completion only) -//! - `cohere` - Cohere (completion + embedding) -//! - `gemini` - Google Gemini (completion + embedding) -//! - `perplexity` - Perplexity (completion only) - -use crate::error::Result; - -// AI providers (local implementations) -mod anthropic; -mod cohere; -mod gemini; -mod openai; -mod perplexity; - -// Re-export storage backend types from nvisy_dal -// AI provider exports -pub use anthropic::{AnthropicCompletionParams, AnthropicCredentials}; -pub use cohere::{CohereCompletionParams, CohereCredentials, CohereEmbeddingParams}; -pub use gemini::{GeminiCompletionParams, GeminiCredentials, GeminiEmbeddingParams}; -pub use nvisy_dal::provider::{ - // Object storage - AzblobCredentials, - AzblobParams, - AzblobProvider, - GcsCredentials, - GcsParams, - GcsProvider, - // Vector databases - MilvusCredentials, - MilvusParams, - MilvusProvider, - // Relational databases - MysqlCredentials, - MysqlParams, - MysqlProvider, - PgVectorCredentials, - PgVectorParams, - PgVectorProvider, - PineconeCredentials, - PineconeParams, - PineconeProvider, - PostgresCredentials, - PostgresParams, - PostgresProvider, - QdrantCredentials, - QdrantParams, - QdrantProvider, - S3Credentials, - S3Params, - S3Provider, -}; -pub use openai::{OpenAiCompletionParams, OpenAiCredentials, OpenAiEmbeddingParams}; -pub use perplexity::{PerplexityCompletionParams, PerplexityCredentials}; - -/// Trait for AI provider parameters that can be combined with credentials to create a provider. -/// -/// This is distinct from `nvisy_dal::IntoProvider` which is for storage/vector providers. -#[async_trait::async_trait] -pub trait IntoAiProvider { - /// The credentials type required by this provider. - type Credentials: Send; - /// The output type (provider instance). 
- type Output; - - /// Combines params with credentials to create the provider. - async fn into_provider(self, credentials: Self::Credentials) -> Result; -} diff --git a/crates/nvisy-runtime/src/provider/backend/openai.rs b/crates/nvisy-runtime/src/provider/backend/openai.rs deleted file mode 100644 index f6c4be3..0000000 --- a/crates/nvisy-runtime/src/provider/backend/openai.rs +++ /dev/null @@ -1,88 +0,0 @@ -//! OpenAI provider. - -use nvisy_core::IntoProvider; -use nvisy_rig::provider::{ - CompletionProvider, EmbeddingProvider, OpenAiCompletionModel, OpenAiEmbeddingModel, -}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoAiProvider; -use crate::error::{Error, Result}; - -/// OpenAI credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct OpenAiCredentials { - /// API key. - pub api_key: String, -} - -/// OpenAI completion parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OpenAiCompletionParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Model to use. - pub model: OpenAiCompletionModel, -} - -impl OpenAiCompletionParams { - /// Creates a new OpenAI completion params. - pub fn new(credentials_id: Uuid, model: OpenAiCompletionModel) -> Self { - Self { - credentials_id, - model, - } - } -} - -#[async_trait::async_trait] -impl IntoAiProvider for OpenAiCompletionParams { - type Credentials = OpenAiCredentials; - type Output = CompletionProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let rig_creds = nvisy_rig::provider::CompletionCredentials::OpenAi { - api_key: credentials.api_key, - }; - let model = nvisy_rig::provider::CompletionModel::OpenAi(self.model); - CompletionProvider::create(model, rig_creds) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} - -/// OpenAI embedding parameters. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OpenAiEmbeddingParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Model to use. - pub model: OpenAiEmbeddingModel, -} - -impl OpenAiEmbeddingParams { - /// Creates a new OpenAI embedding params. - pub fn new(credentials_id: Uuid, model: OpenAiEmbeddingModel) -> Self { - Self { - credentials_id, - model, - } - } -} - -#[async_trait::async_trait] -impl IntoAiProvider for OpenAiEmbeddingParams { - type Credentials = OpenAiCredentials; - type Output = EmbeddingProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let rig_creds = nvisy_rig::provider::EmbeddingCredentials::OpenAi { - api_key: credentials.api_key, - }; - let model = nvisy_rig::provider::EmbeddingModel::OpenAi(self.model); - EmbeddingProvider::create(model, rig_creds) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/backend/perplexity.rs b/crates/nvisy-runtime/src/provider/backend/perplexity.rs deleted file mode 100644 index ec06e4e..0000000 --- a/crates/nvisy-runtime/src/provider/backend/perplexity.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Perplexity provider. - -use nvisy_core::IntoProvider; -use nvisy_rig::provider::{CompletionProvider, PerplexityModel}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::IntoAiProvider; -use crate::error::{Error, Result}; - -/// Perplexity credentials. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PerplexityCredentials { - /// API key. - pub api_key: String, -} - -/// Perplexity completion parameters. 
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PerplexityCompletionParams { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Model to use. - pub model: PerplexityModel, -} - -impl PerplexityCompletionParams { - /// Creates a new Perplexity completion params. - pub fn new(credentials_id: Uuid, model: PerplexityModel) -> Self { - Self { - credentials_id, - model, - } - } -} - -#[async_trait::async_trait] -impl IntoAiProvider for PerplexityCompletionParams { - type Credentials = PerplexityCredentials; - type Output = CompletionProvider; - - async fn into_provider(self, credentials: Self::Credentials) -> Result { - let rig_creds = nvisy_rig::provider::CompletionCredentials::Perplexity { - api_key: credentials.api_key, - }; - let model = nvisy_rig::provider::CompletionModel::Perplexity(self.model); - CompletionProvider::create(model, rig_creds) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/inputs.rs b/crates/nvisy-runtime/src/provider/inputs.rs index d41c229..1d61291 100644 --- a/crates/nvisy-runtime/src/provider/inputs.rs +++ b/crates/nvisy-runtime/src/provider/inputs.rs @@ -1,13 +1,14 @@ //! Input provider types and implementations. use derive_more::From; -use nvisy_dal::core::IntoProvider as DalIntoProvider; +use nvisy_core::Provider; use nvisy_dal::provider::{ AzblobParams, AzblobProvider, GcsParams, GcsProvider, MysqlParams, MysqlProvider, PostgresParams, PostgresProvider, S3Params, S3Provider, }; use nvisy_dal::{AnyDataValue, DataTypeId, ObjectContext, RelationalContext}; use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; use uuid::Uuid; use super::ProviderCredentials; @@ -33,7 +34,7 @@ impl InputProviderConfig { } /// Returns the provider kind as a string. - pub const fn kind(&self) -> &'static str { + pub fn kind(&self) -> &'static str { self.params.kind() } @@ -49,8 +50,9 @@ impl InputProviderConfig { } /// Input provider parameters (storage backends only, no vector DBs). -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize, IntoStaticStr)] #[serde(tag = "kind", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum InputProviderParams { /// Amazon S3 storage. S3(S3Params), @@ -66,14 +68,8 @@ pub enum InputProviderParams { impl InputProviderParams { /// Returns the provider kind as a string. - pub const fn kind(&self) -> &'static str { - match self { - Self::S3(_) => "s3", - Self::Gcs(_) => "gcs", - Self::Azblob(_) => "azblob", - Self::Postgres(_) => "postgres", - Self::Mysql(_) => "mysql", - } + pub fn kind(&self) -> &'static str { + self.into() } /// Returns the output data type for this provider. 
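The `kind()` rewrite above swaps the hand-maintained match arms for strum's `IntoStaticStr` derive with `serialize_all = "snake_case"`, matching the casing already used by the `serde(tag = "kind")` attribute. A minimal sketch of the pattern, using a hypothetical enum rather than the real `InputProviderParams`:

```rust
// Sketch of the strum-based `kind()` pattern used above; the enum is a
// stand-in and variant payloads are omitted for brevity.
use strum::IntoStaticStr;

#[derive(IntoStaticStr)]
#[strum(serialize_all = "snake_case")]
enum ProviderKind {
    S3,
    Azblob,
    Postgres,
}

impl ProviderKind {
    // `&ProviderKind` converts into the snake_cased variant name, so adding a
    // variant no longer requires touching a hand-written match arm.
    fn kind(&self) -> &'static str {
        self.into()
    }
}

fn main() {
    assert_eq!(ProviderKind::S3.kind(), "s3");
    assert_eq!(ProviderKind::Azblob.kind(), "azblob");
}
```

Note that `kind()` can no longer be `const`: the trait-based `From` conversion is not a const fn, which is why the diff drops the `const` qualifier on both `InputProviderConfig::kind` and `InputProviderParams::kind`.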
@@ -88,27 +84,27 @@ impl InputProviderParams { pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { match (self, credentials) { (Self::S3(p), ProviderCredentials::S3(c)) => Ok(InputProvider::S3( - S3Provider::create(p, c) + S3Provider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Gcs(p), ProviderCredentials::Gcs(c)) => Ok(InputProvider::Gcs( - GcsProvider::create(p, c) + GcsProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Azblob(p), ProviderCredentials::Azblob(c)) => Ok(InputProvider::Azblob( - AzblobProvider::create(p, c) + AzblobProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Postgres(p), ProviderCredentials::Postgres(c)) => Ok(InputProvider::Postgres( - PostgresProvider::create(p, c) + PostgresProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Mysql(p), ProviderCredentials::Mysql(c)) => Ok(InputProvider::Mysql( - MysqlProvider::create(p, c) + MysqlProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs index f3935d8..5800544 100644 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/mod.rs @@ -1,38 +1,39 @@ //! Provider params, credentials, and registry. //! //! This module separates provider configuration into: -//! - [`ProviderCredentials`]: Sensitive credentials (stored per workspace) -//! - [`AiCredentials`]: AI provider credentials (stored per workspace) +//! - [`ProviderCredentials`]: All credentials (storage + AI, stored per workspace) //! - [`InputProviderConfig`] / [`OutputProviderConfig`]: Config with credentials reference + params //! - [`InputProviderParams`] / [`OutputProviderParams`]: Non-sensitive parameters (part of node definition) //! - [`CompletionProviderParams`] / [`EmbeddingProviderParams`]: AI provider parameters //! - [`CredentialsRegistry`]: In-memory registry for credentials lookup -//! -//! # Module Structure -//! -//! 
- [`backend`]: Re-exports from nvisy_dal + local AI provider implementations mod ai; -pub mod backend; mod inputs; mod outputs; mod registry; pub mod runtime; -pub use ai::{AiCredentials, CompletionProviderParams, EmbeddingProviderParams}; -use backend::{ - AnthropicCredentials, AzblobCredentials, CohereCredentials, GcsCredentials, GeminiCredentials, - MilvusCredentials, MysqlCredentials, OpenAiCredentials, PerplexityCredentials, - PgVectorCredentials, PineconeCredentials, PostgresCredentials, QdrantCredentials, - S3Credentials, -}; +pub use ai::{CompletionProviderParams, EmbeddingProviderParams}; use derive_more::From; pub use inputs::{InputProvider, InputProviderConfig, InputProviderParams}; +// Re-export dal credentials +pub use nvisy_dal::provider::{ + AzblobCredentials, GcsCredentials, MilvusCredentials, MysqlCredentials, PgVectorCredentials, + PineconeCredentials, PostgresCredentials, QdrantCredentials, S3Credentials, +}; +// Re-export rig types +pub use nvisy_rig::provider::{ + AnthropicModel, CohereCompletionModel, CohereEmbeddingModel, CompletionCredentials, + EmbeddingCredentials, GeminiCompletionModel, GeminiEmbeddingModel, OpenAiCompletionModel, + OpenAiEmbeddingModel, PerplexityModel, +}; pub use outputs::{OutputProvider, OutputProviderConfig, OutputProviderParams}; pub use registry::CredentialsRegistry; use serde::{Deserialize, Serialize}; use strum::IntoStaticStr; +use crate::error::{Error, Result}; + /// Provider credentials (sensitive). #[derive(Debug, Clone, From, Serialize, Deserialize, IntoStaticStr)] #[serde(tag = "provider", rename_all = "snake_case")] @@ -60,17 +61,11 @@ pub enum ProviderCredentials { /// pgvector credentials. PgVector(PgVectorCredentials), - // AI providers - /// OpenAI credentials. - OpenAi(OpenAiCredentials), - /// Anthropic credentials. - Anthropic(AnthropicCredentials), - /// Cohere credentials. - Cohere(CohereCredentials), - /// Google Gemini credentials. - Gemini(GeminiCredentials), - /// Perplexity credentials. - Perplexity(PerplexityCredentials), + // AI providers (completion) + /// Completion provider credentials. + Completion(CompletionCredentials), + /// Embedding provider credentials. + Embedding(EmbeddingCredentials), } impl ProviderCredentials { @@ -78,4 +73,26 @@ impl ProviderCredentials { pub fn kind(&self) -> &'static str { self.into() } + + /// Converts to completion credentials if applicable. + pub fn into_completion_credentials(self) -> Result { + match self { + Self::Completion(c) => Ok(c), + other => Err(Error::Internal(format!( + "expected completion credentials, got '{}'", + other.kind() + ))), + } + } + + /// Converts to embedding credentials if applicable. 
+ pub fn into_embedding_credentials(self) -> Result { + match self { + Self::Embedding(c) => Ok(c), + other => Err(Error::Internal(format!( + "expected embedding credentials, got '{}'", + other.kind() + ))), + } + } } diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs index ea5f9cc..aa7d03a 100644 --- a/crates/nvisy-runtime/src/provider/outputs.rs +++ b/crates/nvisy-runtime/src/provider/outputs.rs @@ -6,7 +6,7 @@ use std::task::{Context as TaskContext, Poll}; use derive_more::From; use futures::Sink; -use nvisy_dal::core::IntoProvider as DalIntoProvider; +use nvisy_core::Provider; use nvisy_dal::provider::{ AzblobParams, AzblobProvider, GcsParams, GcsProvider, MilvusParams, MilvusProvider, MysqlParams, MysqlProvider, PgVectorParams, PgVectorProvider, PineconeParams, PineconeProvider, @@ -14,6 +14,7 @@ use nvisy_dal::provider::{ }; use nvisy_dal::{AnyDataValue, DataTypeId}; use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; use tokio::sync::Mutex; use uuid::Uuid; @@ -41,7 +42,7 @@ impl OutputProviderConfig { } /// Returns the provider kind as a string. - pub const fn kind(&self) -> &'static str { + pub fn kind(&self) -> &'static str { self.params.kind() } @@ -57,8 +58,9 @@ impl OutputProviderConfig { } /// Output provider parameters (storage backends + vector DBs). -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize, IntoStaticStr)] #[serde(tag = "kind", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] pub enum OutputProviderParams { /// Amazon S3 storage. S3(S3Params), @@ -82,18 +84,8 @@ pub enum OutputProviderParams { impl OutputProviderParams { /// Returns the provider kind as a string. - pub const fn kind(&self) -> &'static str { - match self { - Self::S3(_) => "s3", - Self::Gcs(_) => "gcs", - Self::Azblob(_) => "azblob", - Self::Postgres(_) => "postgres", - Self::Mysql(_) => "mysql", - Self::Qdrant(_) => "qdrant", - Self::Pinecone(_) => "pinecone", - Self::Milvus(_) => "milvus", - Self::PgVector(_) => "pgvector", - } + pub fn kind(&self) -> &'static str { + self.into() } /// Returns the output data type for this provider. 
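The `into_completion_credentials` / `into_embedding_credentials` helpers added above let a call site demand the AI-specific variant of the unified `ProviderCredentials` enum and receive an error naming the offending kind otherwise. An illustrative sketch; the import path is assumed from this patch's module layout, and only the enum and its helpers come from the patch:

```rust
// Illustrative only; types come from the provider module reworked above.
use nvisy_runtime::provider::{CompletionCredentials, ProviderCredentials, S3Credentials};

fn narrow_for_ai_node(storage: S3Credentials, ai: CompletionCredentials) {
    // Happy path: an AI node was wired to completion credentials.
    let ok = ProviderCredentials::Completion(ai);
    assert!(ok.into_completion_credentials().is_ok());

    // Mis-wiring a storage credential fails with an `Error::Internal` whose
    // message names the kind, e.g. "expected completion credentials, got 's3'".
    let wrong = ProviderCredentials::S3(storage);
    assert!(wrong.into_completion_credentials().is_err());
}
```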
@@ -111,47 +103,47 @@ impl OutputProviderParams { pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { match (self, credentials) { (Self::S3(p), ProviderCredentials::S3(c)) => Ok(OutputProvider::S3( - S3Provider::create(p, c) + S3Provider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Gcs(p), ProviderCredentials::Gcs(c)) => Ok(OutputProvider::Gcs( - GcsProvider::create(p, c) + GcsProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Azblob(p), ProviderCredentials::Azblob(c)) => Ok(OutputProvider::Azblob( - AzblobProvider::create(p, c) + AzblobProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Postgres(p), ProviderCredentials::Postgres(c)) => Ok(OutputProvider::Postgres( - PostgresProvider::create(p, c) + PostgresProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Mysql(p), ProviderCredentials::Mysql(c)) => Ok(OutputProvider::Mysql( - MysqlProvider::create(p, c) + MysqlProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => Ok(OutputProvider::Qdrant( - QdrantProvider::create(p, c) + QdrantProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => Ok(OutputProvider::Pinecone( - PineconeProvider::create(p, c) + PineconeProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::Milvus(p), ProviderCredentials::Milvus(c)) => Ok(OutputProvider::Milvus( - MilvusProvider::create(p, c) + MilvusProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), (Self::PgVector(p), ProviderCredentials::PgVector(c)) => Ok(OutputProvider::PgVector( - PgVectorProvider::create(p, c) + PgVectorProvider::connect(p, c) .await .map_err(|e| Error::Internal(e.to_string()))?, )), From 92b2bf49d7394fb7b7b2c04bc5abb6913fe18367 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sat, 24 Jan 2026 08:27:59 +0100 Subject: [PATCH 23/28] refactor(rig): reorganize agent module with typed tools and memory - Add JsonSchemaTool using schemars for schema generation and jsonschema for validation - Add JsonResponse for parsing JSON from LLM responses (handles markdown code blocks) - Create concrete schema types for agents: ColumnDescription, TextAnalysisOutput, StructuredOutput - Add with_tools flag to all agents for optional tool enablement - Add memory module with ChatHistory (compaction strategies) and WorkingMemory - Rename tools/ to tool/ and make module private - Update runtime processors to use specific agents instead of Agents bundle - Box large processor variants (EnrichProcessor, ExtractProcessor) in CompiledTransform - Remove unused chat, service, session, and tool modules --- Cargo.lock | 137 ++++++- Cargo.toml | 1 + crates/nvisy-cli/README.md | 5 +- crates/nvisy-core/README.md | 2 + crates/nvisy-dal/README.md | 2 + crates/nvisy-nats/README.md | 5 +- crates/nvisy-postgres/README.md | 6 +- crates/nvisy-rig/Cargo.toml | 6 +- crates/nvisy-rig/README.md | 2 + crates/nvisy-rig/src/agent/memory/history.rs | 204 ++++++++++ crates/nvisy-rig/src/agent/memory/mod.rs | 12 + crates/nvisy-rig/src/agent/memory/working.rs | 172 +++++++++ crates/nvisy-rig/src/agent/mod.rs | 59 +-- .../nvisy-rig/src/agent/structured_output.rs | 77 ++-- crates/nvisy-rig/src/agent/table.rs | 77 +++- crates/nvisy-rig/src/agent/text_analysis.rs | 112 ++++-- 
crates/nvisy-rig/src/agent/text_generation.rs | 44 ++- .../agent/{tools => tool}/document_fetch.rs | 13 +- .../nvisy-rig/src/agent/tool/json_schema.rs | 324 ++++++++++++++++ .../agent/{tools => tool}/metadata_query.rs | 8 +- crates/nvisy-rig/src/agent/tool/mod.rs | 13 + .../src/agent/{tools => tool}/scratchpad.rs | 212 +++++------ .../agent/{tools => tool}/vector_search.rs | 8 +- .../src/agent/tools/context_store.rs | 214 ----------- .../src/agent/tools/image_analysis.rs | 176 --------- .../nvisy-rig/src/agent/tools/json_schema.rs | 334 ----------------- crates/nvisy-rig/src/agent/tools/mod.rs | 30 -- crates/nvisy-rig/src/agent/tools/web_fetch.rs | 271 -------------- crates/nvisy-rig/src/agent/vision.rs | 46 ++- crates/nvisy-rig/src/chat/agent/context.rs | 68 ---- crates/nvisy-rig/src/chat/agent/executor.rs | 45 --- crates/nvisy-rig/src/chat/agent/mod.rs | 119 ------ crates/nvisy-rig/src/chat/agent/prompt.rs | 181 --------- crates/nvisy-rig/src/chat/event.rs | 53 --- crates/nvisy-rig/src/chat/mod.rs | 23 -- crates/nvisy-rig/src/chat/response.rs | 73 ---- crates/nvisy-rig/src/chat/service.rs | 163 -------- crates/nvisy-rig/src/chat/stream.rs | 114 ------ crates/nvisy-rig/src/chat/usage.rs | 86 ----- crates/nvisy-rig/src/error.rs | 10 - crates/nvisy-rig/src/lib.rs | 5 - crates/nvisy-rig/src/rag/searcher/mod.rs | 14 +- crates/nvisy-rig/src/service/config.rs | 70 ---- crates/nvisy-rig/src/service/mod.rs | 7 - crates/nvisy-rig/src/service/rig.rs | 51 --- crates/nvisy-rig/src/session/message.rs | 135 ------- crates/nvisy-rig/src/session/mod.rs | 325 ---------------- crates/nvisy-rig/src/session/policy.rs | 352 ------------------ crates/nvisy-rig/src/session/store.rs | 97 ----- crates/nvisy-rig/src/tool/definition.rs | 312 ---------------- crates/nvisy-rig/src/tool/edit/mod.rs | 102 ----- crates/nvisy-rig/src/tool/edit/operation.rs | 164 -------- crates/nvisy-rig/src/tool/edit/proposed.rs | 305 --------------- crates/nvisy-rig/src/tool/mod.rs | 148 -------- crates/nvisy-rig/src/tool/registry.rs | 152 -------- crates/nvisy-rig/src/tool/types.rs | 183 --------- crates/nvisy-runtime/README.md | 2 + crates/nvisy-runtime/src/engine/compiler.rs | 39 +- .../src/graph/transform/chunk.rs | 14 +- .../src/graph/transform/derive.rs | 20 +- .../src/graph/transform/enrich.rs | 24 +- .../src/graph/transform/extract.rs | 34 +- .../nvisy-runtime/src/graph/transform/mod.rs | 6 +- crates/nvisy-server/README.md | 7 +- crates/nvisy-webhook/README.md | 2 + 65 files changed, 1366 insertions(+), 4711 deletions(-) create mode 100644 crates/nvisy-rig/src/agent/memory/history.rs create mode 100644 crates/nvisy-rig/src/agent/memory/working.rs rename crates/nvisy-rig/src/agent/{tools => tool}/document_fetch.rs (90%) create mode 100644 crates/nvisy-rig/src/agent/tool/json_schema.rs rename crates/nvisy-rig/src/agent/{tools => tool}/metadata_query.rs (94%) create mode 100644 crates/nvisy-rig/src/agent/tool/mod.rs rename crates/nvisy-rig/src/agent/{tools => tool}/scratchpad.rs (72%) rename crates/nvisy-rig/src/agent/{tools => tool}/vector_search.rs (91%) delete mode 100644 crates/nvisy-rig/src/agent/tools/context_store.rs delete mode 100644 crates/nvisy-rig/src/agent/tools/image_analysis.rs delete mode 100644 crates/nvisy-rig/src/agent/tools/json_schema.rs delete mode 100644 crates/nvisy-rig/src/agent/tools/mod.rs delete mode 100644 crates/nvisy-rig/src/agent/tools/web_fetch.rs delete mode 100644 crates/nvisy-rig/src/chat/agent/context.rs delete mode 100644 crates/nvisy-rig/src/chat/agent/executor.rs delete mode 100644 
crates/nvisy-rig/src/chat/agent/mod.rs delete mode 100644 crates/nvisy-rig/src/chat/agent/prompt.rs delete mode 100644 crates/nvisy-rig/src/chat/event.rs delete mode 100644 crates/nvisy-rig/src/chat/mod.rs delete mode 100644 crates/nvisy-rig/src/chat/response.rs delete mode 100644 crates/nvisy-rig/src/chat/service.rs delete mode 100644 crates/nvisy-rig/src/chat/stream.rs delete mode 100644 crates/nvisy-rig/src/chat/usage.rs delete mode 100644 crates/nvisy-rig/src/service/config.rs delete mode 100644 crates/nvisy-rig/src/service/mod.rs delete mode 100644 crates/nvisy-rig/src/service/rig.rs delete mode 100644 crates/nvisy-rig/src/session/message.rs delete mode 100644 crates/nvisy-rig/src/session/mod.rs delete mode 100644 crates/nvisy-rig/src/session/policy.rs delete mode 100644 crates/nvisy-rig/src/session/store.rs delete mode 100644 crates/nvisy-rig/src/tool/definition.rs delete mode 100644 crates/nvisy-rig/src/tool/edit/mod.rs delete mode 100644 crates/nvisy-rig/src/tool/edit/operation.rs delete mode 100644 crates/nvisy-rig/src/tool/edit/proposed.rs delete mode 100644 crates/nvisy-rig/src/tool/mod.rs delete mode 100644 crates/nvisy-rig/src/tool/registry.rs delete mode 100644 crates/nvisy-rig/src/tool/types.rs diff --git a/Cargo.lock b/Cargo.lock index dbbf9a3..3163278 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -37,6 +37,7 @@ dependencies = [ "cfg-if", "getrandom 0.3.4", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -676,7 +677,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ - "bit-vec", + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", ] [[package]] @@ -685,6 +695,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "1.3.2" @@ -736,6 +752,12 @@ dependencies = [ "generic-array", ] +[[package]] +name = "borrow-or-share" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0b364ead1874514c8c2855ab558056ebfeb775653e7ae45ff72f28f8f3166c" + [[package]] name = "brotli" version = "8.0.2" @@ -763,6 +785,12 @@ version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "byteorder" version = "1.5.0" @@ -1676,7 +1704,18 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" dependencies = [ - "bit-set", + "bit-set 0.5.3", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "fancy-regex" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298" +dependencies = [ + "bit-set 0.8.0", "regex-automata", "regex-syntax", ] @@ -1733,6 +1772,17 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "fluent-uri" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1918b65d96df47d3591bed19c5cca17e3fa5d0707318e4b5ef2eae01764df7e5" +dependencies = [ + "borrow-or-share", + "ref-cast", + "serde", +] + [[package]] name = "flume" version = "0.11.1" @@ -1780,6 +1830,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f158e3ff0a1b334408dc9fb811cd99b446986f4d8b741bb08f9df1604085ae7" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "fs-err" version = "3.2.2" @@ -2732,6 +2792,31 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonschema" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "161c33c3ec738cfea3288c5c53dfcdb32fd4fc2954de86ea06f71b5a1a40bfcd" +dependencies = [ + "ahash", + "base64 0.22.1", + "bytecount", + "email_address", + "fancy-regex 0.14.0", + "fraction", + "idna", + "itoa", + "num-cmp", + "once_cell", + "percent-encoding", + "referencing", + "regex-syntax", + "reqwest", + "serde", + "serde_json", + "uuid-simd", +] + [[package]] name = "jsonwebtoken" version = "9.3.1" @@ -3143,6 +3228,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + [[package]] name = "num-complex" version = "0.4.6" @@ -3332,10 +3423,12 @@ dependencies = [ "derive_more", "futures", "jiff", + "jsonschema", "nvisy-core", "nvisy-nats", "nvisy-postgres", "rig-core", + "schemars 0.9.0", "serde", "serde_json", "sha2", @@ -3684,6 +3777,12 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "parking" version = "2.2.1" @@ -4435,6 +4534,20 @@ dependencies = [ "syn 2.0.114", ] +[[package]] +name = "referencing" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40a64b3a635fad9000648b4d8a59c8710c523ab61a23d392a7d91d47683f5adc" +dependencies = [ + "ahash", + "fluent-uri", + "once_cell", + "parking_lot", + "percent-encoding", + "serde_json", +] + [[package]] name = "regex" version = "1.12.2" @@ -4504,6 +4617,7 @@ dependencies = [ "base64 0.22.1", "bytes", "encoding_rs", + "futures-channel", "futures-core", "futures-util", "h2 0.4.13", @@ -6426,6 +6540,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "uuid-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b082222b4f6619906941c17eb2297fff4c2fb96cb60164170522942a200bd8" +dependencies = [ + "outref", + "uuid", + "vsimd", +] + [[package]] name = "validator" version = "0.20.0" @@ -6474,6 +6599,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "want" version = "0.3.1" @@ -7247,7 +7378,7 @@ checksum = "ad76e35b00ad53688d6b90c431cabe3cbf51f7a4a154739e04b63004ab1c736c" dependencies = [ "chrono", "derive_builder", - "fancy-regex", + "fancy-regex 0.13.0", "itertools 0.13.0", "lazy_static", "regex", diff --git a/Cargo.toml b/Cargo.toml index 8477672..c2f0f51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,6 +75,7 @@ tower-http = { version = "0.6", features = ["full"] } # OpenAPI/Documentation aide = { version = "0.15", features = ["axum", "macros", "scalar"] } schemars = { version = "0.9", features = ["uuid1", "jiff02"] } +jsonschema = { version = "0.29", features = [] } # Authentication & Security jsonwebtoken = { version = "10.2", features = ["aws_lc_rs"] } diff --git a/crates/nvisy-cli/README.md b/crates/nvisy-cli/README.md index 05147b3..d3683a2 100644 --- a/crates/nvisy-cli/README.md +++ b/crates/nvisy-cli/README.md @@ -1,9 +1,8 @@ # nvisy-cli -Command-line interface and HTTP server for the Nvisy platform. +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) -[![Rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) -[![Axum](https://img.shields.io/badge/Axum-0.8+-000000?style=flat-square&logo=rust&logoColor=white)](https://github.com/tokio-rs/axum) +Command-line interface and HTTP server for the Nvisy platform. ## Features diff --git a/crates/nvisy-core/README.md b/crates/nvisy-core/README.md index 75b613e..de8e9e6 100644 --- a/crates/nvisy-core/README.md +++ b/crates/nvisy-core/README.md @@ -1,5 +1,7 @@ # nvisy-core +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Core types and utilities shared across nvisy crates. ## Overview diff --git a/crates/nvisy-dal/README.md b/crates/nvisy-dal/README.md index 314f6e6..9367536 100644 --- a/crates/nvisy-dal/README.md +++ b/crates/nvisy-dal/README.md @@ -1,5 +1,7 @@ # nvisy-dal +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Data Abstraction Layer for workflow inputs and outputs. ## Overview diff --git a/crates/nvisy-nats/README.md b/crates/nvisy-nats/README.md index c0103ae..ccc1ab2 100644 --- a/crates/nvisy-nats/README.md +++ b/crates/nvisy-nats/README.md @@ -1,11 +1,10 @@ # nvisy-nats +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Task-focused NATS client for the Nvisy platform with comprehensive JetStream support and unified streaming infrastructure. 
-[![Rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) -[![NATS](https://img.shields.io/badge/NATS-JetStream-000000?style=flat-square&logo=nats&logoColor=white)](https://nats.io/) - ## Features - **Type-Safe Operations** - Generic KV store with compile-time type safety diff --git a/crates/nvisy-postgres/README.md b/crates/nvisy-postgres/README.md index ddaf54a..f7d58ce 100644 --- a/crates/nvisy-postgres/README.md +++ b/crates/nvisy-postgres/README.md @@ -1,12 +1,10 @@ # nvisy-postgres +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Type-safe PostgreSQL database layer for the Nvisy platform with async connection pooling and embedded migrations. -[![Rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) -[![Diesel](https://img.shields.io/badge/Diesel-2.3+-000000?style=flat-square&logo=rust&logoColor=white)](https://diesel.rs/) -[![PostgreSQL](https://img.shields.io/badge/PostgreSQL-17+-000000?style=flat-square&logo=postgresql&logoColor=white)](https://www.postgresql.org/) - ## Features - **Async Connection Pooling** - High-performance connection management with diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index 17d5808..baccd0b 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -27,7 +27,7 @@ ollama = [] [dependencies] # Internal crates -nvisy-core = { path = "../nvisy-core" } +nvisy-core = { workspace = true } nvisy-nats = { workspace = true } nvisy-postgres = { workspace = true } @@ -43,6 +43,10 @@ async-trait = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +# Schema generation & validation +schemars = { workspace = true } +jsonschema = { workspace = true } + # Error handling thiserror = { workspace = true } diff --git a/crates/nvisy-rig/README.md b/crates/nvisy-rig/README.md index ab5e2cc..f8d93e9 100644 --- a/crates/nvisy-rig/README.md +++ b/crates/nvisy-rig/README.md @@ -1,5 +1,7 @@ # nvisy-rig +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Agent-centric AI framework for document processing, built on [Rig](https://github.com/0xPlaygrounds/rig). ## Overview diff --git a/crates/nvisy-rig/src/agent/memory/history.rs b/crates/nvisy-rig/src/agent/memory/history.rs new file mode 100644 index 0000000..25169a7 --- /dev/null +++ b/crates/nvisy-rig/src/agent/memory/history.rs @@ -0,0 +1,204 @@ +//! Chat history with automatic compaction strategies. +//! +//! Provides conversation history management with configurable compaction +//! strategies to handle context window limits. + +use rig::message::Message; + +/// Strategy for compacting chat history when capacity is exceeded. +#[derive(Debug, Clone, Default)] +pub enum CompactionStrategy { + /// Truncate oldest messages, keeping the most recent ones. + #[default] + Truncate, + + /// Summarize older messages into a context string. + Summarize { + /// Summary of compacted messages. + summary: String, + }, +} + +/// Chat history with automatic compaction. 
+/// +/// Manages conversation history with a configurable capacity limit and +/// compaction strategy for handling context window constraints. +#[derive(Debug, Clone)] +pub struct ChatHistory { + /// Messages in the conversation. + messages: Vec, + + /// Maximum number of messages before compaction. + capacity: usize, + + /// Strategy for handling overflow. + strategy: CompactionStrategy, +} + +impl ChatHistory { + /// Creates a new chat history with the given capacity. + /// + /// Uses truncation as the default compaction strategy. + pub fn new(capacity: usize) -> Self { + Self { + messages: Vec::with_capacity(capacity), + capacity, + strategy: CompactionStrategy::Truncate, + } + } + + /// Creates a chat history with a custom compaction strategy. + pub fn with_strategy(capacity: usize, strategy: CompactionStrategy) -> Self { + Self { + messages: Vec::with_capacity(capacity), + capacity, + strategy, + } + } + + /// Adds a message to the history, compacting if necessary. + pub fn push(&mut self, message: Message) { + self.messages.push(message); + + if self.messages.len() > self.capacity { + self.compact(); + } + } + + /// Adds multiple messages to the history. + pub fn extend(&mut self, messages: impl IntoIterator) { + for message in messages { + self.push(message); + } + } + + /// Returns the current messages. + pub fn messages(&self) -> &[Message] { + &self.messages + } + + /// Returns the number of messages currently stored. + pub fn len(&self) -> usize { + self.messages.len() + } + + /// Returns true if the history is empty. + pub fn is_empty(&self) -> bool { + self.messages.is_empty() + } + + /// Clears all messages and resets the summary. + pub fn clear(&mut self) { + self.messages.clear(); + self.strategy = CompactionStrategy::Truncate; + } + + /// Sets a new compaction strategy. + pub fn set_strategy(&mut self, strategy: CompactionStrategy) { + self.strategy = strategy; + } + + /// Updates the summary for summarize strategy. + /// + /// This should be called with an LLM-generated summary of the + /// compacted messages. + pub fn set_summary(&mut self, summary: String) { + self.strategy = CompactionStrategy::Summarize { summary }; + } + + /// Returns the current summary if using summarize strategy. + pub fn summary(&self) -> Option<&str> { + match &self.strategy { + CompactionStrategy::Summarize { summary } => Some(summary), + CompactionStrategy::Truncate => None, + } + } + + /// Compacts the history according to the current strategy. + fn compact(&mut self) { + let keep_count = self.capacity / 2; + let remove_count = self.messages.len().saturating_sub(keep_count); + + if remove_count == 0 { + return; + } + + match &mut self.strategy { + CompactionStrategy::Truncate => { + // Simply remove oldest messages + self.messages.drain(0..remove_count); + } + CompactionStrategy::Summarize { .. 
} => { + // Remove oldest messages (caller should update summary separately) + self.messages.drain(0..remove_count); + } + } + } +} + +impl Default for ChatHistory { + fn default() -> Self { + Self::new(100) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn new_history_is_empty() { + let history = ChatHistory::new(10); + assert!(history.is_empty()); + assert_eq!(history.len(), 0); + } + + #[test] + fn push_adds_messages() { + let mut history = ChatHistory::new(10); + history.push(Message::user("Hello")); + history.push(Message::assistant("Hi!")); + + assert_eq!(history.len(), 2); + } + + #[test] + fn truncate_compacts_when_over_capacity() { + let mut history = ChatHistory::new(4); + + for i in 0..6 { + history.push(Message::user(format!("Message {}", i))); + } + + // Should have compacted, keeping capacity/2 = 2 messages + assert!(history.len() <= 4); + } + + #[test] + fn summarize_strategy_stores_summary() { + let mut history = ChatHistory::with_strategy( + 10, + CompactionStrategy::Summarize { + summary: String::new(), + }, + ); + + history.push(Message::user("Hello")); + history.set_summary("User greeted the assistant.".to_string()); + + assert_eq!(history.summary(), Some("User greeted the assistant.")); + assert_eq!(history.len(), 1); + } + + #[test] + fn clear_resets_history() { + let mut history = ChatHistory::new(10); + history.push(Message::user("Test")); + history.set_summary("Summary".to_string()); + + history.clear(); + + assert!(history.is_empty()); + assert!(history.summary().is_none()); + } +} diff --git a/crates/nvisy-rig/src/agent/memory/mod.rs b/crates/nvisy-rig/src/agent/memory/mod.rs index 1650e89..971b9e7 100644 --- a/crates/nvisy-rig/src/agent/memory/mod.rs +++ b/crates/nvisy-rig/src/agent/memory/mod.rs @@ -1 +1,13 @@ //! Memory module for agent conversation history and context management. +//! +//! This module provides: +//! +//! - [`ChatHistory`] - Conversation history with automatic compaction +//! - [`CompactionStrategy`] - Strategy for handling history overflow (truncate or summarize) +//! - [`WorkingMemory`] - Key-value store for agent working context + +mod history; +mod working; + +pub use history::{ChatHistory, CompactionStrategy}; +pub use working::WorkingMemory; diff --git a/crates/nvisy-rig/src/agent/memory/working.rs b/crates/nvisy-rig/src/agent/memory/working.rs new file mode 100644 index 0000000..fce5e3a --- /dev/null +++ b/crates/nvisy-rig/src/agent/memory/working.rs @@ -0,0 +1,172 @@ +//! Working memory for agent context management. +//! +//! Provides a key-value store for agent working context that persists +//! across turns within a conversation. + +use std::collections::HashMap; + +/// Working memory for storing agent context between turns. +/// +/// This provides a simple key-value store for agents to maintain +/// context information like extracted entities, intermediate results, +/// or user preferences during a conversation. +#[derive(Debug, Clone, Default)] +pub struct WorkingMemory { + /// Key-value storage for context data. + entries: HashMap, + + /// Maximum number of entries to store. + capacity: usize, +} + +impl WorkingMemory { + /// Creates a new working memory with the given capacity. + pub fn new(capacity: usize) -> Self { + Self { + entries: HashMap::with_capacity(capacity), + capacity, + } + } + + /// Stores a value in working memory. + /// + /// If the key already exists, the value is updated. + /// If capacity is exceeded, the oldest entry is removed. 
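Before `WorkingMemory` continues below, a short usage sketch of `ChatHistory` as defined above. The summary text stands in for an LLM-generated summary, which, per the comment in `compact()`, the caller is expected to supply separately; the import path is assumed from this patch's module layout:

```rust
// Usage sketch for ChatHistory with the Summarize compaction strategy.
use nvisy_rig::agent::memory::{ChatHistory, CompactionStrategy};
use rig::message::Message;

fn example() {
    let mut history = ChatHistory::with_strategy(
        4,
        CompactionStrategy::Summarize { summary: String::new() },
    );

    for i in 0..6 {
        history.push(Message::user(format!("turn {i}")));
    }
    // Over capacity: compact() dropped the oldest turns, keeping roughly capacity / 2.
    assert!(history.len() <= 4);

    // The caller summarizes the dropped turns (e.g. via TextGenerationAgent)
    // and stores the result so later prompts can still carry that context.
    history.set_summary("User asked six short questions.".to_string());
    assert_eq!(history.summary(), Some("User asked six short questions."));
}
```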
+ pub fn set(&mut self, key: impl Into, value: impl Into) { + use std::collections::hash_map::Entry; + + let key = key.into(); + let value = value.into(); + + // Check if we need to make room before borrowing via entry() + let needs_eviction = + !self.entries.contains_key(&key) && self.entries.len() >= self.capacity; + + if needs_eviction && let Some(remove_key) = self.entries.keys().next().cloned() { + self.entries.remove(&remove_key); + } + + match self.entries.entry(key) { + Entry::Occupied(mut e) => { + e.insert(value); + } + Entry::Vacant(e) => { + e.insert(value); + } + } + } + + /// Retrieves a value from working memory. + pub fn get(&self, key: &str) -> Option<&str> { + self.entries.get(key).map(|s| s.as_str()) + } + + /// Removes a value from working memory. + pub fn remove(&mut self, key: &str) -> Option { + self.entries.remove(key) + } + + /// Checks if a key exists in working memory. + pub fn contains(&self, key: &str) -> bool { + self.entries.contains_key(key) + } + + /// Returns all keys in working memory. + pub fn keys(&self) -> impl Iterator { + self.entries.keys().map(|s| s.as_str()) + } + + /// Returns the number of entries in working memory. + pub fn len(&self) -> usize { + self.entries.len() + } + + /// Returns true if working memory is empty. + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Clears all entries from working memory. + pub fn clear(&mut self) { + self.entries.clear(); + } + + /// Formats working memory as a context string for prompts. + pub fn to_context_string(&self) -> String { + if self.entries.is_empty() { + return String::new(); + } + + let mut context = String::from("Working Memory:\n"); + for (key, value) in &self.entries { + context.push_str(&format!("- {}: {}\n", key, value)); + } + context + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn new_memory_is_empty() { + let memory = WorkingMemory::new(10); + assert!(memory.is_empty()); + assert_eq!(memory.len(), 0); + } + + #[test] + fn set_and_get() { + let mut memory = WorkingMemory::new(10); + memory.set("user_name", "Alice"); + + assert_eq!(memory.get("user_name"), Some("Alice")); + } + + #[test] + fn update_existing_key() { + let mut memory = WorkingMemory::new(10); + memory.set("count", "1"); + memory.set("count", "2"); + + assert_eq!(memory.get("count"), Some("2")); + assert_eq!(memory.len(), 1); + } + + #[test] + fn remove_entry() { + let mut memory = WorkingMemory::new(10); + memory.set("key", "value"); + + let removed = memory.remove("key"); + assert_eq!(removed, Some("value".to_string())); + assert!(memory.is_empty()); + } + + #[test] + fn respects_capacity() { + let mut memory = WorkingMemory::new(2); + memory.set("a", "1"); + memory.set("b", "2"); + memory.set("c", "3"); + + assert_eq!(memory.len(), 2); + } + + #[test] + fn context_string_format() { + let mut memory = WorkingMemory::new(10); + memory.set("task", "summarize"); + + let context = memory.to_context_string(); + assert!(context.contains("Working Memory:")); + assert!(context.contains("task: summarize")); + } + + #[test] + fn empty_context_string() { + let memory = WorkingMemory::new(10); + assert!(memory.to_context_string().is_empty()); + } +} diff --git a/crates/nvisy-rig/src/agent/mod.rs b/crates/nvisy-rig/src/agent/mod.rs index 74703c1..15736de 100644 --- a/crates/nvisy-rig/src/agent/mod.rs +++ b/crates/nvisy-rig/src/agent/mod.rs @@ -8,10 +8,21 @@ //! - [`TextGenerationAgent`] - Text generation (summarization, titles) //! 
- [`StructuredOutputAgent`] - JSON conversion (structured extraction) //! -//! Use [`Agents`] to create all agents from a single provider. +//! # Tool Support +//! +//! Each agent can optionally be created with tools enabled via the `with_tools` +//! parameter. When enabled, agents have access to relevant tools: +//! +//! | Agent | Tools | +//! |-------|-------| +//! | `VisionAgent` | `ScratchpadTool` | +//! | `TextAnalysisAgent` | `ScratchpadTool`, `JsonSchemaTool` | +//! | `TextGenerationAgent` | `ScratchpadTool` | +//! | `TableAgent` | `ScratchpadTool`, `JsonSchemaTool` | +//! | `StructuredOutputAgent` | `ScratchpadTool`, `JsonSchemaTool` | pub mod memory; -pub mod tools; +mod tool; mod structured_output; mod table; @@ -19,44 +30,10 @@ mod text_analysis; mod text_generation; mod vision; -pub use structured_output::StructuredOutputAgent; -pub use table::TableAgent; -pub use text_analysis::{Classification, Entity, Relationship, Sentiment, TextAnalysisAgent}; +pub use structured_output::{StructuredOutput, StructuredOutputAgent}; +pub use table::{ColumnDescription, TableAgent}; +pub use text_analysis::{ + Classification, Entity, Relationship, Sentiment, TextAnalysisAgent, TextAnalysisOutput, +}; pub use text_generation::TextGenerationAgent; pub use vision::VisionAgent; - -use crate::provider::CompletionProvider; - -/// Collection of all specialized agents. -/// -/// Provides convenient access to all agents created from a single completion provider. -/// -/// # Example -/// -/// ```ignore -/// let provider = CompletionProvider::new(...); -/// let agents = Agents::new(provider); -/// -/// let summary = agents.text_generation().summarize("...").await?; -/// let entities = agents.text_analysis().extract_entities("...").await?; -/// ``` -pub struct Agents { - pub structured_output_agent: StructuredOutputAgent, - pub table_agent: TableAgent, - pub text_analysis_agent: TextAnalysisAgent, - pub text_generation_agent: TextGenerationAgent, - pub vision_agent: VisionAgent, -} - -impl Agents { - /// Creates all agents from a completion provider. - pub fn new(provider: CompletionProvider) -> Self { - Self { - structured_output_agent: StructuredOutputAgent::new(provider.clone()), - table_agent: TableAgent::new(provider.clone()), - text_analysis_agent: TextAnalysisAgent::new(provider.clone()), - text_generation_agent: TextGenerationAgent::new(provider.clone()), - vision_agent: VisionAgent::new(provider), - } - } -} diff --git a/crates/nvisy-rig/src/agent/structured_output.rs b/crates/nvisy-rig/src/agent/structured_output.rs index 0d0a5b6..81bbb68 100644 --- a/crates/nvisy-rig/src/agent/structured_output.rs +++ b/crates/nvisy-rig/src/agent/structured_output.rs @@ -2,10 +2,13 @@ use rig::agent::{Agent, AgentBuilder}; use rig::completion::Prompt; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use serde_json::Value; +use super::tool::{JsonResponse, JsonSchemaTool, ScratchpadTool}; +use crate::Result; use crate::provider::CompletionProvider; -use crate::{Error, Result}; const NAME: &str = "StructuredOutputAgent"; const DESCRIPTION: &str = @@ -31,67 +34,79 @@ Schema: Only output valid JSON that conforms to the schema, no explanation. If a field cannot be determined from the text, use null."; +/// Generic structured output schema for validation. +/// +/// This is a flexible schema that accepts any valid JSON structure. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct StructuredOutput { + /// The extracted data as a JSON value. 
+ #[serde(flatten)] + pub data: Value, +} + /// Agent for structured output tasks. /// /// Handles tasks that convert text to structured JSON: /// - Free-form JSON conversion /// - Schema-based structured extraction +/// +/// When `with_tools` is enabled, the agent has access to: +/// - `ScratchpadTool` - For drafting complex extractions iteratively +/// - `JsonSchemaTool` - For validating output against schemas pub struct StructuredOutputAgent { agent: Agent, + model_name: String, } impl StructuredOutputAgent { /// Creates a new structured output agent with the given completion provider. - pub fn new(provider: CompletionProvider) -> Self { - let agent = AgentBuilder::new(provider) + /// + /// # Arguments + /// * `provider` - The completion provider to use + /// * `with_tools` - Whether to enable tool usage (scratchpad, schema validation) + pub fn new(provider: CompletionProvider, with_tools: bool) -> Self { + let model_name = provider.model_name().to_string(); + let builder = AgentBuilder::new(provider) .name(NAME) .description(DESCRIPTION) - .preamble(PREAMBLE) - .build(); - Self { agent } + .preamble(PREAMBLE); + + let agent = if with_tools { + builder + .tool(ScratchpadTool::new()) + .tool(JsonSchemaTool::::new()) + .build() + } else { + builder.build() + }; + + Self { agent, model_name } } /// Converts text to JSON format. /// /// Attempts to extract structured information from free-form text /// and represent it as JSON. + #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] pub async fn to_json(&self, text: &str) -> Result { let prompt = format!("{}\n\nText:\n{}", PROMPT_TO_JSON, text); let response = self.agent.prompt(&prompt).await?; - parse_json(&response) + let value: Value = JsonResponse::parse(&response)?; + tracing::debug!("to_json completed"); + Ok(value) } /// Converts text to JSON matching a specific schema. /// /// Extracts information from text and structures it according to /// the provided JSON schema. + #[tracing::instrument(skip(self, text, schema), fields(agent = NAME, model = %self.model_name, text_len = text.len(), schema_len = schema.len()))] pub async fn to_structured_json(&self, text: &str, schema: &str) -> Result { let base_prompt = PROMPT_TO_STRUCTURED_JSON.replace("{}", schema); let prompt = format!("{}\n\nText:\n{}", base_prompt, text); let response = self.agent.prompt(&prompt).await?; - parse_json(&response) + let value: Value = JsonResponse::parse(&response)?; + tracing::debug!("to_structured_json completed"); + Ok(value) } } - -/// Parses JSON from LLM response, handling markdown code blocks. 
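A usage sketch of the reworked `StructuredOutputAgent` above. The provider value and the texts are illustrative, the import paths are assumed from this patch's layout, and the error type is boxed rather than assuming the crate's `Result` alias:

```rust
// Illustrative usage; with `with_tools = true` the agent may also call the
// scratchpad and schema-validation tools while producing its answer.
use nvisy_rig::agent::StructuredOutputAgent;
use nvisy_rig::provider::CompletionProvider; // path assumed

async fn example(provider: CompletionProvider) -> Result<(), Box<dyn std::error::Error>> {
    let agent = StructuredOutputAgent::new(provider, /* with_tools */ true);

    // Free-form extraction into arbitrary JSON.
    let value = agent.to_json("Invoice 1042, due 2026-02-01, total 120 EUR").await?;
    println!("{value}");

    // Extraction constrained by a caller-supplied schema.
    let schema = r#"{"type":"object","properties":{"total":{"type":"number"}}}"#;
    let structured = agent.to_structured_json("The total was 120 EUR.", schema).await?;
    println!("{structured}");
    Ok(())
}
```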
-fn parse_json(response: &str) -> Result { - // Try to extract JSON from markdown code block if present - let json_str = if response.contains("```json") { - response - .split("```json") - .nth(1) - .and_then(|s| s.split("```").next()) - .map(|s| s.trim()) - .unwrap_or(response.trim()) - } else if response.contains("```") { - response - .split("```") - .nth(1) - .map(|s| s.trim()) - .unwrap_or(response.trim()) - } else { - response.trim() - }; - - serde_json::from_str(json_str).map_err(|e| Error::parse(format!("invalid JSON: {e}"))) -} diff --git a/crates/nvisy-rig/src/agent/table.rs b/crates/nvisy-rig/src/agent/table.rs index 68e67be..cfce88c 100644 --- a/crates/nvisy-rig/src/agent/table.rs +++ b/crates/nvisy-rig/src/agent/table.rs @@ -2,7 +2,10 @@ use rig::agent::{Agent, AgentBuilder}; use rig::completion::Prompt; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use super::tool::{JsonResponse, JsonSchemaTool, ScratchpadTool}; use crate::Result; use crate::provider::CompletionProvider; @@ -49,60 +52,110 @@ Convert this table to a JSON array of objects. Each row should be an object with column names as keys. Only output valid JSON, no explanation."; +/// Column description for table schema validation. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct ColumnDescription { + /// Column name. + pub name: String, + /// Data type (text, number, date, etc.). + #[serde(rename = "type")] + pub data_type: String, + /// Brief description of what the column contains. + pub description: String, +} + /// Agent for table processing tasks. /// /// Handles tasks that involve understanding and transforming tables: /// - Table description /// - Column descriptions /// - Format conversion (HTML, Markdown, CSV, JSON) +/// +/// When `with_tools` is enabled, the agent has access to: +/// - `ScratchpadTool` - For working on format conversions iteratively +/// - `JsonSchemaTool` - For validating JSON output pub struct TableAgent { agent: Agent, + model_name: String, } impl TableAgent { /// Creates a new table agent with the given completion provider. - pub fn new(provider: CompletionProvider) -> Self { - let agent = AgentBuilder::new(provider) + /// + /// # Arguments + /// * `provider` - The completion provider to use + /// * `with_tools` - Whether to enable tool usage (scratchpad, schema validation) + pub fn new(provider: CompletionProvider, with_tools: bool) -> Self { + let model_name = provider.model_name().to_string(); + let builder = AgentBuilder::new(provider) .name(NAME) .description(DESCRIPTION) - .preamble(PREAMBLE) - .build(); - Self { agent } + .preamble(PREAMBLE); + + let agent = if with_tools { + builder + .tool(ScratchpadTool::new()) + .tool(JsonSchemaTool::>::new()) + .build() + } else { + builder.build() + }; + + Self { agent, model_name } } /// Generates a description of a table. + #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))] pub async fn describe(&self, table_content: &str) -> Result { let prompt = format!("{}\n\nTable:\n{}", PROMPT_DESCRIBE, table_content); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "describe completed"); + Ok(response) } /// Generates descriptions for each column in a table. 
- pub async fn describe_columns(&self, table_content: &str) -> Result { + #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))] + pub async fn describe_columns(&self, table_content: &str) -> Result> { let prompt = format!("{}\n\nTable:\n{}", PROMPT_DESCRIBE_COLUMNS, table_content); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + let columns: Vec = JsonResponse::parse(&response)?; + tracing::debug!(column_count = columns.len(), "describe_columns completed"); + Ok(columns) } /// Converts a table to HTML format. + #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))] pub async fn to_html(&self, table_content: &str) -> Result { let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_HTML, table_content); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "to_html completed"); + Ok(response) } /// Converts a table to Markdown format. + #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))] pub async fn to_markdown(&self, table_content: &str) -> Result { let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_MARKDOWN, table_content); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "to_markdown completed"); + Ok(response) } /// Converts a table to CSV format. + #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))] pub async fn to_csv(&self, table_content: &str) -> Result { let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_CSV, table_content); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "to_csv completed"); + Ok(response) } /// Converts a table to JSON format. + #[tracing::instrument(skip(self, table_content), fields(agent = NAME, model = %self.model_name, content_len = table_content.len()))] pub async fn to_json(&self, table_content: &str) -> Result { let prompt = format!("{}\n\nTable:\n{}", PROMPT_TO_JSON, table_content); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "to_json completed"); + Ok(response) } } diff --git a/crates/nvisy-rig/src/agent/text_analysis.rs b/crates/nvisy-rig/src/agent/text_analysis.rs index f798968..fe5a954 100644 --- a/crates/nvisy-rig/src/agent/text_analysis.rs +++ b/crates/nvisy-rig/src/agent/text_analysis.rs @@ -4,13 +4,15 @@ use std::collections::HashMap; use rig::agent::{Agent, AgentBuilder}; use rig::completion::Prompt; +use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use super::tool::{JsonResponse, JsonSchemaTool, ScratchpadTool}; +use crate::Result; use crate::provider::CompletionProvider; -use crate::{Error, Result}; /// A named entity extracted from text. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct Entity { /// The text of the entity. pub text: String, @@ -23,7 +25,7 @@ pub struct Entity { } /// Classification result with labels and confidence scores. 
-#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct Classification { /// The matched category labels. pub labels: Vec, @@ -32,7 +34,7 @@ pub struct Classification { } /// Sentiment analysis result. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct Sentiment { /// The overall sentiment: "positive", "negative", "neutral", or "mixed". pub sentiment: String, @@ -44,7 +46,7 @@ pub struct Sentiment { } /// A relationship between two entities. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct Relationship { /// The first entity in the relationship. pub subject: String, @@ -54,6 +56,26 @@ pub struct Relationship { pub object: String, } +/// Combined schema for text analysis outputs. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct TextAnalysisOutput { + /// Extracted entities. + #[serde(default)] + pub entities: Option>, + /// Extracted keywords. + #[serde(default)] + pub keywords: Option>, + /// Classification result. + #[serde(default)] + pub classification: Option, + /// Sentiment analysis result. + #[serde(default)] + pub sentiment: Option, + /// Extracted relationships. + #[serde(default)] + pub relationships: Option>, +} + const NAME: &str = "TextAnalysisAgent"; const DESCRIPTION: &str = "Agent for text analysis including entity extraction, keyword extraction, classification, and sentiment analysis"; @@ -106,78 +128,92 @@ Format as a JSON array with objects containing: /// - Classification /// - Sentiment analysis /// - Relationship extraction +/// +/// When `with_tools` is enabled, the agent has access to: +/// - `ScratchpadTool` - For drafting and refining extractions +/// - `JsonSchemaTool` - For validating output against schemas pub struct TextAnalysisAgent { agent: Agent, + model_name: String, } impl TextAnalysisAgent { /// Creates a new text analysis agent with the given completion provider. - pub fn new(provider: CompletionProvider) -> Self { - let agent = AgentBuilder::new(provider) + /// + /// # Arguments + /// * `provider` - The completion provider to use + /// * `with_tools` - Whether to enable tool usage (scratchpad, schema validation) + pub fn new(provider: CompletionProvider, with_tools: bool) -> Self { + let model_name = provider.model_name().to_string(); + let builder = AgentBuilder::new(provider) .name(NAME) .description(DESCRIPTION) - .preamble(PREAMBLE) - .build(); - Self { agent } + .preamble(PREAMBLE); + + let agent = if with_tools { + builder + .tool(ScratchpadTool::new()) + .tool(JsonSchemaTool::::new()) + .build() + } else { + builder.build() + }; + + Self { agent, model_name } } /// Extracts named entities from text. + #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] pub async fn extract_entities(&self, text: &str) -> Result> { let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_ENTITIES, text); let response = self.agent.prompt(&prompt).await?; - parse_json(&response) + let entities: Vec = JsonResponse::parse(&response)?; + tracing::debug!(entity_count = entities.len(), "extract_entities completed"); + Ok(entities) } /// Extracts keywords from text. 
+ #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] pub async fn extract_keywords(&self, text: &str) -> Result> { let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_KEYWORDS, text); let response = self.agent.prompt(&prompt).await?; - parse_json(&response) + let keywords: Vec = JsonResponse::parse(&response)?; + tracing::debug!(keyword_count = keywords.len(), "extract_keywords completed"); + Ok(keywords) } /// Classifies text into provided categories. + #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len(), label_count = labels.len()))] pub async fn classify(&self, text: &str, labels: &[String]) -> Result { let labels_str = labels.join(", "); let base_prompt = PROMPT_CLASSIFY.replace("{}", &labels_str); let prompt = format!("{}\n\nText:\n{}", base_prompt, text); let response = self.agent.prompt(&prompt).await?; - parse_json(&response) + let classification: Classification = JsonResponse::parse(&response)?; + tracing::debug!(matched_labels = ?classification.labels, "classify completed"); + Ok(classification) } /// Analyzes sentiment of text. + #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] pub async fn analyze_sentiment(&self, text: &str) -> Result { let prompt = format!("{}\n\nText:\n{}", PROMPT_ANALYZE_SENTIMENT, text); let response = self.agent.prompt(&prompt).await?; - parse_json(&response) + let sentiment: Sentiment = JsonResponse::parse(&response)?; + tracing::debug!(sentiment = %sentiment.sentiment, confidence = %sentiment.confidence, "analyze_sentiment completed"); + Ok(sentiment) } /// Extracts relationships between entities in text. + #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] pub async fn extract_relationships(&self, text: &str) -> Result> { let prompt = format!("{}\n\nText:\n{}", PROMPT_EXTRACT_RELATIONSHIPS, text); let response = self.agent.prompt(&prompt).await?; - parse_json(&response) + let relationships: Vec = JsonResponse::parse(&response)?; + tracing::debug!( + relationship_count = relationships.len(), + "extract_relationships completed" + ); + Ok(relationships) } } - -/// Parses JSON from LLM response, handling markdown code blocks. 
-fn parse_json(response: &str) -> Result { - // Try to extract JSON from markdown code block if present - let json_str = if response.contains("```json") { - response - .split("```json") - .nth(1) - .and_then(|s| s.split("```").next()) - .map(|s| s.trim()) - .unwrap_or(response.trim()) - } else if response.contains("```") { - response - .split("```") - .nth(1) - .map(|s| s.trim()) - .unwrap_or(response.trim()) - } else { - response.trim() - }; - - serde_json::from_str(json_str).map_err(|e| Error::parse(format!("invalid JSON: {e}"))) -} diff --git a/crates/nvisy-rig/src/agent/text_generation.rs b/crates/nvisy-rig/src/agent/text_generation.rs index 675e8c7..3faddd2 100644 --- a/crates/nvisy-rig/src/agent/text_generation.rs +++ b/crates/nvisy-rig/src/agent/text_generation.rs @@ -3,6 +3,7 @@ use rig::agent::{Agent, AgentBuilder}; use rig::completion::Prompt; +use super::tool::ScratchpadTool; use crate::Result; use crate::provider::CompletionProvider; @@ -39,37 +40,59 @@ Only output the context statement, no explanation."; /// - Summarization /// - Title generation /// - Contextual chunking (adding context to chunks) +/// +/// When `with_tools` is enabled, the agent has access to: +/// - `ScratchpadTool` - For drafting and refining content iteratively pub struct TextGenerationAgent { agent: Agent, + model_name: String, } impl TextGenerationAgent { /// Creates a new text generation agent with the given completion provider. - pub fn new(provider: CompletionProvider) -> Self { - let agent = AgentBuilder::new(provider) + /// + /// # Arguments + /// * `provider` - The completion provider to use + /// * `with_tools` - Whether to enable tool usage (scratchpad for drafting) + pub fn new(provider: CompletionProvider, with_tools: bool) -> Self { + let model_name = provider.model_name().to_string(); + let builder = AgentBuilder::new(provider) .name(NAME) .description(DESCRIPTION) - .preamble(PREAMBLE) - .build(); - Self { agent } + .preamble(PREAMBLE); + + let agent = if with_tools { + builder.tool(ScratchpadTool::new()).build() + } else { + builder.build() + }; + + Self { agent, model_name } } /// Generates a summary of the text. + #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] pub async fn summarize(&self, text: &str) -> Result { let prompt = format!("{}\n\nText:\n{}", PROMPT_SUMMARIZE, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "summarize completed"); + Ok(response) } /// Generates a title for the text. + #[tracing::instrument(skip(self, text), fields(agent = NAME, model = %self.model_name, text_len = text.len()))] pub async fn generate_title(&self, text: &str) -> Result { let prompt = format!("{}\n\nText:\n{}", PROMPT_GENERATE_TITLE, text); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(title = %response, "generate_title completed"); + Ok(response) } /// Generates contextual information for a chunk. /// /// This is used for contextual chunking, where each chunk is enriched /// with context about how it fits into the larger document. 
+ #[tracing::instrument(skip(self, chunk, document_summary), fields(agent = NAME, model = %self.model_name, chunk_len = chunk.len(), summary_len = document_summary.len()))] pub async fn generate_chunk_context( &self, chunk: &str, @@ -79,6 +102,11 @@ impl TextGenerationAgent { "{}\n\nDocument Summary:\n{}\n\nChunk:\n{}", PROMPT_GENERATE_CHUNK_CONTEXT, document_summary, chunk ); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!( + response_len = response.len(), + "generate_chunk_context completed" + ); + Ok(response) } } diff --git a/crates/nvisy-rig/src/agent/tools/document_fetch.rs b/crates/nvisy-rig/src/agent/tool/document_fetch.rs similarity index 90% rename from crates/nvisy-rig/src/agent/tools/document_fetch.rs rename to crates/nvisy-rig/src/agent/tool/document_fetch.rs index 7205761..2daa407 100644 --- a/crates/nvisy-rig/src/agent/tools/document_fetch.rs +++ b/crates/nvisy-rig/src/agent/tool/document_fetch.rs @@ -101,18 +101,21 @@ impl Tool for DocumentFetchTool { } } + #[tracing::instrument(skip(self), fields(tool = Self::NAME, id = ?args.id, ids_count = args.ids.as_ref().map(|v| v.len())))] async fn call(&self, args: Self::Args) -> Result { - match (args.id, args.ids) { + let results = match (args.id, args.ids) { (Some(id), _) => { let doc = self .fetcher .fetch(&id) .await? .ok_or(DocumentFetchError::NotFound(id))?; - Ok(vec![doc]) + vec![doc] } - (None, Some(ids)) => self.fetcher.fetch_many(&ids).await, - (None, None) => Ok(vec![]), - } + (None, Some(ids)) => self.fetcher.fetch_many(&ids).await?, + (None, None) => vec![], + }; + tracing::debug!(result_count = results.len(), "document_fetch completed"); + Ok(results) } } diff --git a/crates/nvisy-rig/src/agent/tool/json_schema.rs b/crates/nvisy-rig/src/agent/tool/json_schema.rs new file mode 100644 index 0000000..f9fcdf5 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tool/json_schema.rs @@ -0,0 +1,324 @@ +//! JSON schema validation and response parsing. +//! +//! This module provides: +//! - Schema generation from Rust types via `schemars` +//! - JSON validation against schemas via `jsonschema` +//! - LLM response parsing (handles markdown code blocks, etc.) + +use std::marker::PhantomData; + +use jsonschema::Validator; +use rig::completion::ToolDefinition; +use rig::tool::Tool; +use schemars::JsonSchema; +use schemars::generate::SchemaSettings; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::{Error, Result}; + +/// Error type for JSON schema operations. +#[derive(Debug, thiserror::Error)] +#[error("json schema error")] +pub struct JsonSchemaError; + +/// Arguments for JSON schema validation. +/// +/// Generic over `T` which defines the expected schema via `schemars::JsonSchema`. +#[derive(Debug, Deserialize)] +pub struct JsonSchemaArgs { + /// The JSON data to validate. + pub data: Value, + #[serde(skip)] + _marker: PhantomData, +} + +/// Result of JSON schema validation. +#[derive(Debug, Serialize)] +pub struct JsonSchemaResult { + /// Whether the data is valid. + pub valid: bool, + /// Validation errors if any. + #[serde(skip_serializing_if = "Vec::is_empty")] + pub errors: Vec, +} + +/// Tool for validating JSON against a schema derived from a Rust type. +/// +/// Uses `schemars` to generate the JSON schema from the type parameter `T`, +/// and `jsonschema` for validation. 
+pub struct JsonSchemaTool<T> {
+    validator: Validator,
+    _marker: PhantomData<T>,
+}
+
+impl<T: JsonSchema> JsonSchemaTool<T> {
+    /// Creates a new JSON schema tool for type `T`.
+    pub fn new() -> Self {
+        let mut generator = SchemaSettings::draft07().into_generator();
+        let schema = generator.root_schema_for::<T>();
+        let schema_value = serde_json::to_value(&schema).expect("schema serialization cannot fail");
+        let validator = Validator::new(&schema_value).expect("valid schema");
+
+        Self {
+            validator,
+            _marker: PhantomData,
+        }
+    }
+
+    /// Validates JSON data against the schema.
+    fn validate_data(&self, data: &Value) -> Vec<String> {
+        self.validator
+            .iter_errors(data)
+            .map(|e| e.to_string())
+            .collect()
+    }
+}
+
+impl<T: JsonSchema> Default for JsonSchemaTool<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<T: JsonSchema + Send + Sync> Tool for JsonSchemaTool<T> {
+    type Args = JsonSchemaArgs<T>;
+    type Error = JsonSchemaError;
+    type Output = JsonSchemaResult;
+
+    const NAME: &'static str = "json_schema";
+
+    async fn definition(&self, _prompt: String) -> ToolDefinition {
+        ToolDefinition {
+            name: Self::NAME.to_string(),
+            description: "Validate JSON data against a JSON Schema. Use this to verify that structured data conforms to expected format.".to_string(),
+            parameters: serde_json::json!({
+                "type": "object",
+                "properties": {
+                    "data": {
+                        "description": "The JSON data to validate"
+                    }
+                },
+                "required": ["data"]
+            }),
+        }
+    }
+
+    #[tracing::instrument(skip(self, args), fields(tool = Self::NAME))]
+    async fn call(&self, args: Self::Args) -> std::result::Result<Self::Output, Self::Error> {
+        let errors = self.validate_data(&args.data);
+        let valid = errors.is_empty();
+
+        tracing::debug!(valid, error_count = errors.len(), "json_schema completed");
+
+        Ok(JsonSchemaResult { valid, errors })
+    }
+}
+
+/// Parser for extracting and validating JSON from LLM responses.
+///
+/// Handles common LLM output patterns:
+/// - Plain JSON
+/// - JSON wrapped in markdown code blocks (```json ... ```)
+/// - JSON wrapped in generic code blocks (``` ... ```)
+/// - JSON with surrounding explanatory text
+///
+/// # Example
+///
+/// ```ignore
+/// use nvisy_rig::agent::tool::JsonResponse;
+/// use serde::Deserialize;
+///
+/// #[derive(Deserialize)]
+/// struct UserInfo {
+///     name: String,
+///     age: u32,
+/// }
+///
+/// let response = r#"Here's the extracted data:
+/// ```json
+/// {"name": "Alice", "age": 30}
+/// ```"#;
+///
+/// let info: UserInfo = JsonResponse::parse(response)?;
+/// ```
+pub struct JsonResponse;
+
+impl JsonResponse {
+    /// Extracts JSON content from a response, stripping markdown formatting.
+    pub fn extract(response: &str) -> &str {
+        // Try ```json block first
+        if let Some(start) = response.find("```json") {
+            let after_marker = &response[start + 7..];
+            if let Some(end) = after_marker.find("```") {
+                return after_marker[..end].trim();
+            }
+        }
+
+        // Try generic ``` block
+        if let Some(start) = response.find("```") {
+            let after_marker = &response[start + 3..];
+            // Skip language identifier if on same line
+            let content_start = after_marker.find('\n').map(|i| i + 1).unwrap_or(0);
+            let after_newline = &after_marker[content_start..];
+            if let Some(end) = after_newline.find("```") {
+                return after_newline[..end].trim();
+            }
+        }
+
+        // Try to find JSON object or array boundaries
+        let trimmed = response.trim();
+        if (trimmed.starts_with('{') && trimmed.ends_with('}'))
+            || (trimmed.starts_with('[') && trimmed.ends_with(']'))
+        {
+            return trimmed;
+        }
+
+        // Find first { or [ and last } or ]
+        let start = trimmed.find(['{', '[']).unwrap_or(0);
+        let end = trimmed
+            .rfind(['}', ']'])
+            .map(|i| i + 1)
+            .unwrap_or(trimmed.len());
+
+        if start < end {
+            &trimmed[start..end]
+        } else {
+            trimmed
+        }
+    }
+
+    /// Parses JSON from an LLM response into the specified type.
+    ///
+    /// Automatically strips markdown code blocks and surrounding text.
+    pub fn parse<T: DeserializeOwned>(response: &str) -> Result<T> {
+        let json_str = Self::extract(response);
+        serde_json::from_str(json_str).map_err(|e| Error::parse(format!("invalid JSON: {e}")))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use schemars::JsonSchema;
+    use serde::Deserialize;
+    use serde_json::json;
+
+    use super::*;
+
+    #[derive(Debug, Deserialize, JsonSchema, PartialEq)]
+    struct TestPerson {
+        name: String,
+        age: u32,
+    }
+
+    #[tokio::test]
+    async fn test_valid_object() {
+        let tool = JsonSchemaTool::<TestPerson>::new();
+        let result = tool
+            .call(JsonSchemaArgs {
+                data: json!({
+                    "name": "Alice",
+                    "age": 30
+                }),
+                _marker: PhantomData,
+            })
+            .await
+            .unwrap();
+
+        assert!(result.valid);
+        assert!(result.errors.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_missing_required() {
+        let tool = JsonSchemaTool::<TestPerson>::new();
+        let result = tool
+            .call(JsonSchemaArgs {
+                data: json!({}),
+                _marker: PhantomData,
+            })
+            .await
+            .unwrap();
+
+        assert!(!result.valid);
+        assert!(!result.errors.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_type_mismatch() {
+        let tool = JsonSchemaTool::<TestPerson>::new();
+        let result = tool
+            .call(JsonSchemaArgs {
+                data: json!({
+                    "name": 123,
+                    "age": 30
+                }),
+                _marker: PhantomData,
+            })
+            .await
+            .unwrap();
+
+        assert!(!result.valid);
+        assert!(!result.errors.is_empty());
+    }
+
+    // JsonResponse tests
+
+    #[derive(Debug, Deserialize, PartialEq)]
+    struct TestData {
+        key: String,
+    }
+
+    #[test]
+    fn parse_plain_json() {
+        let response = r#"{"key": "value"}"#;
+        let result: TestData = JsonResponse::parse(response).unwrap();
+        assert_eq!(result.key, "value");
+    }
+
+    #[test]
+    fn parse_json_with_markdown_block() {
+        let response = r#"Here's the JSON:
+```json
+{"key": "value"}
+```"#;
+        let result: TestData = JsonResponse::parse(response).unwrap();
+        assert_eq!(result.key, "value");
+    }
+
+    #[test]
+    fn parse_json_with_generic_code_block() {
+        let response = r#"```
+{"key": "value"}
+```"#;
+        let result: TestData = JsonResponse::parse(response).unwrap();
+        assert_eq!(result.key, "value");
+    }
+
+    #[test]
+    fn parse_json_with_surrounding_text() {
+        let response = r#"The result is: {"key": "value"} as requested."#;
+        let result: TestData = JsonResponse::parse(response).unwrap();
+        assert_eq!(result.key, "value");
+    }
+
+    #[test]
fn parse_array() { + let response = r#"[{"key": "a"}, {"key": "b"}]"#; + let result: Vec = JsonResponse::parse(response).unwrap(); + assert_eq!(result.len(), 2); + assert_eq!(result[0].key, "a"); + assert_eq!(result[1].key, "b"); + } + + #[test] + fn extract_returns_json_content() { + let extracted = JsonResponse::extract( + r#"```json +{"key": "value"} +```"#, + ); + assert_eq!(extracted, r#"{"key": "value"}"#); + } +} diff --git a/crates/nvisy-rig/src/agent/tools/metadata_query.rs b/crates/nvisy-rig/src/agent/tool/metadata_query.rs similarity index 94% rename from crates/nvisy-rig/src/agent/tools/metadata_query.rs rename to crates/nvisy-rig/src/agent/tool/metadata_query.rs index 395e435..c0919f8 100644 --- a/crates/nvisy-rig/src/agent/tools/metadata_query.rs +++ b/crates/nvisy-rig/src/agent/tool/metadata_query.rs @@ -170,9 +170,13 @@ impl Tool for MetadataQueryTool { } } + #[tracing::instrument(skip(self), fields(tool = Self::NAME, filter_count = args.filters.len(), limit = args.limit, offset = args.offset))] async fn call(&self, args: Self::Args) -> Result { - self.querier + let results = self + .querier .query(&args.filters, args.limit, args.offset) - .await + .await?; + tracing::debug!(result_count = results.len(), "metadata_query completed"); + Ok(results) } } diff --git a/crates/nvisy-rig/src/agent/tool/mod.rs b/crates/nvisy-rig/src/agent/tool/mod.rs new file mode 100644 index 0000000..37f2d07 --- /dev/null +++ b/crates/nvisy-rig/src/agent/tool/mod.rs @@ -0,0 +1,13 @@ +//! Tools module for agent function calling capabilities. +//! +//! This module provides tools used internally by agents: +//! +//! - [`ScratchpadTool`] - Temporary working storage for drafting +//! - [`JsonSchemaTool`] - Validate JSON against schema (generic over `T: JsonSchema`) +//! - [`JsonResponse`] - Parse JSON from LLM responses (handles markdown blocks, etc.) + +mod json_schema; +mod scratchpad; + +pub use json_schema::{JsonResponse, JsonSchemaTool}; +pub use scratchpad::ScratchpadTool; diff --git a/crates/nvisy-rig/src/agent/tools/scratchpad.rs b/crates/nvisy-rig/src/agent/tool/scratchpad.rs similarity index 72% rename from crates/nvisy-rig/src/agent/tools/scratchpad.rs rename to crates/nvisy-rig/src/agent/tool/scratchpad.rs index 8d9ca86..d7bfe8f 100644 --- a/crates/nvisy-rig/src/agent/tools/scratchpad.rs +++ b/crates/nvisy-rig/src/agent/tool/scratchpad.rs @@ -3,103 +3,15 @@ use std::collections::HashMap; use std::sync::Arc; -use async_trait::async_trait; use rig::completion::ToolDefinition; use rig::tool::Tool; use serde::{Deserialize, Serialize}; use tokio::sync::RwLock; -/// Trait for scratchpad implementations. -#[async_trait] -pub trait Scratchpad: Send + Sync { - /// Write to the scratchpad. - async fn write(&self, content: &str) -> Result<(), ScratchpadError>; - - /// Append to the scratchpad. - async fn append(&self, content: &str) -> Result<(), ScratchpadError>; - - /// Read the scratchpad content. - async fn read(&self) -> Result; - - /// Clear the scratchpad. - async fn clear(&self) -> Result<(), ScratchpadError>; - - /// Get a named section from the scratchpad. - async fn get_section(&self, name: &str) -> Result, ScratchpadError>; - - /// Set a named section in the scratchpad. - async fn set_section(&self, name: &str, content: &str) -> Result<(), ScratchpadError>; -} - -/// In-memory scratchpad implementation. -pub struct InMemoryScratchpad { - content: RwLock, - sections: RwLock>, -} - -impl InMemoryScratchpad { - /// Creates a new empty scratchpad. 
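As a companion to the `json_schema` module added above, a short usage sketch of `JsonResponse::parse` together with a caller-defined schema type; the `Invoice` type, the sample reply, and the `nvisy_rig::Result` alias are illustrative assumptions:

```rust
use nvisy_rig::agent::tool::{JsonResponse, JsonSchemaTool};
use schemars::JsonSchema;
use serde::Deserialize;

// Illustrative target type: any `T: JsonSchema` can back a JsonSchemaTool,
// and any deserializable type can be produced by JsonResponse::parse.
#[derive(Debug, Deserialize, JsonSchema)]
struct Invoice {
    number: String,
    total: f64,
}

fn demo() -> nvisy_rig::Result<()> {
    // Typical LLM reply with explanatory text around the JSON payload.
    let reply = r#"Sure! The extracted invoice is {"number": "INV-7", "total": 99.5} as requested."#;
    let invoice: Invoice = JsonResponse::parse(reply)?;
    assert_eq!(invoice.number, "INV-7");

    // The same type can drive schema validation; the text analysis agent
    // registers a JsonSchemaTool<TextAnalysisOutput> when built with
    // `with_tools = true`.
    let _schema_tool = JsonSchemaTool::<Invoice>::new();
    Ok(())
}
```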
- pub fn new() -> Self { - Self { - content: RwLock::new(String::new()), - sections: RwLock::new(HashMap::new()), - } - } -} - -impl Default for InMemoryScratchpad { - fn default() -> Self { - Self::new() - } -} - -#[async_trait] -impl Scratchpad for InMemoryScratchpad { - async fn write(&self, content: &str) -> Result<(), ScratchpadError> { - let mut guard = self.content.write().await; - *guard = content.to_string(); - Ok(()) - } - - async fn append(&self, content: &str) -> Result<(), ScratchpadError> { - let mut guard = self.content.write().await; - guard.push_str(content); - Ok(()) - } - - async fn read(&self) -> Result { - let guard = self.content.read().await; - Ok(guard.clone()) - } - - async fn clear(&self) -> Result<(), ScratchpadError> { - let mut guard = self.content.write().await; - guard.clear(); - let mut sections = self.sections.write().await; - sections.clear(); - Ok(()) - } - - async fn get_section(&self, name: &str) -> Result, ScratchpadError> { - let guard = self.sections.read().await; - Ok(guard.get(name).cloned()) - } - - async fn set_section(&self, name: &str, content: &str) -> Result<(), ScratchpadError> { - let mut guard = self.sections.write().await; - guard.insert(name.to_string(), content.to_string()); - Ok(()) - } -} - /// Error type for scratchpad operations. #[derive(Debug, thiserror::Error)] -pub enum ScratchpadError { - #[error("write failed: {0}")] - Write(String), - #[error("read failed: {0}")] - Read(String), -} +#[error("scratchpad error")] +pub struct ScratchpadError; /// The operation to perform on the scratchpad. #[derive(Debug, Deserialize)] @@ -139,33 +51,77 @@ pub struct ScratchpadResult { pub message: Option, } -/// Tool for temporary working storage. -pub struct ScratchpadTool { - scratchpad: Arc, +/// In-memory scratchpad storage. +struct InMemoryScratchpad { + content: RwLock, + sections: RwLock>, } -impl ScratchpadTool { - /// Creates a new scratchpad tool. - pub fn new(scratchpad: S) -> Self { +impl InMemoryScratchpad { + fn new() -> Self { Self { - scratchpad: Arc::new(scratchpad), + content: RwLock::new(String::new()), + sections: RwLock::new(HashMap::new()), } } - /// Creates a new scratchpad tool from an Arc. - pub fn from_arc(scratchpad: Arc) -> Self { - Self { scratchpad } + async fn write(&self, content: &str) { + let mut guard = self.content.write().await; + *guard = content.to_string(); + } + + async fn append(&self, content: &str) { + let mut guard = self.content.write().await; + guard.push_str(content); } + + async fn read(&self) -> String { + let guard = self.content.read().await; + guard.clone() + } + + async fn clear(&self) { + let mut guard = self.content.write().await; + guard.clear(); + let mut sections = self.sections.write().await; + sections.clear(); + } + + async fn get_section(&self, name: &str) -> Option { + let guard = self.sections.read().await; + guard.get(name).cloned() + } + + async fn set_section(&self, name: &str, content: &str) { + let mut guard = self.sections.write().await; + guard.insert(name.to_string(), content.to_string()); + } +} + +/// Tool for temporary working storage. +/// +/// Provides a scratchpad for agents to draft, edit, and organize content +/// during multi-step reasoning tasks. +pub struct ScratchpadTool { + scratchpad: Arc, } -impl ScratchpadTool { +impl ScratchpadTool { /// Creates a new scratchpad tool with in-memory storage. 
- pub fn in_memory() -> Self { - Self::new(InMemoryScratchpad::new()) + pub fn new() -> Self { + Self { + scratchpad: Arc::new(InMemoryScratchpad::new()), + } } } -impl Tool for ScratchpadTool { +impl Default for ScratchpadTool { + fn default() -> Self { + Self::new() + } +} + +impl Tool for ScratchpadTool { type Args = ScratchpadArgs; type Error = ScratchpadError; type Output = ScratchpadResult; @@ -252,56 +208,64 @@ impl Tool for ScratchpadTool { } } + #[tracing::instrument(skip(self, args), fields(tool = Self::NAME, operation = ?std::mem::discriminant(&args.operation)))] async fn call(&self, args: Self::Args) -> Result { - match args.operation { + let result = match args.operation { ScratchpadOperation::Write { content } => { - self.scratchpad.write(&content).await?; - Ok(ScratchpadResult { + tracing::debug!(content_len = content.len(), "scratchpad write"); + self.scratchpad.write(&content).await; + ScratchpadResult { success: true, content: None, message: Some("Content written to scratchpad".to_string()), - }) + } } ScratchpadOperation::Append { content } => { - self.scratchpad.append(&content).await?; - Ok(ScratchpadResult { + tracing::debug!(content_len = content.len(), "scratchpad append"); + self.scratchpad.append(&content).await; + ScratchpadResult { success: true, content: None, message: Some("Content appended to scratchpad".to_string()), - }) + } } ScratchpadOperation::Read => { - let content = self.scratchpad.read().await?; - Ok(ScratchpadResult { + let content = self.scratchpad.read().await; + tracing::debug!(content_len = content.len(), "scratchpad read"); + ScratchpadResult { success: true, content: Some(content), message: None, - }) + } } ScratchpadOperation::Clear => { - self.scratchpad.clear().await?; - Ok(ScratchpadResult { + tracing::debug!("scratchpad clear"); + self.scratchpad.clear().await; + ScratchpadResult { success: true, content: None, message: Some("Scratchpad cleared".to_string()), - }) + } } ScratchpadOperation::GetSection { name } => { - let content = self.scratchpad.get_section(&name).await?; - Ok(ScratchpadResult { + let content = self.scratchpad.get_section(&name).await; + tracing::debug!(section = %name, found = content.is_some(), "scratchpad get_section"); + ScratchpadResult { success: content.is_some(), content, message: None, - }) + } } ScratchpadOperation::SetSection { name, content } => { - self.scratchpad.set_section(&name, &content).await?; - Ok(ScratchpadResult { + tracing::debug!(section = %name, content_len = content.len(), "scratchpad set_section"); + self.scratchpad.set_section(&name, &content).await; + ScratchpadResult { success: true, content: None, message: Some(format!("Section '{name}' updated")), - }) + } } - } + }; + Ok(result) } } diff --git a/crates/nvisy-rig/src/agent/tools/vector_search.rs b/crates/nvisy-rig/src/agent/tool/vector_search.rs similarity index 91% rename from crates/nvisy-rig/src/agent/tools/vector_search.rs rename to crates/nvisy-rig/src/agent/tool/vector_search.rs index f2f8c74..d92fea6 100644 --- a/crates/nvisy-rig/src/agent/tools/vector_search.rs +++ b/crates/nvisy-rig/src/agent/tool/vector_search.rs @@ -115,9 +115,13 @@ impl Tool for VectorSearchTool { } } + #[tracing::instrument(skip(self), fields(tool = Self::NAME, query_len = args.query.len(), limit = args.limit, threshold = ?args.threshold))] async fn call(&self, args: Self::Args) -> Result { - self.searcher + let results = self + .searcher .search(&args.query, args.limit, args.threshold) - .await + .await?; + tracing::debug!(result_count = results.len(), 
"vector_search completed"); + Ok(results) } } diff --git a/crates/nvisy-rig/src/agent/tools/context_store.rs b/crates/nvisy-rig/src/agent/tools/context_store.rs deleted file mode 100644 index 05e4376..0000000 --- a/crates/nvisy-rig/src/agent/tools/context_store.rs +++ /dev/null @@ -1,214 +0,0 @@ -//! Context store tool for persistent agent memory. - -use std::sync::Arc; - -use async_trait::async_trait; -use rig::completion::ToolDefinition; -use rig::tool::Tool; -use serde::{Deserialize, Serialize}; - -/// Trait for context store implementations. -#[async_trait] -pub trait ContextStore: Send + Sync { - /// Store a value with a key. - async fn set(&self, key: &str, value: serde_json::Value) -> Result<(), ContextStoreError>; - - /// Retrieve a value by key. - async fn get(&self, key: &str) -> Result, ContextStoreError>; - - /// Delete a value by key. - async fn delete(&self, key: &str) -> Result; - - /// List all keys with optional prefix filter. - async fn list(&self, prefix: Option<&str>) -> Result, ContextStoreError>; -} - -/// Error type for context store operations. -#[derive(Debug, thiserror::Error)] -pub enum ContextStoreError { - #[error("store failed: {0}")] - Store(String), - #[error("retrieve failed: {0}")] - Retrieve(String), - #[error("serialization error: {0}")] - Serialization(String), -} - -/// The operation to perform on the context store. -#[derive(Debug, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum ContextOperation { - /// Store a value. - Set { - key: String, - value: serde_json::Value, - }, - /// Retrieve a value. - Get { key: String }, - /// Delete a value. - Delete { key: String }, - /// List all keys. - List { prefix: Option }, -} - -/// Arguments for context store operations. -#[derive(Debug, Deserialize)] -pub struct ContextStoreArgs { - /// The operation to perform. - pub operation: ContextOperation, -} - -/// Result of a context store operation. -#[derive(Debug, Serialize)] -pub struct ContextStoreResult { - /// Whether the operation succeeded. - pub success: bool, - /// The result value (for get operations). - #[serde(skip_serializing_if = "Option::is_none")] - pub value: Option, - /// List of keys (for list operations). - #[serde(skip_serializing_if = "Option::is_none")] - pub keys: Option>, - /// Optional message. - #[serde(skip_serializing_if = "Option::is_none")] - pub message: Option, -} - -/// Tool for storing and retrieving context. -pub struct ContextStoreTool { - store: Arc, -} - -impl ContextStoreTool { - /// Creates a new context store tool. - pub fn new(store: S) -> Self { - Self { - store: Arc::new(store), - } - } - - /// Creates a new context store tool from an Arc. - pub fn from_arc(store: Arc) -> Self { - Self { store } - } -} - -impl Tool for ContextStoreTool { - type Args = ContextStoreArgs; - type Error = ContextStoreError; - type Output = ContextStoreResult; - - const NAME: &'static str = "context_store"; - - async fn definition(&self, _prompt: String) -> ToolDefinition { - ToolDefinition { - name: Self::NAME.to_string(), - description: "Store and retrieve persistent context values. 
Use this to remember information across conversation turns or save intermediate results.".to_string(), - parameters: serde_json::json!({ - "type": "object", - "properties": { - "operation": { - "type": "object", - "oneOf": [ - { - "type": "object", - "properties": { - "set": { - "type": "object", - "properties": { - "key": { "type": "string" }, - "value": {} - }, - "required": ["key", "value"] - } - } - }, - { - "type": "object", - "properties": { - "get": { - "type": "object", - "properties": { - "key": { "type": "string" } - }, - "required": ["key"] - } - } - }, - { - "type": "object", - "properties": { - "delete": { - "type": "object", - "properties": { - "key": { "type": "string" } - }, - "required": ["key"] - } - } - }, - { - "type": "object", - "properties": { - "list": { - "type": "object", - "properties": { - "prefix": { "type": "string" } - } - } - } - } - ], - "description": "The operation to perform: set, get, delete, or list" - } - }, - "required": ["operation"] - }), - } - } - - async fn call(&self, args: Self::Args) -> Result { - match args.operation { - ContextOperation::Set { key, value } => { - self.store.set(&key, value).await?; - Ok(ContextStoreResult { - success: true, - value: None, - keys: None, - message: Some(format!("Stored value for key: {key}")), - }) - } - ContextOperation::Get { key } => { - let value = self.store.get(&key).await?; - Ok(ContextStoreResult { - success: value.is_some(), - value, - keys: None, - message: None, - }) - } - ContextOperation::Delete { key } => { - let deleted = self.store.delete(&key).await?; - Ok(ContextStoreResult { - success: deleted, - value: None, - keys: None, - message: if deleted { - Some(format!("Deleted key: {key}")) - } else { - Some(format!("Key not found: {key}")) - }, - }) - } - ContextOperation::List { prefix } => { - let keys = self.store.list(prefix.as_deref()).await?; - Ok(ContextStoreResult { - success: true, - value: None, - keys: Some(keys), - message: None, - }) - } - } - } -} diff --git a/crates/nvisy-rig/src/agent/tools/image_analysis.rs b/crates/nvisy-rig/src/agent/tools/image_analysis.rs deleted file mode 100644 index 702f2fd..0000000 --- a/crates/nvisy-rig/src/agent/tools/image_analysis.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! Image analysis tool using VLM. - -use std::sync::Arc; - -use rig::completion::ToolDefinition; -use rig::tool::Tool; -use serde::{Deserialize, Serialize}; - -use crate::agent::VisionAgent; - -/// Error type for image analysis operations. -#[derive(Debug, thiserror::Error)] -pub enum ImageAnalysisError { - #[error("analysis failed: {0}")] - Analysis(String), - #[error("invalid image: {0}")] - InvalidImage(String), - #[error("unsupported format: {0}")] - UnsupportedFormat(String), -} - -impl From for ImageAnalysisError { - fn from(e: crate::Error) -> Self { - Self::Analysis(e.to_string()) - } -} - -/// The type of analysis to perform. -#[derive(Debug, Clone, Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] -pub enum AnalysisType { - /// Brief description (1-2 sentences). - Describe, - /// Detailed description. - DescribeDetailed, - /// Extract text (OCR). - ExtractText, - /// Detect objects. - DetectObjects, - /// Custom prompt. - Custom { prompt: String }, -} - -/// Arguments for image analysis. -#[derive(Debug, Deserialize)] -pub struct ImageAnalysisArgs { - /// The image data as base64 or URL. - pub image: String, - /// The type of analysis to perform. 
- #[serde(default = "default_analysis_type")] - pub analysis_type: AnalysisType, -} - -fn default_analysis_type() -> AnalysisType { - AnalysisType::Describe -} - -/// Result of image analysis. -#[derive(Debug, Serialize)] -pub struct ImageAnalysisResult { - /// The analysis result. - pub result: String, - /// The type of analysis performed. - pub analysis_type: AnalysisType, -} - -/// Tool for analyzing images using VLM. -pub struct ImageAnalysisTool { - agent: Arc, -} - -impl ImageAnalysisTool { - /// Creates a new image analysis tool. - pub fn new(agent: VisionAgent) -> Self { - Self { - agent: Arc::new(agent), - } - } - - /// Creates a new image analysis tool from an Arc. - pub fn from_arc(agent: Arc) -> Self { - Self { agent } - } -} - -impl Tool for ImageAnalysisTool { - type Args = ImageAnalysisArgs; - type Error = ImageAnalysisError; - type Output = ImageAnalysisResult; - - const NAME: &'static str = "image_analysis"; - - async fn definition(&self, _prompt: String) -> ToolDefinition { - ToolDefinition { - name: Self::NAME.to_string(), - description: "Analyze an image using vision-language model. Can describe images, extract text (OCR), detect objects, or answer custom questions about the image.".to_string(), - parameters: serde_json::json!({ - "type": "object", - "properties": { - "image": { - "type": "string", - "description": "The image as base64-encoded data or a URL" - }, - "analysis_type": { - "type": "object", - "oneOf": [ - { - "type": "object", - "properties": { - "describe": { "type": "object" } - }, - "description": "Brief description (1-2 sentences)" - }, - { - "type": "object", - "properties": { - "describe_detailed": { "type": "object" } - }, - "description": "Detailed description" - }, - { - "type": "object", - "properties": { - "extract_text": { "type": "object" } - }, - "description": "Extract text from the image (OCR)" - }, - { - "type": "object", - "properties": { - "detect_objects": { "type": "object" } - }, - "description": "Detect and list objects in the image" - }, - { - "type": "object", - "properties": { - "custom": { - "type": "object", - "properties": { - "prompt": { "type": "string" } - }, - "required": ["prompt"] - } - }, - "description": "Custom analysis with your own prompt" - } - ], - "description": "The type of analysis to perform (default: describe)" - } - }, - "required": ["image"] - }), - } - } - - async fn call(&self, args: Self::Args) -> Result { - let result = match &args.analysis_type { - AnalysisType::Describe => self.agent.describe(&args.image).await?, - AnalysisType::DescribeDetailed => self.agent.describe_detailed(&args.image).await?, - AnalysisType::ExtractText => self.agent.extract_text(&args.image).await?, - AnalysisType::DetectObjects => self.agent.detect_objects(&args.image).await?, - AnalysisType::Custom { prompt } => { - // For custom prompts, we use describe with a modified prompt - // In a real implementation, VisionAgent would have a custom method - let custom_prompt = format!("{}\n\n[Image: {}]", prompt, args.image); - self.agent.describe(&custom_prompt).await? - } - }; - - Ok(ImageAnalysisResult { - result, - analysis_type: args.analysis_type, - }) - } -} diff --git a/crates/nvisy-rig/src/agent/tools/json_schema.rs b/crates/nvisy-rig/src/agent/tools/json_schema.rs deleted file mode 100644 index 51caaeb..0000000 --- a/crates/nvisy-rig/src/agent/tools/json_schema.rs +++ /dev/null @@ -1,334 +0,0 @@ -//! JSON schema validation tool. 
- -use rig::completion::ToolDefinition; -use rig::tool::Tool; -use serde::{Deserialize, Serialize}; -use serde_json::Value; - -/// Error type for JSON schema operations. -#[derive(Debug, thiserror::Error)] -pub enum JsonSchemaError { - #[error("invalid schema: {0}")] - InvalidSchema(String), - #[error("invalid JSON: {0}")] - InvalidJson(String), - #[error("validation failed: {errors:?}")] - ValidationFailed { errors: Vec }, -} - -/// Arguments for JSON schema validation. -#[derive(Debug, Deserialize)] -pub struct JsonSchemaArgs { - /// The JSON schema to validate against. - pub schema: Value, - /// The JSON data to validate. - pub data: Value, -} - -/// Result of JSON schema validation. -#[derive(Debug, Serialize)] -pub struct JsonSchemaResult { - /// Whether the data is valid. - pub valid: bool, - /// Validation errors if any. - #[serde(skip_serializing_if = "Vec::is_empty")] - pub errors: Vec, -} - -/// Tool for validating JSON against a schema. -pub struct JsonSchemaTool; - -impl JsonSchemaTool { - /// Creates a new JSON schema tool. - pub fn new() -> Self { - Self - } - - /// Validates JSON data against a schema. - /// - /// This is a simplified validator that checks: - /// - Type matching - /// - Required properties - /// - Basic constraints - fn validate(schema: &Value, data: &Value, path: &str) -> Vec { - let mut errors = Vec::new(); - - // Get the expected type - let expected_type = schema.get("type").and_then(|t| t.as_str()); - - match expected_type { - Some("object") => { - if !data.is_object() { - errors.push(format!("{path}: expected object, got {}", type_name(data))); - return errors; - } - - let obj = data.as_object().unwrap(); - - // Check required properties - if let Some(required) = schema.get("required").and_then(|r| r.as_array()) { - for req in required { - if let Some(field) = req.as_str() - && !obj.contains_key(field) - { - errors.push(format!("{path}: missing required property '{field}'")); - } - } - } - - // Validate properties - if let Some(properties) = schema.get("properties").and_then(|p| p.as_object()) { - for (key, prop_schema) in properties { - if let Some(value) = obj.get(key) { - let prop_path = if path.is_empty() { - key.clone() - } else { - format!("{path}.{key}") - }; - errors.extend(Self::validate(prop_schema, value, &prop_path)); - } - } - } - } - Some("array") => { - if !data.is_array() { - errors.push(format!("{path}: expected array, got {}", type_name(data))); - return errors; - } - - let arr = data.as_array().unwrap(); - - // Check min/max items - if let Some(min) = schema.get("minItems").and_then(|m| m.as_u64()) - && (arr.len() as u64) < min - { - errors.push(format!( - "{path}: array has {} items, minimum is {min}", - arr.len() - )); - } - if let Some(max) = schema.get("maxItems").and_then(|m| m.as_u64()) - && (arr.len() as u64) > max - { - errors.push(format!( - "{path}: array has {} items, maximum is {max}", - arr.len() - )); - } - - // Validate items - if let Some(items_schema) = schema.get("items") { - for (i, item) in arr.iter().enumerate() { - let item_path = format!("{path}[{i}]"); - errors.extend(Self::validate(items_schema, item, &item_path)); - } - } - } - Some("string") => { - if !data.is_string() { - errors.push(format!("{path}: expected string, got {}", type_name(data))); - return errors; - } - - let s = data.as_str().unwrap(); - - // Check min/max length - if let Some(min) = schema.get("minLength").and_then(|m| m.as_u64()) - && (s.len() as u64) < min - { - errors.push(format!( - "{path}: string length {} is less than minimum {min}", 
- s.len() - )); - } - if let Some(max) = schema.get("maxLength").and_then(|m| m.as_u64()) - && (s.len() as u64) > max - { - errors.push(format!( - "{path}: string length {} exceeds maximum {max}", - s.len() - )); - } - - // Check enum - if let Some(enum_values) = schema.get("enum").and_then(|e| e.as_array()) - && !enum_values.contains(data) - { - errors.push(format!("{path}: value not in enum")); - } - } - Some("number") | Some("integer") => { - let is_valid = if expected_type == Some("integer") { - data.is_i64() || data.is_u64() - } else { - data.is_number() - }; - - if !is_valid { - errors.push(format!( - "{path}: expected {}, got {}", - expected_type.unwrap(), - type_name(data) - )); - return errors; - } - - if let Some(num) = data.as_f64() { - if let Some(min) = schema.get("minimum").and_then(|m| m.as_f64()) - && num < min - { - errors.push(format!("{path}: {num} is less than minimum {min}")); - } - if let Some(max) = schema.get("maximum").and_then(|m| m.as_f64()) - && num > max - { - errors.push(format!("{path}: {num} exceeds maximum {max}")); - } - } - } - Some("boolean") => { - if !data.is_boolean() { - errors.push(format!("{path}: expected boolean, got {}", type_name(data))); - } - } - Some("null") => { - if !data.is_null() { - errors.push(format!("{path}: expected null, got {}", type_name(data))); - } - } - None => { - // No type specified, accept anything - } - Some(t) => { - errors.push(format!("{path}: unknown type '{t}'")); - } - } - - errors - } -} - -fn type_name(value: &Value) -> &'static str { - match value { - Value::Null => "null", - Value::Bool(_) => "boolean", - Value::Number(n) => { - if n.is_i64() || n.is_u64() { - "integer" - } else { - "number" - } - } - Value::String(_) => "string", - Value::Array(_) => "array", - Value::Object(_) => "object", - } -} - -impl Default for JsonSchemaTool { - fn default() -> Self { - Self::new() - } -} - -impl Tool for JsonSchemaTool { - type Args = JsonSchemaArgs; - type Error = JsonSchemaError; - type Output = JsonSchemaResult; - - const NAME: &'static str = "json_schema"; - - async fn definition(&self, _prompt: String) -> ToolDefinition { - ToolDefinition { - name: Self::NAME.to_string(), - description: "Validate JSON data against a JSON Schema. 
Use this to verify that structured data conforms to expected format.".to_string(), - parameters: serde_json::json!({ - "type": "object", - "properties": { - "schema": { - "type": "object", - "description": "The JSON Schema to validate against" - }, - "data": { - "description": "The JSON data to validate" - } - }, - "required": ["schema", "data"] - }), - } - } - - async fn call(&self, args: Self::Args) -> Result { - let errors = Self::validate(&args.schema, &args.data, ""); - - Ok(JsonSchemaResult { - valid: errors.is_empty(), - errors, - }) - } -} - -#[cfg(test)] -mod tests { - use serde_json::json; - - use super::*; - - #[tokio::test] - async fn test_valid_object() { - let tool = JsonSchemaTool::new(); - let result = tool - .call(JsonSchemaArgs { - schema: json!({ - "type": "object", - "properties": { - "name": { "type": "string" }, - "age": { "type": "integer" } - }, - "required": ["name"] - }), - data: json!({ - "name": "Alice", - "age": 30 - }), - }) - .await - .unwrap(); - - assert!(result.valid); - assert!(result.errors.is_empty()); - } - - #[tokio::test] - async fn test_missing_required() { - let tool = JsonSchemaTool::new(); - let result = tool - .call(JsonSchemaArgs { - schema: json!({ - "type": "object", - "required": ["name"] - }), - data: json!({}), - }) - .await - .unwrap(); - - assert!(!result.valid); - assert!(result.errors[0].contains("missing required")); - } - - #[tokio::test] - async fn test_type_mismatch() { - let tool = JsonSchemaTool::new(); - let result = tool - .call(JsonSchemaArgs { - schema: json!({ "type": "string" }), - data: json!(42), - }) - .await - .unwrap(); - - assert!(!result.valid); - assert!(result.errors[0].contains("expected string")); - } -} diff --git a/crates/nvisy-rig/src/agent/tools/mod.rs b/crates/nvisy-rig/src/agent/tools/mod.rs deleted file mode 100644 index bc1b1e2..0000000 --- a/crates/nvisy-rig/src/agent/tools/mod.rs +++ /dev/null @@ -1,30 +0,0 @@ -//! Tools module for agent function calling capabilities. -//! -//! This module provides tools that agents can use during execution: -//! -//! - [`VectorSearchTool`] - Search vector store for similar chunks -//! - [`DocumentFetchTool`] - Fetch document/chunk by ID -//! - [`MetadataQueryTool`] - Query documents by metadata filters -//! - [`ContextStoreTool`] - Save/retrieve from agent memory -//! - [`ScratchpadTool`] - Temporary working storage -//! - [`WebFetchTool`] - Fetch content from URLs -//! - [`ImageAnalysisTool`] - Analyze images with VLM -//! - [`JsonSchemaTool`] - Validate JSON against schema - -mod context_store; -mod document_fetch; -mod image_analysis; -mod json_schema; -mod metadata_query; -mod scratchpad; -mod vector_search; -mod web_fetch; - -pub use context_store::{ContextStore, ContextStoreTool}; -pub use document_fetch::{DocumentFetchTool, DocumentFetcher}; -pub use image_analysis::ImageAnalysisTool; -pub use json_schema::JsonSchemaTool; -pub use metadata_query::{MetadataQuerier, MetadataQueryTool}; -pub use scratchpad::{Scratchpad, ScratchpadTool}; -pub use vector_search::{VectorSearchTool, VectorSearcher}; -pub use web_fetch::{FetchResponse, WebFetchTool, WebFetcher}; diff --git a/crates/nvisy-rig/src/agent/tools/web_fetch.rs b/crates/nvisy-rig/src/agent/tools/web_fetch.rs deleted file mode 100644 index e58ca0c..0000000 --- a/crates/nvisy-rig/src/agent/tools/web_fetch.rs +++ /dev/null @@ -1,271 +0,0 @@ -//! Web fetch tool for retrieving content from URLs. 
- -use std::sync::Arc; - -use async_trait::async_trait; -use rig::completion::ToolDefinition; -use rig::tool::Tool; -use serde::{Deserialize, Serialize}; - -/// Error type for web fetch operations. -#[derive(Debug, thiserror::Error)] -pub enum WebFetchError { - #[error("request failed: {0}")] - Request(String), - #[error("invalid URL: {0}")] - InvalidUrl(String), - #[error("timeout")] - Timeout, - #[error("content too large: {size} bytes (max: {max})")] - ContentTooLarge { size: usize, max: usize }, - #[error("unsupported content type: {0}")] - UnsupportedContentType(String), -} - -/// Arguments for web fetch. -#[derive(Debug, Deserialize)] -pub struct WebFetchArgs { - /// The URL to fetch. - pub url: String, - /// Maximum content size in bytes. - #[serde(default = "default_max_size")] - pub max_size: usize, - /// Whether to extract text only (strip HTML). - #[serde(default = "default_extract_text")] - pub extract_text: bool, - /// Timeout in seconds. - #[serde(default = "default_timeout")] - pub timeout_secs: u64, -} - -fn default_max_size() -> usize { - 1_000_000 // 1MB -} - -fn default_extract_text() -> bool { - true -} - -fn default_timeout() -> u64 { - 30 -} - -/// Result of a web fetch operation. -#[derive(Debug, Serialize)] -pub struct WebFetchResult { - /// The fetched content. - pub content: String, - /// The content type. - pub content_type: Option, - /// The final URL (after redirects). - pub final_url: String, - /// Content length in bytes. - pub length: usize, - /// Page title if available. - #[serde(skip_serializing_if = "Option::is_none")] - pub title: Option, -} - -/// Trait for fetching web content. -/// -/// Implementations should handle HTTP requests, redirects, and content extraction. -#[async_trait] -pub trait WebFetcher: Send + Sync { - /// Fetches content from a URL. - /// - /// # Arguments - /// - /// * `url` - The URL to fetch - /// * `max_size` - Maximum content size in bytes - /// * `timeout_secs` - Request timeout in seconds - /// - /// # Returns - /// - /// The fetched content as bytes, the final URL, and the content type. - async fn fetch( - &self, - url: &str, - max_size: usize, - timeout_secs: u64, - ) -> Result; -} - -/// Raw response from a web fetch operation. -#[derive(Debug)] -pub struct FetchResponse { - /// The raw content bytes. - pub bytes: bytes::Bytes, - /// The final URL after redirects. - pub final_url: String, - /// The content type header value. - pub content_type: Option, -} - -/// Tool for fetching web content. -/// -/// This tool uses a pluggable `WebFetcher` implementation for making HTTP requests. -pub struct WebFetchTool { - fetcher: Arc, - max_size: usize, -} - -impl WebFetchTool { - /// Creates a new web fetch tool. - pub fn new(fetcher: F) -> Self { - Self { - fetcher: Arc::new(fetcher), - max_size: default_max_size(), - } - } - - /// Creates a new web fetch tool with a shared fetcher. - pub fn with_arc(fetcher: Arc) -> Self { - Self { - fetcher, - max_size: default_max_size(), - } - } - - /// Creates a new web fetch tool with custom max size. - pub fn with_max_size(fetcher: F, max_size: usize) -> Self { - Self { - fetcher: Arc::new(fetcher), - max_size, - } - } - - /// Extracts text content from HTML. - fn extract_text_from_html(html: &str) -> (String, Option) { - // Simple HTML text extraction - // In production, you might want to use a proper HTML parser like scraper - - // Extract title - let title = html.find("").and_then(|start| { - let start = start + 7; - html[start..] 
- .find("") - .map(|end| html[start..start + end].trim().to_string()) - }); - - // Remove script and style tags - let mut text = html.to_string(); - - // Remove script tags - while let Some(start) = text.find("") { - text = format!("{}{}", &text[..start], &text[start + end + 9..]); - } else { - break; - } - } - - // Remove style tags - while let Some(start) = text.find("") { - text = format!("{}{}", &text[..start], &text[start + end + 8..]); - } else { - break; - } - } - - // Remove all HTML tags - let mut result = String::new(); - let mut in_tag = false; - for c in text.chars() { - match c { - '<' => in_tag = true, - '>' => in_tag = false, - _ if !in_tag => result.push(c), - _ => {} - } - } - - // Decode common HTML entities - let result = result - .replace(" ", " ") - .replace("&", "&") - .replace("<", "<") - .replace(">", ">") - .replace(""", "\"") - .replace("'", "'"); - - // Normalize whitespace - let result: String = result.split_whitespace().collect::>().join(" "); - - (result, title) - } -} - -impl Tool for WebFetchTool { - type Args = WebFetchArgs; - type Error = WebFetchError; - type Output = WebFetchResult; - - const NAME: &'static str = "web_fetch"; - - async fn definition(&self, _prompt: String) -> ToolDefinition { - ToolDefinition { - name: Self::NAME.to_string(), - description: "Fetch content from a URL. Can retrieve web pages, APIs, or other HTTP resources. Optionally extracts text from HTML.".to_string(), - parameters: serde_json::json!({ - "type": "object", - "properties": { - "url": { - "type": "string", - "description": "The URL to fetch" - }, - "max_size": { - "type": "integer", - "description": "Maximum content size in bytes (default: 1MB)", - "default": 1000000 - }, - "extract_text": { - "type": "boolean", - "description": "Extract text only from HTML (default: true)", - "default": true - }, - "timeout_secs": { - "type": "integer", - "description": "Request timeout in seconds (default: 30)", - "default": 30 - } - }, - "required": ["url"] - }), - } - } - - async fn call(&self, args: Self::Args) -> Result { - let max_size = args.max_size.min(self.max_size); - - let response = self - .fetcher - .fetch(&args.url, max_size, args.timeout_secs) - .await?; - - let content = String::from_utf8_lossy(&response.bytes).to_string(); - let length = content.len(); - - let is_html = response - .content_type - .as_ref() - .map(|ct| ct.contains("text/html")) - .unwrap_or(false) - || content.trim_start().starts_with(", + model_name: String, } impl VisionAgent { /// Creates a new vision agent with the given completion provider. - pub fn new(provider: CompletionProvider) -> Self { - let agent = AgentBuilder::new(provider) + /// + /// # Arguments + /// * `provider` - The completion provider to use + /// * `with_tools` - Whether to enable tool usage (scratchpad for drafting) + pub fn new(provider: CompletionProvider, with_tools: bool) -> Self { + let model_name = provider.model_name().to_string(); + let builder = AgentBuilder::new(provider) .name(NAME) .description(DESCRIPTION) - .preamble(PREAMBLE) - .build(); - Self { agent } + .preamble(PREAMBLE); + + let agent = if with_tools { + builder.tool(ScratchpadTool::new()).build() + } else { + builder.build() + }; + + Self { agent, model_name } } /// Generates a brief description of an image. 
+ #[tracing::instrument(skip(self, image_base64), fields(agent = NAME, model = %self.model_name, image_len = image_base64.len()))] pub async fn describe(&self, image_base64: &str) -> Result { let prompt = format!("{}\n\n[Image: {}]", PROMPT_DESCRIBE, image_base64); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "describe completed"); + Ok(response) } /// Generates a detailed description of an image. + #[tracing::instrument(skip(self, image_base64), fields(agent = NAME, model = %self.model_name, image_len = image_base64.len()))] pub async fn describe_detailed(&self, image_base64: &str) -> Result { let prompt = format!("{}\n\n[Image: {}]", PROMPT_DESCRIBE_DETAILED, image_base64); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "describe_detailed completed"); + Ok(response) } /// Extracts text from an image using generative OCR. + #[tracing::instrument(skip(self, image_base64), fields(agent = NAME, model = %self.model_name, image_len = image_base64.len()))] pub async fn extract_text(&self, image_base64: &str) -> Result { let prompt = format!("{}\n\n[Image: {}]", PROMPT_EXTRACT_TEXT, image_base64); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "extract_text completed"); + Ok(response) } /// Detects and lists objects in an image. + #[tracing::instrument(skip(self, image_base64), fields(agent = NAME, model = %self.model_name, image_len = image_base64.len()))] pub async fn detect_objects(&self, image_base64: &str) -> Result { let prompt = format!("{}\n\n[Image: {}]", PROMPT_DETECT_OBJECTS, image_base64); - Ok(self.agent.prompt(&prompt).await?) + let response = self.agent.prompt(&prompt).await?; + tracing::debug!(response_len = response.len(), "detect_objects completed"); + Ok(response) } } diff --git a/crates/nvisy-rig/src/chat/agent/context.rs b/crates/nvisy-rig/src/chat/agent/context.rs deleted file mode 100644 index c6a4483..0000000 --- a/crates/nvisy-rig/src/chat/agent/context.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! Agent context for a single request. - -use crate::rag::RetrievedChunk; -use crate::session::Session; - -/// Context for an agent request. -#[derive(Debug, Clone)] -pub struct AgentContext { - /// The session this request belongs to. - session: Session, - - /// The user's message. - message: String, - - /// Retrieved document chunks for RAG. - retrieved_chunks: Vec, -} - -impl AgentContext { - /// Creates a new agent context. - pub fn new(session: Session, message: String, retrieved_chunks: Vec) -> Self { - Self { - session, - message, - retrieved_chunks, - } - } - - /// Returns the session. - pub fn session(&self) -> &Session { - &self.session - } - - /// Returns the user's message. - pub fn message(&self) -> &str { - &self.message - } - - /// Returns the retrieved chunks. - pub fn retrieved_chunks(&self) -> &[RetrievedChunk] { - &self.retrieved_chunks - } - - /// Returns whether there are any retrieved chunks. 
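A similar hedged sketch for the updated `VisionAgent` constructor and methods shown above; module paths, provider setup, and the base64 input are assumptions for illustration:

```rust
use nvisy_rig::agent::VisionAgent;
use nvisy_rig::provider::CompletionProvider;

// Minimal sketch: `provider` and `image_base64` are supplied by the caller.
async fn caption_image(
    provider: CompletionProvider,
    image_base64: &str,
) -> nvisy_rig::Result<String> {
    // `false` builds the agent without tools; `true` would attach a ScratchpadTool.
    let agent = VisionAgent::new(provider, false);

    // Each call is traced with the agent name, model, and input length.
    let caption = agent.describe(image_base64).await?;
    let _ocr = agent.extract_text(image_base64).await?;

    Ok(caption)
}
```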
- pub fn has_context(&self) -> bool { - !self.retrieved_chunks.is_empty() - } -} - -#[cfg(test)] -mod tests { - use uuid::Uuid; - - use super::*; - use crate::session::CreateSession; - - #[test] - fn context_without_chunks() { - let session = Session::new(CreateSession::new( - Uuid::now_v7(), - Uuid::now_v7(), - Uuid::now_v7(), - )); - let context = AgentContext::new(session, "Hello".to_string(), Vec::new()); - - assert!(!context.has_context()); - } -} diff --git a/crates/nvisy-rig/src/chat/agent/executor.rs b/crates/nvisy-rig/src/chat/agent/executor.rs deleted file mode 100644 index 4bdb38e..0000000 --- a/crates/nvisy-rig/src/chat/agent/executor.rs +++ /dev/null @@ -1,45 +0,0 @@ -//! Agent executor that runs the conversation loop. - -use std::sync::Arc; - -use futures::StreamExt; -use futures::stream::BoxStream; - -use super::{AgentConfig, AgentContext, ChatEvent}; -use crate::Result; -use crate::provider::CompletionModel; -use crate::tool::ToolRegistry; - -/// Executor for running the agent loop. -pub struct AgentExecutor { - config: AgentConfig, - tools: Arc, - context: AgentContext, - model: CompletionModel, -} - -impl AgentExecutor { - /// Creates a new executor. - pub fn new( - config: AgentConfig, - tools: Arc, - context: AgentContext, - model: CompletionModel, - ) -> Self { - Self { - config, - tools, - context, - model, - } - } - - /// Runs the agent loop and returns a stream of events. - pub async fn run(self) -> Result>> { - // TODO: Implement the actual agent loop - let _ = (&self.config, &self.tools, &self.context, &self.model); - - let stream = futures::stream::empty(); - Ok(stream.boxed()) - } -} diff --git a/crates/nvisy-rig/src/chat/agent/mod.rs b/crates/nvisy-rig/src/chat/agent/mod.rs deleted file mode 100644 index 929d18d..0000000 --- a/crates/nvisy-rig/src/chat/agent/mod.rs +++ /dev/null @@ -1,119 +0,0 @@ -//! Agent module for orchestrating AI-powered document processing. - -mod context; -mod executor; -mod prompt; - -use std::sync::Arc; - -pub use context::AgentContext; -pub use executor::AgentExecutor; -use futures::stream::BoxStream; -pub use prompt::PromptBuilder; -use uuid::Uuid; - -use super::ChatEvent; -use crate::Result; -use crate::provider::CompletionModel; -use crate::rag::RetrievedChunk; -use crate::session::Session; -use crate::tool::ToolRegistry; -use crate::tool::edit::ProposedEdit; - -/// Configuration for the agent. -#[derive(Debug, Clone)] -pub struct AgentConfig { - /// Maximum number of tool call iterations. - pub max_iterations: usize, - - /// Maximum tokens for completion. - pub max_tokens: u32, - - /// Temperature for generation. - pub temperature: f32, - - /// Whether to include thinking in output. - pub include_thinking: bool, - - /// Default completion model. - pub default_model: CompletionModel, -} - -impl Default for AgentConfig { - fn default() -> Self { - Self { - max_iterations: 10, - max_tokens: 4096, - temperature: 0.7, - include_thinking: false, - default_model: CompletionModel::Ollama("llama3.2".to_string()), - } - } -} - -/// The core agent that processes chat messages. -pub struct Agent { - config: AgentConfig, - tools: Arc, -} - -impl Agent { - /// Creates a new agent. - pub fn new(config: AgentConfig, tools: Arc) -> Self { - Self { config, tools } - } - - /// Processes a chat message and returns a stream of events. 
- pub async fn process( - &self, - session: &Session, - message: &str, - retrieved_chunks: Vec, - model_override: Option, - ) -> Result>> { - let context = AgentContext::new(session.clone(), message.to_string(), retrieved_chunks); - - let model = model_override.unwrap_or_else(|| self.config.default_model.clone()); - - let executor = AgentExecutor::new(self.config.clone(), self.tools.clone(), context, model); - - executor.run().await - } - - /// Returns proposed edits from an agent run. - pub fn extract_edits(&self, _events: &[ChatEvent]) -> Vec { - Vec::new() - } -} - -/// Result of an agent run. -#[derive(Debug, Clone)] -pub struct AgentResult { - /// The final response text. - pub response: String, - - /// Message ID. - pub message_id: Uuid, - - /// Proposed edits. - pub proposed_edits: Vec, - - /// Edits that were auto-applied. - pub applied_edits: Vec, - - /// Total tokens used. - pub total_tokens: u32, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn agent_config_defaults() { - let config = AgentConfig::default(); - assert_eq!(config.max_iterations, 10); - assert_eq!(config.max_tokens, 4096); - assert!(!config.include_thinking); - } -} diff --git a/crates/nvisy-rig/src/chat/agent/prompt.rs b/crates/nvisy-rig/src/chat/agent/prompt.rs deleted file mode 100644 index 232ca7e..0000000 --- a/crates/nvisy-rig/src/chat/agent/prompt.rs +++ /dev/null @@ -1,181 +0,0 @@ -//! Prompt building for the agent. - -use crate::rag::RetrievedChunk; -use crate::session::{Message, Session}; -use crate::tool::ToolDefinition; - -/// Builder for constructing agent prompts. -#[derive(Debug, Clone)] -pub struct PromptBuilder { - system_prompt: String, - tools: Vec, - context_chunks: Vec, - history: Vec, - user_message: String, -} - -impl PromptBuilder { - /// Creates a new prompt builder with the default system prompt. - pub fn new() -> Self { - Self { - system_prompt: default_system_prompt(), - tools: Vec::new(), - context_chunks: Vec::new(), - history: Vec::new(), - user_message: String::new(), - } - } - - /// Sets a custom system prompt. - pub fn with_system_prompt(mut self, prompt: impl Into) -> Self { - self.system_prompt = prompt.into(); - self - } - - /// Adds available tools. - pub fn with_tools(mut self, tools: Vec) -> Self { - self.tools = tools; - self - } - - /// Adds retrieved context chunks. - pub fn with_context(mut self, chunks: Vec) -> Self { - self.context_chunks = chunks; - self - } - - /// Adds conversation history from session. - pub fn with_session(mut self, session: &Session) -> Self { - self.history = session.messages().to_vec(); - if let Some(custom_prompt) = session.system_prompt() { - self.system_prompt = custom_prompt.to_string(); - } - self - } - - /// Sets the user message. - pub fn with_user_message(mut self, message: impl Into) -> Self { - self.user_message = message.into(); - self - } - - /// Builds the system prompt with context. 
- pub fn build_system_prompt(&self) -> String { - let mut prompt = self.system_prompt.clone(); - - // Add tool descriptions - if !self.tools.is_empty() { - prompt.push_str("\n\n## Available Tools\n\n"); - for tool in &self.tools { - prompt.push_str(&format!("### {}\n{}\n\n", tool.name(), tool.description())); - } - } - - // Add context chunks - if !self.context_chunks.is_empty() { - prompt.push_str("\n\n## Document Context\n\n"); - for (i, chunk) in self.context_chunks.iter().enumerate() { - let content = chunk.content_or_placeholder(); - prompt.push_str(&format!( - "### Chunk {} (relevance: {:.2})\n```\n{}\n```\n\n", - i + 1, - chunk.score, - content - )); - } - } - - prompt - } - - /// Builds the complete message list for the API call. - pub fn build_messages(&self) -> Vec { - let mut messages = Vec::new(); - - // Add system message - messages.push(Message::system(self.build_system_prompt())); - - // Add history - messages.extend(self.history.clone()); - - // Add current user message - if !self.user_message.is_empty() { - messages.push(Message::user(&self.user_message)); - } - - messages - } -} - -impl Default for PromptBuilder { - fn default() -> Self { - Self::new() - } -} - -/// Default system prompt for document processing. -fn default_system_prompt() -> String { - r#"You are an AI assistant specialized in document processing and editing. Your role is to help users understand, analyze, and modify their documents. - -## Capabilities - -You can: -- Extract specific content (tables, sections, figures) -- Redact sensitive information (PII, confidential data) -- Summarize or restructure content -- Answer questions about the document -- Make precise edits as requested - -## Guidelines - -1. **Be precise**: When making edits, be specific about locations and changes. -2. **Preserve structure**: Maintain the document's formatting and organization. -3. **Confirm before destructive changes**: For irreversible operations, confirm with the user first. -4. **Reference accurately**: When citing content, use exact quotes or page/section references. -5. **Respect confidentiality**: Handle sensitive content appropriately. - -## Tool Usage - -Use the available tools to: -- Read document content -- Make edits -- Extract specific elements -- Search within the document - -Always explain what you're doing and why."# - .to_string() -} - -#[cfg(test)] -mod tests { - use uuid::Uuid; - - use super::*; - use crate::rag::ChunkMetadata; - - #[test] - fn prompt_builder_default() { - let builder = PromptBuilder::new(); - let system = builder.build_system_prompt(); - - assert!(system.contains("document processing")); - assert!(system.contains("Capabilities")); - } - - #[test] - fn prompt_builder_with_context() { - let chunk = RetrievedChunk::new( - Uuid::nil(), - Uuid::nil(), - 0.95, - ChunkMetadata::new(0, 0, 100), - ) - .with_content("test content".to_string()); - - let builder = PromptBuilder::new().with_context(vec![chunk]); - - let system = builder.build_system_prompt(); - assert!(system.contains("Document Context")); - assert!(system.contains("test content")); - } -} diff --git a/crates/nvisy-rig/src/chat/event.rs b/crates/nvisy-rig/src/chat/event.rs deleted file mode 100644 index 486a0d0..0000000 --- a/crates/nvisy-rig/src/chat/event.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! Chat events emitted during streaming. 
- -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::ChatResponse; -use crate::tool::edit::ProposedEdit; -use crate::tool::{ToolCall, ToolResult}; - -/// Events emitted during chat processing. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ChatEvent { - /// Agent is thinking/planning. - Thinking { content: String }, - - /// Text delta from the model. - TextDelta { delta: String }, - - /// Agent is calling a tool. - ToolCall { call: ToolCall }, - - /// Tool execution completed. - ToolResult { result: ToolResult }, - - /// Agent proposes an edit to the document. - ProposedEdit { edit: ProposedEdit }, - - /// Edit was auto-applied based on policy. - EditApplied { edit_id: Uuid }, - - /// Chat response completed. - Done { response: ChatResponse }, - - /// Error occurred during processing. - Error { message: String }, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn chat_event_serialization() { - let event = ChatEvent::TextDelta { - delta: "Hello".to_string(), - }; - - let json = serde_json::to_string(&event).expect("ChatEvent should serialize to JSON"); - assert!(json.contains("text_delta")); - assert!(json.contains("Hello")); - } -} diff --git a/crates/nvisy-rig/src/chat/mod.rs b/crates/nvisy-rig/src/chat/mod.rs deleted file mode 100644 index 9766167..0000000 --- a/crates/nvisy-rig/src/chat/mod.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Chat service for AI-powered document conversations. -//! -//! This module provides: -//! - [`ChatService`] - Main entry point for chat functionality -//! - [`ChatStream`] - Streaming chat response -//! - [`ChatEvent`] - Events emitted during chat -//! - [`ChatResponse`] - Complete response after stream ends -//! - [`UsageStats`] - Token usage statistics -//! - [`agent`] - Agent execution for processing chat messages - -pub mod agent; -mod event; -mod response; -mod service; -mod stream; -mod usage; - -pub use agent::{Agent, AgentConfig, AgentContext, AgentExecutor, PromptBuilder}; -pub use event::ChatEvent; -pub use response::ChatResponse; -pub use service::ChatService; -pub use stream::ChatStream; -pub use usage::UsageStats; diff --git a/crates/nvisy-rig/src/chat/response.rs b/crates/nvisy-rig/src/chat/response.rs deleted file mode 100644 index dc3f463..0000000 --- a/crates/nvisy-rig/src/chat/response.rs +++ /dev/null @@ -1,73 +0,0 @@ -//! Chat response types. - -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::UsageStats; -use crate::tool::edit::ProposedEdit; - -/// Complete chat response after stream ends. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ChatResponse { - /// Unique message ID. - pub id: Uuid, - - /// Complete response text. - pub content: String, - - /// Model used for completion. - pub model: String, - - /// Token usage statistics. - pub usage: UsageStats, - - /// Proposed edits from this response. - pub proposed_edits: Vec, - - /// Edits that were auto-applied. - pub applied_edits: Vec, -} - -impl ChatResponse { - /// Creates a new chat response. - pub fn new(content: String, model: String, usage: UsageStats) -> Self { - Self { - id: Uuid::now_v7(), - content, - model, - usage, - proposed_edits: Vec::new(), - applied_edits: Vec::new(), - } - } - - /// Adds proposed edits to the response. - pub fn with_proposed_edits(mut self, edits: Vec) -> Self { - self.proposed_edits = edits; - self - } - - /// Adds applied edits to the response. 
- pub fn with_applied_edits(mut self, edit_ids: Vec) -> Self { - self.applied_edits = edit_ids; - self - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn chat_response_builder() { - let response = ChatResponse::new( - "Test content".to_string(), - "gpt-4".to_string(), - UsageStats::default(), - ); - - assert!(!response.id.is_nil()); - assert_eq!(response.content, "Test content"); - assert_eq!(response.model, "gpt-4"); - } -} diff --git a/crates/nvisy-rig/src/chat/service.rs b/crates/nvisy-rig/src/chat/service.rs deleted file mode 100644 index 880ab90..0000000 --- a/crates/nvisy-rig/src/chat/service.rs +++ /dev/null @@ -1,163 +0,0 @@ -//! Chat service for managing sessions and conversations. - -use std::sync::Arc; - -use nvisy_nats::NatsClient; -use uuid::Uuid; - -use super::ChatStream; -use crate::provider::{CompletionModel, EmbeddingProvider}; -use crate::session::{CreateSession, Session, SessionStore}; -use crate::tool::ToolRegistry; -use crate::tool::edit::ApplyResult; -use crate::{Error, Result}; - -/// Inner state for [`ChatService`]. -struct ChatServiceInner { - embedding_provider: EmbeddingProvider, - tools: ToolRegistry, - sessions: SessionStore, -} - -/// Chat service for AI-powered document conversations. -#[derive(Clone)] -pub struct ChatService { - inner: Arc, -} - -impl ChatService { - /// Creates a new ChatService. - pub async fn new(embedding_provider: EmbeddingProvider, nats: NatsClient) -> Result { - let tools = ToolRegistry::with_defaults(); - let sessions = SessionStore::new(nats).await?; - - Ok(Self { - inner: Arc::new(ChatServiceInner { - embedding_provider, - tools, - sessions, - }), - }) - } - - /// Creates a new ChatService with custom tools and session store. - pub fn with_components( - embedding_provider: EmbeddingProvider, - tools: ToolRegistry, - sessions: SessionStore, - ) -> Self { - Self { - inner: Arc::new(ChatServiceInner { - embedding_provider, - tools, - sessions, - }), - } - } - - /// Creates a new chat session for a document. - pub async fn create_session(&self, request: CreateSession) -> Result { - let session = Session::new(request); - self.inner.sessions.create(&session).await?; - Ok(session) - } - - /// Retrieves an existing session. - pub async fn get_session(&self, session_id: Uuid) -> Result> { - self.inner.sessions.get(session_id).await - } - - /// Sends a chat message and returns a streaming response. - pub async fn chat(&self, session_id: Uuid, message: &str) -> Result { - self.inner.sessions.touch(session_id).await?; - - let session = self - .inner - .sessions - .get(session_id) - .await? - .ok_or_else(|| crate::Error::session("session not found"))?; - - ChatStream::new(session, message.to_string(), self.clone()).await - } - - /// Sends a chat message with a specific model override. - pub async fn chat_with_model( - &self, - session_id: Uuid, - message: &str, - model: CompletionModel, - ) -> Result { - self.inner.sessions.touch(session_id).await?; - - let session = self - .inner - .sessions - .get(session_id) - .await? - .ok_or_else(|| crate::Error::session("session not found"))?; - - ChatStream::with_model(session, message.to_string(), Some(model), self.clone()).await - } - - /// Approves and applies pending edits. - pub async fn apply_edits(&self, session_id: Uuid, edit_ids: &[Uuid]) -> Result { - let mut session = self - .inner - .sessions - .get(session_id) - .await? 
- .ok_or_else(|| crate::Error::session("session not found"))?; - - let result = session.apply_edits(edit_ids)?; - self.inner.sessions.update(&session).await?; - - Ok(result) - } - - /// Rejects pending edits. - pub async fn reject_edits(&self, session_id: Uuid, edit_ids: &[Uuid]) -> Result<()> { - let mut session = self - .inner - .sessions - .get(session_id) - .await? - .ok_or_else(|| crate::Error::session("session not found"))?; - - session.reject_edits(edit_ids); - self.inner.sessions.update(&session).await?; - - Ok(()) - } - - /// Ends a session and cleans up all pending edits. - pub async fn end_session(&self, session_id: Uuid) -> Result<()> { - self.inner.sessions.delete(session_id).await - } - - /// Generates embeddings for text. - pub async fn embed(&self, text: &str) -> Result> { - let embedding = self - .inner - .embedding_provider - .embed_text(text) - .await - .map_err(|e| Error::provider("embedding", e.to_string()))?; - Ok(embedding.vec) - } - - /// Returns a reference to the embedding provider. - pub fn embedding_provider(&self) -> &EmbeddingProvider { - &self.inner.embedding_provider - } - - /// Returns a reference to the tool registry. - pub fn tools(&self) -> &ToolRegistry { - &self.inner.tools - } - - /// Returns a reference to the session store. - pub fn sessions(&self) -> &SessionStore { - &self.inner.sessions - } -} diff --git a/crates/nvisy-rig/src/chat/stream.rs b/crates/nvisy-rig/src/chat/stream.rs deleted file mode 100644 index a6758e5..0000000 --- a/crates/nvisy-rig/src/chat/stream.rs +++ /dev/null @@ -1,114 +0,0 @@ -//! Streaming chat response. - -use std::pin::Pin; -use std::task::{Context, Poll}; - -use futures::Stream; -use uuid::Uuid; - -use super::{ChatEvent, ChatResponse, ChatService, UsageStats}; -use crate::Result; -use crate::provider::CompletionModel; -use crate::session::Session; -use crate::tool::edit::ProposedEdit; - -/// Streaming chat response. -pub struct ChatStream { - session: Session, - message: String, - model_override: Option, - service: ChatService, - - started: bool, - finished: bool, - accumulated_content: String, - proposed_edits: Vec, - applied_edits: Vec, -} - -impl ChatStream { - /// Creates a new chat stream. - pub async fn new(session: Session, message: String, service: ChatService) -> Result { - Ok(Self { - session, - message, - model_override: None, - service, - started: false, - finished: false, - accumulated_content: String::new(), - proposed_edits: Vec::new(), - applied_edits: Vec::new(), - }) - } - - /// Creates a new chat stream with a model override. - pub async fn with_model( - session: Session, - message: String, - model_override: Option, - service: ChatService, - ) -> Result { - Ok(Self { - session, - message, - model_override, - service, - started: false, - finished: false, - accumulated_content: String::new(), - proposed_edits: Vec::new(), - applied_edits: Vec::new(), - }) - } - - /// Returns the session ID. - pub fn session_id(&self) -> Uuid { - self.session.id() - } - - /// Returns the document ID being processed. 
- pub fn document_id(&self) -> Uuid { - self.session.document_id() - } - - fn poll_next_event(&mut self, _cx: &mut Context<'_>) -> Poll<Option<Result<ChatEvent>>> { - if self.finished { - return Poll::Ready(None); - } - - if !self.started { - self.started = true; - - let _ = (&self.message, &self.service, &self.accumulated_content); - - self.finished = true; - - let model = self - .model_override - .as_ref() - .map(|m| m.as_str().to_string()) - .unwrap_or_else(|| "default".to_string()); - - let response = ChatResponse::new( - "Agent pipeline not yet implemented".to_string(), - model, - UsageStats::default(), - ) - .with_proposed_edits(self.proposed_edits.clone()) - .with_applied_edits(self.applied_edits.clone()); - - return Poll::Ready(Some(Ok(ChatEvent::Done { response }))); - } - - Poll::Ready(None) - } -} - -impl Stream for ChatStream { - type Item = Result<ChatEvent>; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { - self.poll_next_event(cx) - } -} diff --git a/crates/nvisy-rig/src/chat/usage.rs b/crates/nvisy-rig/src/chat/usage.rs deleted file mode 100644 index b79f634..0000000 --- a/crates/nvisy-rig/src/chat/usage.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! Token usage statistics. - -use serde::{Deserialize, Serialize}; - -/// Token usage statistics for a chat completion. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct UsageStats { - /// Number of input tokens (prompt). - pub input_tokens: u32, - - /// Number of output tokens (completion). - pub output_tokens: u32, - - /// Number of tokens used for reasoning/thinking. - pub reasoning_tokens: u32, - - /// Total tokens (input + output). - pub total_tokens: u32, - - /// Estimated cost in USD (if available). - pub estimated_cost_usd: Option<f64>, -} - -impl UsageStats { - /// Creates new usage stats. - pub fn new(input_tokens: u32, output_tokens: u32) -> Self { - Self { - input_tokens, - output_tokens, - reasoning_tokens: 0, - total_tokens: input_tokens + output_tokens, - estimated_cost_usd: None, - } - } - - /// Adds reasoning tokens. - pub fn with_reasoning_tokens(mut self, reasoning_tokens: u32) -> Self { - self.reasoning_tokens = reasoning_tokens; - self - } - - /// Sets the estimated cost. - pub fn with_cost(mut self, cost_usd: f64) -> Self { - self.estimated_cost_usd = Some(cost_usd); - self - } - - /// Accumulates usage from another stats instance.
- pub fn accumulate(&mut self, other: &UsageStats) { - self.input_tokens += other.input_tokens; - self.output_tokens += other.output_tokens; - self.reasoning_tokens += other.reasoning_tokens; - self.total_tokens += other.total_tokens; - - if let Some(other_cost) = other.estimated_cost_usd { - self.estimated_cost_usd = Some(self.estimated_cost_usd.unwrap_or(0.0) + other_cost); - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn usage_stats_new() { - let stats = UsageStats::new(100, 50); - assert_eq!(stats.input_tokens, 100); - assert_eq!(stats.output_tokens, 50); - assert_eq!(stats.total_tokens, 150); - } - - #[test] - fn usage_stats_accumulate() { - let mut stats = UsageStats::new(100, 50); - let other = UsageStats::new(200, 100).with_cost(0.01); - - stats.accumulate(&other); - - assert_eq!(stats.input_tokens, 300); - assert_eq!(stats.output_tokens, 150); - // 150 (original) + 300 (other) = 450 - assert_eq!(stats.total_tokens, 450); - assert_eq!(stats.estimated_cost_usd, Some(0.01)); - } -} diff --git a/crates/nvisy-rig/src/error.rs b/crates/nvisy-rig/src/error.rs index ed4a0b4..926545f 100644 --- a/crates/nvisy-rig/src/error.rs +++ b/crates/nvisy-rig/src/error.rs @@ -15,10 +15,6 @@ pub enum Error { #[error("provider error: {provider}: {message}")] Provider { provider: String, message: String }, - /// Session error (not found, expired, etc.) - #[error("session error: {0}")] - Session(String), - /// RAG retrieval error. #[error("retrieval error: {0}")] Retrieval(String), @@ -53,11 +49,6 @@ impl Error { } } - /// Creates a session error. - pub fn session(message: impl fmt::Display) -> Self { - Self::Session(message.to_string()) - } - /// Creates a retrieval error. pub fn retrieval(message: impl fmt::Display) -> Self { Self::Retrieval(message.to_string()) @@ -86,7 +77,6 @@ impl From for nvisy_core::Error { nvisy_core::ErrorKind::ExternalError, format!("{}: {}", provider, message), ), - Error::Session(msg) => (nvisy_core::ErrorKind::InvalidInput, msg.clone()), Error::Retrieval(msg) => (nvisy_core::ErrorKind::ExternalError, msg.clone()), Error::Embedding(_) => (nvisy_core::ErrorKind::ExternalError, err.to_string()), Error::Completion(_) => (nvisy_core::ErrorKind::ExternalError, err.to_string()), diff --git a/crates/nvisy-rig/src/lib.rs b/crates/nvisy-rig/src/lib.rs index 53c9ee5..38416f0 100644 --- a/crates/nvisy-rig/src/lib.rs +++ b/crates/nvisy-rig/src/lib.rs @@ -3,16 +3,11 @@ #![doc = include_str!("../README.md")] pub mod agent; -pub mod chat; mod error; pub mod provider; pub mod rag; -mod service; -mod session; -mod tool; pub use error::{Error, Result}; -pub use service::{RigConfig, RigService}; /// Tracing target for the main library. pub const TRACING_TARGET: &str = "nvisy_rig"; diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs index 976e5b6..1f95b01 100644 --- a/crates/nvisy-rig/src/rag/searcher/mod.rs +++ b/crates/nvisy-rig/src/rag/searcher/mod.rs @@ -59,6 +59,7 @@ impl Searcher { } /// Searches for relevant chunks without loading content. 
+ #[tracing::instrument(skip(self, query), fields(query_len = query.len(), limit, scope = ?self.scope))] pub async fn query(&self, query: &str, limit: u32) -> Result<Vec<RetrievedChunk>> { let embedding = self .provider @@ -98,7 +99,7 @@ } .map_err(|e| Error::retrieval(format!("vector search failed: {e}")))?; - let chunks = scored_chunks + let chunks: Vec<RetrievedChunk> = scored_chunks .into_iter() .map(|scored| { let chunk = scored.chunk; @@ -107,17 +108,21 @@ }) .collect(); + tracing::debug!(result_count = chunks.len(), "query completed"); Ok(chunks) } /// Searches for relevant chunks and loads their content. + #[tracing::instrument(skip(self, query), fields(query_len = query.len(), limit))] pub async fn query_with_content(&self, query: &str, limit: u32) -> Result<Vec<RetrievedChunk>> { let mut chunks = self.query(query, limit).await?; self.load_content(&mut chunks).await?; + tracing::debug!(result_count = chunks.len(), "query_with_content completed"); Ok(chunks) } /// Loads content for retrieved chunks from NATS. + #[tracing::instrument(skip(self, chunks), fields(chunk_count = chunks.len()))] pub async fn load_content(&self, chunks: &mut [RetrievedChunk]) -> Result<()> { let mut by_file: HashMap<Uuid, Vec<usize>> = HashMap::new(); for (idx, chunk) in chunks.iter().enumerate() { @@ -126,11 +131,14 @@ } } + let file_count = by_file.len(); + tracing::debug!(file_count, "loading content from files"); + for (file_id, indices) in by_file { let file_content = match self.fetch_file(file_id).await { Ok(content) => content, Err(e) => { - tracing::warn!(file_id = %file_id, error = %e, "Failed to fetch file"); + tracing::warn!(file_id = %file_id, error = %e, "failed to fetch file"); continue; } }; @@ -149,6 +157,7 @@ Ok(()) } + #[tracing::instrument(skip(self), fields(%file_id))] async fn fetch_file(&self, file_id: Uuid) -> Result> { let key = FileKey::from_parts(Uuid::nil(), file_id); @@ -166,6 +175,7 @@ .await .map_err(|e| Error::retrieval(format!("failed to read file: {e}")))?; + tracing::debug!(content_len = content.len(), "file fetched"); Ok(content) } } diff --git a/crates/nvisy-rig/src/service/config.rs b/crates/nvisy-rig/src/service/config.rs deleted file mode 100644 index 4f1dbed..0000000 --- a/crates/nvisy-rig/src/service/config.rs +++ /dev/null @@ -1,70 +0,0 @@ -//! Configuration for the rig service. - -#[cfg(feature = "config")] -use clap::Args; -use serde::{Deserialize, Serialize}; - -#[cfg(feature = "ollama")] -use crate::provider::{EmbeddingProvider, OllamaEmbeddingModel}; - -/// Configuration for AI services (chat and RAG). -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "config", derive(Args))] -pub struct RigConfig { - /// Ollama base URL for embeddings. - #[cfg(feature = "ollama")] - #[cfg_attr( - feature = "config", - arg( - long, - env = "OLLAMA_BASE_URL", - default_value = "http://localhost:11434" - ) - )] - pub ollama_base_url: String, - - /// Ollama embedding model name. - #[cfg(feature = "ollama")] - #[cfg_attr( - feature = "config", - arg( - long, - env = "OLLAMA_EMBEDDING_MODEL", - default_value = "nomic-embed-text" - ) - )] - pub ollama_embedding_model: String, - - /// Ollama embedding model dimensions.
- #[cfg(feature = "ollama")] - #[cfg_attr( - feature = "config", - arg(long, env = "OLLAMA_EMBEDDING_DIMENSIONS", default_value = "768") - )] - pub ollama_embedding_dimensions: usize, -} - -impl Default for RigConfig { - fn default() -> Self { - Self { - #[cfg(feature = "ollama")] - ollama_base_url: "http://localhost:11434".to_string(), - #[cfg(feature = "ollama")] - ollama_embedding_model: "nomic-embed-text".to_string(), - #[cfg(feature = "ollama")] - ollama_embedding_dimensions: 768, - } - } -} - -#[cfg(feature = "ollama")] -impl RigConfig { - /// Creates an Ollama embedding provider from this configuration. - pub(crate) fn embedding_provider(&self) -> nvisy_core::Result { - let model = OllamaEmbeddingModel::new( - &self.ollama_embedding_model, - self.ollama_embedding_dimensions, - ); - EmbeddingProvider::ollama(&self.ollama_base_url, model) - } -} diff --git a/crates/nvisy-rig/src/service/mod.rs b/crates/nvisy-rig/src/service/mod.rs deleted file mode 100644 index 08e2a4e..0000000 --- a/crates/nvisy-rig/src/service/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -//! Unified AI service combining chat and RAG capabilities. - -mod config; -mod rig; - -pub use config::RigConfig; -pub use rig::RigService; diff --git a/crates/nvisy-rig/src/service/rig.rs b/crates/nvisy-rig/src/service/rig.rs deleted file mode 100644 index bde2b47..0000000 --- a/crates/nvisy-rig/src/service/rig.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Unified AI service combining chat and RAG capabilities. - -use std::sync::Arc; - -use nvisy_nats::NatsClient; -use nvisy_postgres::PgClient; - -use super::RigConfig; -use crate::chat::ChatService; -use crate::rag::{RagConfig, RagService}; -use crate::{Error, Result}; - -/// Inner state for [`RigService`]. -struct RigServiceInner { - chat: ChatService, - rag: RagService, -} - -/// Unified AI service providing chat and RAG capabilities. -#[derive(Clone)] -pub struct RigService { - inner: Arc, -} - -impl RigService { - /// Creates a new RigService from configuration. - pub async fn new(config: RigConfig, db: PgClient, nats: NatsClient) -> Result { - let embedding_provider = config - .embedding_provider() - .map_err(|e| Error::config(e.to_string()))?; - - let rag_config = RagConfig::default(); - let rag = RagService::new(rag_config, embedding_provider.clone(), db, nats.clone()).await?; - - let chat = ChatService::new(embedding_provider, nats).await?; - - Ok(Self { - inner: Arc::new(RigServiceInner { chat, rag }), - }) - } - - /// Returns a reference to the chat service. - pub fn chat(&self) -> &ChatService { - &self.inner.chat - } - - /// Returns a reference to the RAG service. - pub fn rag(&self) -> &RagService { - &self.inner.rag - } -} diff --git a/crates/nvisy-rig/src/session/message.rs b/crates/nvisy-rig/src/session/message.rs deleted file mode 100644 index cf1b405..0000000 --- a/crates/nvisy-rig/src/session/message.rs +++ /dev/null @@ -1,135 +0,0 @@ -//! Chat message types. - -use jiff::Timestamp; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -/// Role of a message in the conversation. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum MessageRole { - /// System prompt. - System, - /// User message. - User, - /// Assistant response. - Assistant, - /// Tool result. - Tool, -} - -/// A message in the conversation history. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Message { - /// Unique message ID. - id: Uuid, - - /// Message role. - role: MessageRole, - - /// Message content. 
- content: String, - - /// Tool call ID (for tool messages). - tool_call_id: Option, - - /// When the message was created. - created_at: Timestamp, -} - -impl Message { - /// Creates a system message. - pub fn system(content: impl Into) -> Self { - Self { - id: Uuid::now_v7(), - role: MessageRole::System, - content: content.into(), - tool_call_id: None, - created_at: Timestamp::now(), - } - } - - /// Creates a user message. - pub fn user(content: impl Into) -> Self { - Self { - id: Uuid::now_v7(), - role: MessageRole::User, - content: content.into(), - tool_call_id: None, - created_at: Timestamp::now(), - } - } - - /// Creates an assistant message. - pub fn assistant(content: impl Into) -> Self { - Self { - id: Uuid::now_v7(), - role: MessageRole::Assistant, - content: content.into(), - tool_call_id: None, - created_at: Timestamp::now(), - } - } - - /// Creates a tool result message. - pub fn tool(tool_call_id: Uuid, content: impl Into) -> Self { - Self { - id: Uuid::now_v7(), - role: MessageRole::Tool, - content: content.into(), - tool_call_id: Some(tool_call_id), - created_at: Timestamp::now(), - } - } - - /// Returns the message ID. - pub fn id(&self) -> Uuid { - self.id - } - - /// Returns the message role. - pub fn role(&self) -> MessageRole { - self.role - } - - /// Returns the message content. - pub fn content(&self) -> &str { - &self.content - } - - /// Returns the tool call ID if this is a tool message. - pub fn tool_call_id(&self) -> Option { - self.tool_call_id - } - - /// Returns when the message was created. - pub fn created_at(&self) -> Timestamp { - self.created_at - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn message_roles() { - let system = Message::system("You are a helpful assistant"); - let user = Message::user("Hello"); - let assistant = Message::assistant("Hi!"); - let tool = Message::tool(Uuid::now_v7(), "result"); - - assert_eq!(system.role(), MessageRole::System); - assert_eq!(user.role(), MessageRole::User); - assert_eq!(assistant.role(), MessageRole::Assistant); - assert_eq!(tool.role(), MessageRole::Tool); - } - - #[test] - fn tool_message_has_call_id() { - let call_id = Uuid::now_v7(); - let tool = Message::tool(call_id, "result"); - - assert_eq!(tool.tool_call_id(), Some(call_id)); - } -} diff --git a/crates/nvisy-rig/src/session/mod.rs b/crates/nvisy-rig/src/session/mod.rs deleted file mode 100644 index 077f49e..0000000 --- a/crates/nvisy-rig/src/session/mod.rs +++ /dev/null @@ -1,325 +0,0 @@ -//! Session management for chat conversations. -//! -//! Sessions are ephemeral and stored in NATS KV with TTL. -//! They track conversation history, pending edits, and auto-apply policies. - -mod message; -mod policy; -mod store; - -use jiff::Timestamp; -pub use message::Message; -pub use policy::{ApplyPolicy, ApprovalHistory, AutoApplyContext}; -use serde::{Deserialize, Serialize}; -pub use store::SessionStore; -use uuid::Uuid; - -use crate::Result; -use crate::tool::edit::{ApplyResult, ProposedEdit}; - -/// Request to create a new session. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CreateSession { - /// Document being processed. - pub document_id: Uuid, - - /// Workspace context. - pub workspace_id: Uuid, - - /// User initiating the session. - pub user_id: Uuid, - - /// Auto-apply policy for edits. - #[serde(default)] - pub apply_policy: ApplyPolicy, - - /// Initial system prompt override. - pub system_prompt: Option, - - /// Model preference. - pub model: Option, -} - -impl CreateSession { - /// Creates a new session request. 
- pub fn new(document_id: Uuid, workspace_id: Uuid, user_id: Uuid) -> Self { - Self { - document_id, - workspace_id, - user_id, - apply_policy: ApplyPolicy::default(), - system_prompt: None, - model: None, - } - } - - /// Sets the auto-apply policy. - pub fn with_policy(mut self, policy: ApplyPolicy) -> Self { - self.apply_policy = policy; - self - } - - /// Sets a custom system prompt. - pub fn with_system_prompt(mut self, prompt: impl Into) -> Self { - self.system_prompt = Some(prompt.into()); - self - } - - /// Sets a model preference. - pub fn with_model(mut self, model: impl Into) -> Self { - self.model = Some(model.into()); - self - } -} - -/// An active chat session. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Session { - /// Unique session ID. - id: Uuid, - - /// Document being processed. - document_id: Uuid, - - /// Workspace context. - workspace_id: Uuid, - - /// User who created the session. - user_id: Uuid, - - /// Auto-apply policy. - apply_policy: ApplyPolicy, - - /// Custom system prompt. - system_prompt: Option, - - /// Model preference. - model: Option, - - /// Conversation history. - messages: Vec, - - /// Pending edits awaiting approval. - pending_edits: Vec, - - /// Applied edit IDs. - applied_edits: Vec, - - /// Rejected edit IDs. - rejected_edits: Vec, - - /// Count of auto-applied edits in this session. - auto_applied_count: usize, - - /// Approval history for learning policies. - approval_history: ApprovalHistory, - - /// When the session was created. - created_at: Timestamp, - - /// Last activity time. - last_activity_at: Timestamp, -} - -impl Session { - /// Creates a new session from a request. - pub fn new(request: CreateSession) -> Self { - let now = Timestamp::now(); - Self { - id: Uuid::now_v7(), - document_id: request.document_id, - workspace_id: request.workspace_id, - user_id: request.user_id, - apply_policy: request.apply_policy, - system_prompt: request.system_prompt, - model: request.model, - messages: Vec::new(), - pending_edits: Vec::new(), - applied_edits: Vec::new(), - rejected_edits: Vec::new(), - auto_applied_count: 0, - approval_history: ApprovalHistory::new(), - created_at: now, - last_activity_at: now, - } - } - - /// Returns the session ID. - pub fn id(&self) -> Uuid { - self.id - } - - /// Returns the document ID. - pub fn document_id(&self) -> Uuid { - self.document_id - } - - /// Returns the workspace ID. - pub fn workspace_id(&self) -> Uuid { - self.workspace_id - } - - /// Returns the user ID. - pub fn user_id(&self) -> Uuid { - self.user_id - } - - /// Returns the auto-apply policy. - pub fn apply_policy(&self) -> &ApplyPolicy { - &self.apply_policy - } - - /// Returns the custom system prompt. - pub fn system_prompt(&self) -> Option<&str> { - self.system_prompt.as_deref() - } - - /// Returns the model preference. - pub fn model(&self) -> Option<&str> { - self.model.as_deref() - } - - /// Returns the conversation messages. - pub fn messages(&self) -> &[Message] { - &self.messages - } - - /// Returns pending edits. - pub fn pending_edits(&self) -> &[ProposedEdit] { - &self.pending_edits - } - - /// Returns applied edit IDs. - pub fn applied_edits(&self) -> &[Uuid] { - &self.applied_edits - } - - /// Returns the creation time. - pub fn created_at(&self) -> Timestamp { - self.created_at - } - - /// Returns the last activity time. - pub fn last_activity_at(&self) -> Timestamp { - self.last_activity_at - } - - /// Adds a user message. 
- pub fn add_user_message(&mut self, content: impl Into) { - self.messages.push(Message::user(content)); - self.last_activity_at = Timestamp::now(); - } - - /// Adds an assistant message. - pub fn add_assistant_message(&mut self, content: impl Into) { - self.messages.push(Message::assistant(content)); - self.last_activity_at = Timestamp::now(); - } - - /// Adds a tool result message. - pub fn add_tool_message(&mut self, tool_call_id: Uuid, content: impl Into) { - self.messages.push(Message::tool(tool_call_id, content)); - self.last_activity_at = Timestamp::now(); - } - - /// Adds a proposed edit. - pub fn add_proposed_edit(&mut self, edit: ProposedEdit) { - self.pending_edits.push(edit); - self.last_activity_at = Timestamp::now(); - } - - /// Checks if an edit should be auto-applied. - pub fn should_auto_apply(&self, edit: &ProposedEdit) -> bool { - let op_type = edit.operation_type(); - let context = AutoApplyContext::new(op_type) - .with_idempotent(edit.is_idempotent()) - .with_auto_applied_count(self.auto_applied_count) - .with_approval_count(self.approval_history.approval_count(op_type)); - - self.apply_policy.should_auto_apply(&context) - } - - /// Records that an edit was auto-applied. - pub fn record_auto_apply(&mut self) { - self.auto_applied_count += 1; - } - - /// Returns the approval history. - pub fn approval_history(&self) -> &ApprovalHistory { - &self.approval_history - } - - /// Applies pending edits by ID. - /// - /// This also records the approval in the history for learning policies. - pub fn apply_edits(&mut self, edit_ids: &[Uuid]) -> Result { - let mut applied = Vec::new(); - let mut not_found = Vec::new(); - - for id in edit_ids { - if let Some(pos) = self.pending_edits.iter().position(|e| e.id() == *id) { - let edit = self.pending_edits.remove(pos); - // Record approval for learning policies - self.approval_history.record_approval(edit.operation_type()); - applied.push(edit); - self.applied_edits.push(*id); - } else { - not_found.push(*id); - } - } - - self.last_activity_at = Timestamp::now(); - - Ok(ApplyResult { - applied, - not_found, - errors: Vec::new(), - }) - } - - /// Rejects pending edits by ID. - pub fn reject_edits(&mut self, edit_ids: &[Uuid]) { - for id in edit_ids { - if let Some(pos) = self.pending_edits.iter().position(|e| e.id() == *id) { - self.pending_edits.remove(pos); - self.rejected_edits.push(*id); - } - } - self.last_activity_at = Timestamp::now(); - } - - /// Touches the session to update last activity time. 
- pub fn touch(&mut self) { - self.last_activity_at = Timestamp::now(); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::session::message::MessageRole; - - fn test_request() -> CreateSession { - CreateSession::new(Uuid::now_v7(), Uuid::now_v7(), Uuid::now_v7()) - } - - #[test] - fn session_creation() { - let session = Session::new(test_request()); - assert!(!session.id().is_nil()); - assert!(session.messages().is_empty()); - assert!(session.pending_edits().is_empty()); - } - - #[test] - fn session_add_messages() { - let mut session = Session::new(test_request()); - - session.add_user_message("Hello"); - session.add_assistant_message("Hi there!"); - - assert_eq!(session.messages().len(), 2); - assert_eq!(session.messages()[0].role(), MessageRole::User); - assert_eq!(session.messages()[1].role(), MessageRole::Assistant); - } -} diff --git a/crates/nvisy-rig/src/session/policy.rs b/crates/nvisy-rig/src/session/policy.rs deleted file mode 100644 index 4719355..0000000 --- a/crates/nvisy-rig/src/session/policy.rs +++ /dev/null @@ -1,352 +0,0 @@ -//! Auto-apply policies for edit approval. - -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; - -/// Context for auto-apply decisions. -#[derive(Debug, Clone)] -pub struct AutoApplyContext { - /// Number of edits already auto-applied in this session. - pub auto_applied_count: usize, - - /// Whether the edit is idempotent. - pub is_idempotent: bool, - - /// The operation type being evaluated (e.g., "replace", "insert", "delete"). - pub operation_type: String, - - /// Number of times user has approved this operation type in this session. - pub approval_count_for_type: usize, -} - -impl AutoApplyContext { - /// Creates a new context for auto-apply evaluation. - pub fn new(operation_type: impl Into) -> Self { - Self { - auto_applied_count: 0, - is_idempotent: false, - operation_type: operation_type.into(), - approval_count_for_type: 0, - } - } - - /// Sets whether the operation is idempotent. - pub fn with_idempotent(mut self, is_idempotent: bool) -> Self { - self.is_idempotent = is_idempotent; - self - } - - /// Sets the number of auto-applied edits in the session. - pub fn with_auto_applied_count(mut self, count: usize) -> Self { - self.auto_applied_count = count; - self - } - - /// Sets the approval count for this operation type. - pub fn with_approval_count(mut self, count: usize) -> Self { - self.approval_count_for_type = count; - self - } -} - -/// Tracks approval history per operation type. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct ApprovalHistory { - /// Count of approvals per operation type. - approvals: HashMap, -} - -impl ApprovalHistory { - /// Creates a new empty approval history. - pub fn new() -> Self { - Self::default() - } - - /// Records an approval for the given operation type. - pub fn record_approval(&mut self, operation_type: &str) { - *self - .approvals - .entry(operation_type.to_string()) - .or_insert(0) += 1; - } - - /// Returns the approval count for the given operation type. - pub fn approval_count(&self, operation_type: &str) -> usize { - self.approvals.get(operation_type).copied().unwrap_or(0) - } - - /// Clears all approval history. - pub fn clear(&mut self) { - self.approvals.clear(); - } -} - -/// Policy for automatically applying edits. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -#[serde(tag = "mode", rename_all = "snake_case")] -pub enum ApplyPolicy { - /// Never auto-apply, always require approval. 
- #[default] - RequireApproval, - - /// Auto-apply idempotent operations only. - IdempotentOnly, - - /// Auto-apply after user approves similar operations. - LearnFromApproval, - - /// Auto-apply all edits (dangerous). - AutoApplyAll, - - /// Custom policy with specific rules. - Custom(CustomPolicy), -} - -impl ApplyPolicy { - /// Creates a policy that requires approval for everything. - pub fn require_approval() -> Self { - Self::RequireApproval - } - - /// Creates a policy that auto-applies idempotent operations. - pub fn idempotent_only() -> Self { - Self::IdempotentOnly - } - - /// Creates a policy that learns from user approvals. - pub fn learn_from_approval() -> Self { - Self::LearnFromApproval - } - - /// Creates a policy that auto-applies everything. - /// - /// # Warning - /// This is dangerous and should only be used for testing - /// or when the user explicitly opts in. - pub fn auto_apply_all() -> Self { - Self::AutoApplyAll - } - - /// Determines if an edit should be auto-applied. - pub fn should_auto_apply(&self, context: &AutoApplyContext) -> bool { - match self { - Self::RequireApproval => false, - Self::IdempotentOnly => context.is_idempotent, - Self::LearnFromApproval => { - // Auto-apply if idempotent OR if user has approved at least one similar edit - context.is_idempotent || context.approval_count_for_type > 0 - } - Self::AutoApplyAll => true, - Self::Custom(policy) => policy.should_auto_apply(context), - } - } -} - -/// Custom auto-apply policy with fine-grained rules. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CustomPolicy { - /// Auto-apply idempotent operations. - pub auto_apply_idempotent: bool, - - /// Auto-apply after N similar approvals for the same operation type. - pub learn_threshold: Option, - - /// Maximum edits to auto-apply per session. - pub max_auto_apply: Option, - - /// Allowed operation types for auto-apply. - /// If empty, all operation types are considered. - pub allowed_operations: Vec, -} - -impl CustomPolicy { - /// Creates a custom policy that auto-applies idempotent operations. - pub fn idempotent_only() -> Self { - Self { - auto_apply_idempotent: true, - learn_threshold: None, - max_auto_apply: None, - allowed_operations: Vec::new(), - } - } - - /// Creates a custom policy that learns from approvals. - pub fn learning(threshold: usize) -> Self { - Self { - auto_apply_idempotent: true, - learn_threshold: Some(threshold), - max_auto_apply: None, - allowed_operations: Vec::new(), - } - } - - /// Sets the maximum number of auto-applied edits. - pub fn with_max_auto_apply(mut self, max: usize) -> Self { - self.max_auto_apply = Some(max); - self - } - - /// Sets the allowed operation types. - pub fn with_allowed_operations(mut self, operations: Vec) -> Self { - self.allowed_operations = operations; - self - } - - /// Determines if an edit should be auto-applied. 
- pub fn should_auto_apply(&self, context: &AutoApplyContext) -> bool { - // Check max auto-apply limit - if let Some(max) = self.max_auto_apply - && context.auto_applied_count >= max - { - return false; - } - - // Check if operation type is allowed (empty = all allowed) - if !self.allowed_operations.is_empty() - && !self.allowed_operations.contains(&context.operation_type) - { - return false; - } - - // Check idempotent rule - if self.auto_apply_idempotent && context.is_idempotent { - return true; - } - - // Check learn threshold - if let Some(threshold) = self.learn_threshold - && context.approval_count_for_type >= threshold - { - return true; - } - - false - } -} - -impl Default for CustomPolicy { - fn default() -> Self { - Self { - auto_apply_idempotent: true, - learn_threshold: Some(2), - max_auto_apply: Some(10), - allowed_operations: Vec::new(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn context_for(op_type: &str) -> AutoApplyContext { - AutoApplyContext::new(op_type) - } - - #[test] - fn require_approval_never_auto_applies() { - let policy = ApplyPolicy::require_approval(); - let context = context_for("replace") - .with_idempotent(true) - .with_approval_count(10); - - assert!(!policy.should_auto_apply(&context)); - } - - #[test] - fn idempotent_only_checks_idempotency() { - let policy = ApplyPolicy::idempotent_only(); - - let idempotent = context_for("replace").with_idempotent(true); - let non_idempotent = context_for("replace").with_idempotent(false); - - assert!(policy.should_auto_apply(&idempotent)); - assert!(!policy.should_auto_apply(&non_idempotent)); - } - - #[test] - fn learn_from_approval() { - let policy = ApplyPolicy::learn_from_approval(); - - // Non-idempotent with no approvals - should not auto-apply - let no_approvals = context_for("delete"); - assert!(!policy.should_auto_apply(&no_approvals)); - - // Non-idempotent with approvals - should auto-apply - let with_approvals = context_for("delete").with_approval_count(1); - assert!(policy.should_auto_apply(&with_approvals)); - - // Idempotent without approvals - should still auto-apply - let idempotent = context_for("insert").with_idempotent(true); - assert!(policy.should_auto_apply(&idempotent)); - } - - #[test] - fn auto_apply_all() { - let policy = ApplyPolicy::auto_apply_all(); - let context = context_for("delete"); - - assert!(policy.should_auto_apply(&context)); - } - - #[test] - fn custom_policy_max_limit() { - let policy = CustomPolicy::default().with_max_auto_apply(5); - - // Under limit - let under_limit = context_for("replace") - .with_idempotent(true) - .with_auto_applied_count(4); - assert!(policy.should_auto_apply(&under_limit)); - - // At limit - let at_limit = context_for("replace") - .with_idempotent(true) - .with_auto_applied_count(5); - assert!(!policy.should_auto_apply(&at_limit)); - } - - #[test] - fn custom_policy_learn_threshold() { - let policy = CustomPolicy::learning(3); - - // Below threshold - let below = context_for("delete").with_approval_count(2); - assert!(!policy.should_auto_apply(&below)); - - // At threshold - let at_threshold = context_for("delete").with_approval_count(3); - assert!(policy.should_auto_apply(&at_threshold)); - } - - #[test] - fn custom_policy_allowed_operations() { - let policy = CustomPolicy::idempotent_only() - .with_allowed_operations(vec!["insert".to_string(), "replace".to_string()]); - - // Allowed operation - let allowed = context_for("insert").with_idempotent(true); - assert!(policy.should_auto_apply(&allowed)); - - // Disallowed operation - 
let disallowed = context_for("delete").with_idempotent(true); - assert!(!policy.should_auto_apply(&disallowed)); - } - - #[test] - fn approval_history_tracking() { - let mut history = ApprovalHistory::new(); - - assert_eq!(history.approval_count("replace"), 0); - - history.record_approval("replace"); - assert_eq!(history.approval_count("replace"), 1); - - history.record_approval("replace"); - history.record_approval("insert"); - assert_eq!(history.approval_count("replace"), 2); - assert_eq!(history.approval_count("insert"), 1); - assert_eq!(history.approval_count("delete"), 0); - } -} diff --git a/crates/nvisy-rig/src/session/store.rs b/crates/nvisy-rig/src/session/store.rs deleted file mode 100644 index b0011ba..0000000 --- a/crates/nvisy-rig/src/session/store.rs +++ /dev/null @@ -1,97 +0,0 @@ -//! Session storage backed by NATS KV. -//! -//! This module provides session persistence using the NATS KV store -//! from nvisy-nats. Sessions are automatically expired based on TTL. - -use std::time::Duration; - -use derive_more::{Deref, DerefMut}; -use nvisy_nats::NatsClient; -use nvisy_nats::kv::{ChatHistoryBucket, KvStore, SessionKey}; - -use super::Session; -use crate::Result; - -/// Type alias for session KV store. -type SessionKvStore = KvStore; - -/// Session store backed by NATS KV. -/// -/// This is a thin wrapper around `KvStore` -/// that provides session persistence for rig agents. -/// -/// This type is cheap to clone and can be shared across threads. -#[derive(Clone, Deref, DerefMut)] -pub struct SessionStore { - #[deref] - #[deref_mut] - inner: SessionKvStore, -} - -impl SessionStore { - /// Creates a new session store with default TTL (30 minutes). - pub async fn new(nats: NatsClient) -> Result { - let inner = nats - .chat_history_store() - .await - .map_err(|e| crate::Error::session(format!("failed to create store: {e}")))?; - Ok(Self { inner }) - } - - /// Creates a session store with custom TTL. - pub async fn with_ttl(nats: NatsClient, ttl: Duration) -> Result { - let inner = nats - .chat_history_store_with_ttl(ttl) - .await - .map_err(|e| crate::Error::session(format!("failed to create store: {e}")))?; - Ok(Self { inner }) - } - - /// Creates a new session. - pub async fn create(&self, session: &Session) -> Result<()> { - let key = SessionKey::from(session.id()); - self.inner - .put(&key, session) - .await - .map_err(|e| crate::Error::session(format!("failed to create: {e}")))?; - Ok(()) - } - - /// Gets a session by ID. - pub async fn get(&self, session_id: uuid::Uuid) -> Result> { - let key = SessionKey::from(session_id); - self.inner - .get_value(&key) - .await - .map_err(|e| crate::Error::session(format!("failed to get: {e}"))) - } - - /// Updates an existing session (also resets TTL). - pub async fn update(&self, session: &Session) -> Result<()> { - let key = SessionKey::from(session.id()); - self.inner - .put(&key, session) - .await - .map_err(|e| crate::Error::session(format!("failed to update: {e}")))?; - Ok(()) - } - - /// Touches a session to reset its TTL. - pub async fn touch(&self, session_id: uuid::Uuid) -> Result<()> { - let key = SessionKey::from(session_id); - self.inner - .touch(&key) - .await - .map_err(|e| crate::Error::session(format!("failed to touch: {e}")))?; - Ok(()) - } - - /// Deletes a session. 
- pub async fn delete(&self, session_id: uuid::Uuid) -> Result<()> { - let key = SessionKey::from(session_id); - self.inner - .delete(&key) - .await - .map_err(|e| crate::Error::session(format!("failed to delete: {e}"))) - } -} diff --git a/crates/nvisy-rig/src/tool/definition.rs b/crates/nvisy-rig/src/tool/definition.rs deleted file mode 100644 index 7cd7b04..0000000 --- a/crates/nvisy-rig/src/tool/definition.rs +++ /dev/null @@ -1,312 +0,0 @@ -//! Tool definitions and schemas. - -use serde::{Deserialize, Serialize}; -use serde_json::Value; - -/// Definition of a tool available to the agent. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ToolDefinition { - /// Unique name of the tool. - name: String, - - /// Human-readable description. - description: String, - - /// JSON Schema for the tool's parameters. - parameters: Value, - - /// Whether the tool is idempotent. - idempotent: bool, - - /// Whether the tool requires user confirmation. - requires_confirmation: bool, -} - -impl ToolDefinition { - /// Creates a new tool definition. - pub fn new(name: impl Into, description: impl Into, parameters: Value) -> Self { - Self { - name: name.into(), - description: description.into(), - parameters, - idempotent: false, - requires_confirmation: false, - } - } - - /// Marks the tool as idempotent. - pub fn idempotent(mut self) -> Self { - self.idempotent = true; - self - } - - /// Marks the tool as requiring confirmation. - pub fn with_confirmation(mut self) -> Self { - self.requires_confirmation = true; - self - } - - /// Returns the tool name. - pub fn name(&self) -> &str { - &self.name - } - - /// Returns the tool description. - pub fn description(&self) -> &str { - &self.description - } - - /// Returns the parameter schema. - pub fn parameters(&self) -> &Value { - &self.parameters - } - - /// Returns whether the tool is idempotent. - pub fn is_idempotent(&self) -> bool { - self.idempotent - } - - /// Returns whether the tool requires confirmation. - pub fn requires_confirmation(&self) -> bool { - self.requires_confirmation - } - - /// Converts to OpenAI function format. - pub fn to_openai_function(&self) -> Value { - serde_json::json!({ - "type": "function", - "function": { - "name": self.name, - "description": self.description, - "parameters": self.parameters - } - }) - } - - /// Converts to Anthropic tool format. - pub fn to_anthropic_tool(&self) -> Value { - serde_json::json!({ - "name": self.name, - "description": self.description, - "input_schema": self.parameters - }) - } -} - -/// Builder for common tool definitions. -pub struct ToolBuilder; - -impl ToolBuilder { - /// Creates the search tool definition. - pub fn search() -> ToolDefinition { - ToolDefinition::new( - "search", - "Search for content within the document", - serde_json::json!({ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "The search query" - }, - "max_results": { - "type": "integer", - "description": "Maximum number of results to return", - "default": 5 - } - }, - "required": ["query"] - }), - ) - .idempotent() - } - - /// Creates the read tool definition. 
- pub fn read() -> ToolDefinition { - ToolDefinition::new( - "read", - "Read a specific section or page of the document", - serde_json::json!({ - "type": "object", - "properties": { - "section": { - "type": "string", - "description": "Section identifier (page number, heading, etc.)" - }, - "range": { - "type": "object", - "properties": { - "start": { "type": "integer" }, - "end": { "type": "integer" } - }, - "description": "Page range to read" - } - } - }), - ) - .idempotent() - } - - /// Creates the extract tool definition. - pub fn extract() -> ToolDefinition { - ToolDefinition::new( - "extract", - "Extract a specific element from the document (table, figure, etc.)", - serde_json::json!({ - "type": "object", - "properties": { - "element_type": { - "type": "string", - "enum": ["table", "figure", "section", "list", "code"], - "description": "Type of element to extract" - }, - "identifier": { - "type": "string", - "description": "Element identifier (e.g., 'Table 12.6', 'Figure 3')" - }, - "format": { - "type": "string", - "enum": ["markdown", "json", "csv", "text"], - "description": "Output format for the extracted content" - } - }, - "required": ["element_type", "identifier"] - }), - ) - .idempotent() - } - - /// Creates the edit tool definition. - pub fn edit() -> ToolDefinition { - ToolDefinition::new( - "edit", - "Edit content in the document", - serde_json::json!({ - "type": "object", - "properties": { - "location": { - "type": "object", - "properties": { - "page": { "type": "integer" }, - "section": { "type": "string" }, - "offset": { "type": "integer" } - }, - "description": "Location of the content to edit" - }, - "original": { - "type": "string", - "description": "Original content to replace" - }, - "replacement": { - "type": "string", - "description": "New content" - }, - "reason": { - "type": "string", - "description": "Reason for the edit" - } - }, - "required": ["location", "original", "replacement"] - }), - ) - .with_confirmation() - } - - /// Creates the insert tool definition. - pub fn insert() -> ToolDefinition { - ToolDefinition::new( - "insert", - "Insert new content into the document", - serde_json::json!({ - "type": "object", - "properties": { - "location": { - "type": "object", - "properties": { - "page": { "type": "integer" }, - "section": { "type": "string" }, - "position": { - "type": "string", - "enum": ["before", "after", "start", "end"] - } - }, - "description": "Where to insert the content" - }, - "content": { - "type": "string", - "description": "Content to insert" - }, - "reason": { - "type": "string", - "description": "Reason for the insertion" - } - }, - "required": ["location", "content"] - }), - ) - .with_confirmation() - } - - /// Creates the redact tool definition. 
- pub fn redact() -> ToolDefinition { - ToolDefinition::new( - "redact", - "Redact sensitive information from the document", - serde_json::json!({ - "type": "object", - "properties": { - "pattern": { - "type": "string", - "description": "Pattern to match for redaction (regex supported)" - }, - "category": { - "type": "string", - "enum": ["pii", "financial", "medical", "legal", "custom"], - "description": "Category of information to redact" - }, - "replacement": { - "type": "string", - "description": "Replacement text (default: [REDACTED])" - }, - "preview": { - "type": "boolean", - "description": "If true, return matches without redacting", - "default": false - } - }, - "required": ["category"] - }), - ) - .with_confirmation() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tool_builder_search() { - let tool = ToolBuilder::search(); - assert_eq!(tool.name(), "search"); - assert!(tool.is_idempotent()); - assert!(!tool.requires_confirmation()); - } - - #[test] - fn tool_builder_edit() { - let tool = ToolBuilder::edit(); - assert_eq!(tool.name(), "edit"); - assert!(!tool.is_idempotent()); - assert!(tool.requires_confirmation()); - } - - #[test] - fn tool_to_openai_format() { - let tool = ToolBuilder::search(); - let openai = tool.to_openai_function(); - - assert_eq!(openai["type"], "function"); - assert_eq!(openai["function"]["name"], "search"); - } -} diff --git a/crates/nvisy-rig/src/tool/edit/mod.rs b/crates/nvisy-rig/src/tool/edit/mod.rs deleted file mode 100644 index db9aa38..0000000 --- a/crates/nvisy-rig/src/tool/edit/mod.rs +++ /dev/null @@ -1,102 +0,0 @@ -//! Edit module for document modifications. -//! -//! This module handles: -//! - Proposed edits from the agent -//! - Edit approval and rejection -//! - Edit application to documents -//! - Edit preview generation - -mod operation; -mod proposed; - -pub use operation::{EditLocation, EditOperation}; -pub use proposed::ProposedEdit; -use uuid::Uuid; - -/// Result of applying edits. -#[derive(Debug, Clone)] -pub struct ApplyResult { - /// Successfully applied edits. - pub applied: Vec, - - /// Edits that were not found. - pub not_found: Vec, - - /// Edits that failed to apply. - pub errors: Vec, -} - -impl ApplyResult { - /// Returns whether all edits were applied successfully. - pub fn is_success(&self) -> bool { - self.not_found.is_empty() && self.errors.is_empty() - } - - /// Returns the count of successfully applied edits. - pub fn applied_count(&self) -> usize { - self.applied.len() - } - - /// Returns the count of failed edits. - pub fn failed_count(&self) -> usize { - self.not_found.len() + self.errors.len() - } -} - -/// Error that occurred while applying an edit. -#[derive(Debug, Clone)] -pub struct ApplyError { - /// The edit ID that failed. - pub edit_id: Uuid, - - /// Error message. - pub message: String, - - /// Whether the error is recoverable. - pub recoverable: bool, -} - -impl ApplyError { - /// Creates a new apply error. - pub fn new(edit_id: Uuid, message: impl Into) -> Self { - Self { - edit_id, - message: message.into(), - recoverable: false, - } - } - - /// Marks the error as recoverable. 
- pub fn recoverable(mut self) -> Self { - self.recoverable = true; - self - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn apply_result_success() { - let result = ApplyResult { - applied: vec![], - not_found: vec![], - errors: vec![], - }; - - assert!(result.is_success()); - } - - #[test] - fn apply_result_with_errors() { - let result = ApplyResult { - applied: vec![], - not_found: vec![Uuid::now_v7()], - errors: vec![], - }; - - assert!(!result.is_success()); - assert_eq!(result.failed_count(), 1); - } -} diff --git a/crates/nvisy-rig/src/tool/edit/operation.rs b/crates/nvisy-rig/src/tool/edit/operation.rs deleted file mode 100644 index a03ec99..0000000 --- a/crates/nvisy-rig/src/tool/edit/operation.rs +++ /dev/null @@ -1,164 +0,0 @@ -//! Edit operations and locations. - -use serde::{Deserialize, Serialize}; - -/// Location within a document for an edit. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct EditLocation { - /// Page number (1-indexed). - pub page: Option, - - /// Section identifier. - pub section: Option, - - /// Character offset from start. - pub offset: Option, - - /// Length of the affected region. - pub length: Option, -} - -impl EditLocation { - /// Creates a page-based location. - pub fn page(page: u32) -> Self { - Self { - page: Some(page), - section: None, - offset: None, - length: None, - } - } - - /// Creates a section-based location. - pub fn section(section: impl Into) -> Self { - Self { - page: None, - section: Some(section.into()), - offset: None, - length: None, - } - } - - /// Creates an offset-based location. - pub fn offset(offset: usize, length: usize) -> Self { - Self { - page: None, - section: None, - offset: Some(offset), - length: Some(length), - } - } - - /// Adds page information. - pub fn with_page(mut self, page: u32) -> Self { - self.page = Some(page); - self - } - - /// Adds section information. - pub fn with_section(mut self, section: impl Into) -> Self { - self.section = Some(section.into()); - self - } - - /// Returns a display string for the location. - pub fn display(&self) -> String { - let mut parts = Vec::new(); - - if let Some(page) = self.page { - parts.push(format!("page {page}")); - } - - if let Some(section) = &self.section { - parts.push(format!("'{section}'")); - } - - if let Some(offset) = self.offset { - if let Some(length) = self.length { - parts.push(format!("offset {offset}..{}", offset + length)); - } else { - parts.push(format!("offset {offset}")); - } - } - - if parts.is_empty() { - "unspecified location".to_string() - } else { - parts.join(", ") - } - } -} - -/// Type of edit operation. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum EditOperation { - /// Replace existing content. - Replace, - - /// Insert new content. - Insert, - - /// Delete content. - Delete, - - /// Redact content (replace with placeholder). - Redact, - - /// Extract content (copy without modifying). - Extract, -} - -impl EditOperation { - /// Returns whether this operation is idempotent. - pub fn is_idempotent(&self) -> bool { - matches!(self, Self::Extract) - } - - /// Returns whether this operation modifies the document. - pub fn is_destructive(&self) -> bool { - matches!( - self, - Self::Replace | Self::Insert | Self::Delete | Self::Redact - ) - } - - /// Returns a human-readable name. 
- pub fn name(&self) -> &'static str { - match self { - Self::Replace => "replace", - Self::Insert => "insert", - Self::Delete => "delete", - Self::Redact => "redact", - Self::Extract => "extract", - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn edit_location_display() { - let loc = EditLocation::page(5).with_section("Introduction"); - let display = loc.display(); - - assert!(display.contains("page 5")); - assert!(display.contains("Introduction")); - } - - #[test] - fn edit_operation_idempotency() { - assert!(EditOperation::Extract.is_idempotent()); - assert!(!EditOperation::Replace.is_idempotent()); - assert!(!EditOperation::Delete.is_idempotent()); - } - - #[test] - fn edit_operation_destructive() { - assert!(EditOperation::Replace.is_destructive()); - assert!(EditOperation::Delete.is_destructive()); - assert!(!EditOperation::Extract.is_destructive()); - } -} diff --git a/crates/nvisy-rig/src/tool/edit/proposed.rs b/crates/nvisy-rig/src/tool/edit/proposed.rs deleted file mode 100644 index d73c803..0000000 --- a/crates/nvisy-rig/src/tool/edit/proposed.rs +++ /dev/null @@ -1,305 +0,0 @@ -//! Proposed edit types. - -use jiff::Timestamp; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::{EditLocation, EditOperation}; - -/// A proposed edit to a document. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProposedEdit { - /// Unique edit ID. - id: Uuid, - - /// Document being edited. - document_id: Uuid, - - /// Type of operation. - operation: EditOperation, - - /// Location of the edit. - location: EditLocation, - - /// Original content (for replace/delete). - original: Option, - - /// New content (for replace/insert). - replacement: Option, - - /// Reason for the edit. - reason: String, - - /// Preview of the result. - preview: Option, - - /// When the edit was proposed. - proposed_at: Timestamp, - - /// Current status. - status: EditStatus, -} - -impl ProposedEdit { - /// Creates a new proposed edit. - pub fn new( - document_id: Uuid, - operation: EditOperation, - location: EditLocation, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation, - location, - original: None, - replacement: None, - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Creates a replace edit. - pub fn replace( - document_id: Uuid, - location: EditLocation, - original: impl Into, - replacement: impl Into, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation: EditOperation::Replace, - location, - original: Some(original.into()), - replacement: Some(replacement.into()), - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Creates an insert edit. - pub fn insert( - document_id: Uuid, - location: EditLocation, - content: impl Into, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation: EditOperation::Insert, - location, - original: None, - replacement: Some(content.into()), - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Creates a delete edit. 
- pub fn delete( - document_id: Uuid, - location: EditLocation, - content: impl Into, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation: EditOperation::Delete, - location, - original: Some(content.into()), - replacement: None, - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Creates a redact edit. - pub fn redact( - document_id: Uuid, - location: EditLocation, - content: impl Into, - reason: impl Into, - ) -> Self { - Self { - id: Uuid::now_v7(), - document_id, - operation: EditOperation::Redact, - location, - original: Some(content.into()), - replacement: Some("[REDACTED]".to_string()), - reason: reason.into(), - preview: None, - proposed_at: Timestamp::now(), - status: EditStatus::Pending, - } - } - - /// Adds a preview. - pub fn with_preview(mut self, preview: impl Into) -> Self { - self.preview = Some(preview.into()); - self - } - - /// Returns the edit ID. - pub fn id(&self) -> Uuid { - self.id - } - - /// Returns the document ID. - pub fn document_id(&self) -> Uuid { - self.document_id - } - - /// Returns the operation type. - pub fn operation(&self) -> EditOperation { - self.operation - } - - /// Returns the operation type as a string. - pub fn operation_type(&self) -> &'static str { - self.operation.name() - } - - /// Returns the location. - pub fn location(&self) -> &EditLocation { - &self.location - } - - /// Returns the original content. - pub fn original(&self) -> Option<&str> { - self.original.as_deref() - } - - /// Returns the replacement content. - pub fn replacement(&self) -> Option<&str> { - self.replacement.as_deref() - } - - /// Returns the reason. - pub fn reason(&self) -> &str { - &self.reason - } - - /// Returns the preview. - pub fn preview(&self) -> Option<&str> { - self.preview.as_deref() - } - - /// Returns when the edit was proposed. - pub fn proposed_at(&self) -> Timestamp { - self.proposed_at - } - - /// Returns the current status. - pub fn status(&self) -> EditStatus { - self.status - } - - /// Returns whether this operation is idempotent. - pub fn is_idempotent(&self) -> bool { - self.operation.is_idempotent() - } - - /// Returns whether this edit is pending. - pub fn is_pending(&self) -> bool { - self.status == EditStatus::Pending - } - - /// Marks the edit as applied. - pub fn mark_applied(&mut self) { - self.status = EditStatus::Applied; - } - - /// Marks the edit as rejected. - pub fn mark_rejected(&mut self) { - self.status = EditStatus::Rejected; - } - - /// Returns a summary of the edit for display. - pub fn summary(&self) -> String { - format!( - "{} at {}: {}", - self.operation.name(), - self.location.display(), - self.reason - ) - } -} - -/// Status of a proposed edit. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum EditStatus { - /// Awaiting user approval. - Pending, - - /// Approved and applied. - Applied, - - /// Rejected by user. - Rejected, - - /// Failed to apply. 
- Failed, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn proposed_edit_replace() { - let edit = ProposedEdit::replace( - Uuid::now_v7(), - EditLocation::page(1), - "old text", - "new text", - "fixing typo", - ); - - assert_eq!(edit.operation(), EditOperation::Replace); - assert_eq!(edit.original(), Some("old text")); - assert_eq!(edit.replacement(), Some("new text")); - assert!(edit.is_pending()); - } - - #[test] - fn proposed_edit_redact() { - let edit = ProposedEdit::redact( - Uuid::now_v7(), - EditLocation::page(5), - "SSN: 123-45-6789", - "removing PII", - ); - - assert_eq!(edit.operation(), EditOperation::Redact); - assert_eq!(edit.replacement(), Some("[REDACTED]")); - } - - #[test] - fn proposed_edit_summary() { - let edit = ProposedEdit::delete( - Uuid::now_v7(), - EditLocation::section("Appendix"), - "old content", - "removing outdated section", - ); - - let summary = edit.summary(); - assert!(summary.contains("delete")); - assert!(summary.contains("Appendix")); - } -} diff --git a/crates/nvisy-rig/src/tool/mod.rs b/crates/nvisy-rig/src/tool/mod.rs deleted file mode 100644 index 8bca68a..0000000 --- a/crates/nvisy-rig/src/tool/mod.rs +++ /dev/null @@ -1,148 +0,0 @@ -//! Tool module for agent capabilities. -//! -//! This module provides tool support for agents, building on rig-core's -//! tool infrastructure while adding document-specific tools and registry. -//! -//! ## Rig-core integration -//! -//! We re-export key types from rig-core: -//! - [`rig::tool::Tool`] - The core tool trait -//! - [`rig::tool::ToolDyn`] - Dynamic dispatch wrapper -//! - [`rig::completion::ToolDefinition`] - Tool schema definition -//! -//! ## Document tools -//! -//! Pre-built tools for document processing: -//! - `search` - Search document content -//! - `read` - Read specific sections -//! - `extract` - Extract elements (tables, figures) -//! - `edit` - Modify document content -//! - `insert` - Add new content -//! - `redact` - Redact sensitive information -//! -//! ## Submodules -//! -//! - [`edit`] - Proposed edits and edit operations - -mod definition; -pub mod edit; -mod registry; -mod types; - -// Re-export rig-core tool types -// Our extensions -pub use definition::ToolDefinition; -pub use registry::ToolRegistry; -pub use rig::tool::{Tool, ToolDyn, ToolError}; -use serde::{Deserialize, Serialize}; -pub use types::{ToolInput, ToolOutput}; -use uuid::Uuid; - -/// A tool call made by the agent. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ToolCall { - /// Unique ID for this call. - pub id: Uuid, - - /// Name of the tool being called. - pub name: String, - - /// Arguments to the tool (JSON). - pub arguments: serde_json::Value, -} - -impl ToolCall { - /// Creates a new tool call. - pub fn new(name: impl Into, arguments: serde_json::Value) -> Self { - Self { - id: Uuid::now_v7(), - name: name.into(), - arguments, - } - } - - /// Returns whether this tool call is idempotent. - pub fn is_idempotent(&self) -> bool { - matches!(self.name.as_str(), "search" | "extract" | "read") - } - - /// Returns the arguments as a JSON string. - pub fn arguments_string(&self) -> String { - serde_json::to_string(&self.arguments).unwrap_or_default() - } -} - -/// Result of a tool execution. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ToolResult { - /// The call ID this result is for. - pub call_id: Uuid, - - /// Whether the call succeeded. - pub success: bool, - - /// Output from the tool. - pub output: ToolOutput, - - /// Error message if failed. 
- pub error: Option, -} - -impl ToolResult { - /// Creates a successful result. - pub fn success(call_id: Uuid, output: ToolOutput) -> Self { - Self { - call_id, - success: true, - output, - error: None, - } - } - - /// Creates a failed result. - pub fn failure(call_id: Uuid, error: impl Into) -> Self { - Self { - call_id, - success: false, - output: ToolOutput::empty(), - error: Some(error.into()), - } - } - - /// Creates a result from a rig tool error. - pub fn from_error(call_id: Uuid, error: ToolError) -> Self { - Self::failure(call_id, error.to_string()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tool_call_idempotency() { - let search = ToolCall::new("search", serde_json::json!({"query": "test"})); - let edit = ToolCall::new("edit", serde_json::json!({"content": "new"})); - - assert!(search.is_idempotent()); - assert!(!edit.is_idempotent()); - } - - #[test] - fn tool_result_success() { - let call_id = Uuid::now_v7(); - let result = ToolResult::success(call_id, ToolOutput::text("done")); - - assert!(result.success); - assert!(result.error.is_none()); - } - - #[test] - fn tool_result_failure() { - let call_id = Uuid::now_v7(); - let result = ToolResult::failure(call_id, "something went wrong"); - - assert!(!result.success); - assert_eq!(result.error, Some("something went wrong".to_string())); - } -} diff --git a/crates/nvisy-rig/src/tool/registry.rs b/crates/nvisy-rig/src/tool/registry.rs deleted file mode 100644 index 612bb99..0000000 --- a/crates/nvisy-rig/src/tool/registry.rs +++ /dev/null @@ -1,152 +0,0 @@ -//! Tool registry for managing available tools. - -use std::collections::HashMap; -use std::sync::Arc; - -use super::{ToolCall, ToolDefinition, ToolInput, ToolOutput, ToolResult}; -use crate::Result; - -/// Handler function for tool execution. -pub type ToolHandler = - Arc futures::future::BoxFuture<'static, Result> + Send + Sync>; - -/// Registry of available tools. -#[derive(Default)] -pub struct ToolRegistry { - definitions: HashMap, - handlers: HashMap, -} - -impl ToolRegistry { - /// Creates a new empty registry. - pub fn new() -> Self { - Self::default() - } - - /// Creates a registry with default tools. - pub fn with_defaults() -> Self { - use super::definition::ToolBuilder; - - let mut registry = Self::new(); - - // Register default tool definitions - registry.register_definition(ToolBuilder::search()); - registry.register_definition(ToolBuilder::read()); - registry.register_definition(ToolBuilder::extract()); - registry.register_definition(ToolBuilder::edit()); - registry.register_definition(ToolBuilder::insert()); - registry.register_definition(ToolBuilder::redact()); - - registry - } - - /// Registers a tool definition. - pub fn register_definition(&mut self, definition: ToolDefinition) { - self.definitions - .insert(definition.name().to_string(), definition); - } - - /// Registers a tool handler. - pub fn register_handler(&mut self, name: impl Into, handler: ToolHandler) { - self.handlers.insert(name.into(), handler); - } - - /// Registers both definition and handler. - pub fn register(&mut self, definition: ToolDefinition, handler: ToolHandler) { - let name = definition.name().to_string(); - self.definitions.insert(name.clone(), definition); - self.handlers.insert(name, handler); - } - - /// Returns a tool definition by name. - pub fn get_definition(&self, name: &str) -> Option<&ToolDefinition> { - self.definitions.get(name) - } - - /// Returns all tool definitions. 
- pub fn definitions(&self) -> impl Iterator { - self.definitions.values() - } - - /// Returns all tool definitions as a vector. - pub fn definitions_vec(&self) -> Vec { - self.definitions.values().cloned().collect() - } - - /// Returns whether a tool exists. - pub fn has_tool(&self, name: &str) -> bool { - self.definitions.contains_key(name) - } - - /// Returns whether a tool has a handler. - pub fn has_handler(&self, name: &str) -> bool { - self.handlers.contains_key(name) - } - - /// Executes a tool call. - pub async fn execute(&self, call: &ToolCall) -> ToolResult { - let Some(handler) = self.handlers.get(&call.name) else { - return ToolResult::failure(call.id, format!("tool '{}' not found", call.name)); - }; - - let input = ToolInput { - call_id: call.id, - arguments: call.arguments.clone(), - }; - - match handler(input).await { - Ok(output) => ToolResult::success(call.id, output), - Err(e) => ToolResult::failure(call.id, e.to_string()), - } - } - - /// Returns the number of registered tools. - pub fn len(&self) -> usize { - self.definitions.len() - } - - /// Returns whether the registry is empty. - pub fn is_empty(&self) -> bool { - self.definitions.is_empty() - } -} - -impl std::fmt::Debug for ToolRegistry { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("ToolRegistry") - .field("definitions", &self.definitions.keys().collect::>()) - .field("handlers", &self.handlers.keys().collect::>()) - .finish() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn registry_with_defaults() { - let registry = ToolRegistry::with_defaults(); - - assert!(registry.has_tool("search")); - assert!(registry.has_tool("read")); - assert!(registry.has_tool("extract")); - assert!(registry.has_tool("edit")); - assert!(registry.has_tool("insert")); - assert!(registry.has_tool("redact")); - } - - #[test] - fn registry_register_definition() { - let mut registry = ToolRegistry::new(); - - registry.register_definition(ToolDefinition::new( - "custom", - "A custom tool", - serde_json::json!({}), - )); - - assert!(registry.has_tool("custom")); - assert!(!registry.has_handler("custom")); - } -} diff --git a/crates/nvisy-rig/src/tool/types.rs b/crates/nvisy-rig/src/tool/types.rs deleted file mode 100644 index c7a3a44..0000000 --- a/crates/nvisy-rig/src/tool/types.rs +++ /dev/null @@ -1,183 +0,0 @@ -//! Tool input/output types. - -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use uuid::Uuid; - -/// Input to a tool execution. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ToolInput { - /// The call ID. - pub call_id: Uuid, - - /// Arguments from the tool call. - pub arguments: Value, -} - -impl ToolInput { - /// Gets a string argument. - pub fn get_string(&self, key: &str) -> Option<&str> { - self.arguments.get(key).and_then(|v| v.as_str()) - } - - /// Gets an integer argument. - pub fn get_i64(&self, key: &str) -> Option { - self.arguments.get(key).and_then(|v| v.as_i64()) - } - - /// Gets a boolean argument. - pub fn get_bool(&self, key: &str) -> Option { - self.arguments.get(key).and_then(|v| v.as_bool()) - } - - /// Gets an object argument. - pub fn get_object(&self, key: &str) -> Option<&serde_json::Map> { - self.arguments.get(key).and_then(|v| v.as_object()) - } - - /// Gets an array argument. - pub fn get_array(&self, key: &str) -> Option<&Vec> { - self.arguments.get(key).and_then(|v| v.as_array()) - } - - /// Deserializes the arguments to a typed struct. 
- pub fn parse Deserialize<'de>>(&self) -> Result { - serde_json::from_value(self.arguments.clone()) - } -} - -/// Output from a tool execution. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "snake_case")] -pub enum ToolOutput { - /// Plain text output. - Text { content: String }, - - /// JSON output. - Json { data: Value }, - - /// Binary data (base64 encoded). - Binary { data: String, mime_type: String }, - - /// Proposed edit output. - Edit { - edit_id: Uuid, - description: String, - preview: Option, - }, - - /// Multiple outputs. - Multiple { outputs: Vec }, - - /// Empty output. - Empty, -} - -impl ToolOutput { - /// Creates a text output. - pub fn text(content: impl Into) -> Self { - Self::Text { - content: content.into(), - } - } - - /// Creates a JSON output. - pub fn json(data: Value) -> Self { - Self::Json { data } - } - - /// Creates a binary output. - pub fn binary(data: impl Into, mime_type: impl Into) -> Self { - Self::Binary { - data: data.into(), - mime_type: mime_type.into(), - } - } - - /// Creates an edit output. - pub fn edit(edit_id: Uuid, description: impl Into) -> Self { - Self::Edit { - edit_id, - description: description.into(), - preview: None, - } - } - - /// Creates an edit output with preview. - pub fn edit_with_preview( - edit_id: Uuid, - description: impl Into, - preview: impl Into, - ) -> Self { - Self::Edit { - edit_id, - description: description.into(), - preview: Some(preview.into()), - } - } - - /// Creates a multiple output. - pub fn multiple(outputs: Vec) -> Self { - Self::Multiple { outputs } - } - - /// Creates an empty output. - pub fn empty() -> Self { - Self::Empty - } - - /// Converts to a string representation for the LLM. - pub fn to_llm_string(&self) -> String { - match self { - Self::Text { content } => content.clone(), - Self::Json { data } => serde_json::to_string_pretty(data).unwrap_or_default(), - Self::Binary { mime_type, .. } => format!("[Binary data: {mime_type}]"), - Self::Edit { - edit_id, - description, - preview, - } => { - if let Some(p) = preview { - format!("[Edit proposed: {edit_id}]\n{description}\n\nPreview:\n{p}") - } else { - format!("[Edit proposed: {edit_id}]\n{description}") - } - } - Self::Multiple { outputs } => outputs - .iter() - .map(|o| o.to_llm_string()) - .collect::>() - .join("\n---\n"), - Self::Empty => "[No output]".to_string(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn tool_input_get_string() { - let input = ToolInput { - call_id: Uuid::now_v7(), - arguments: serde_json::json!({ - "query": "test", - "count": 5 - }), - }; - - assert_eq!(input.get_string("query"), Some("test")); - assert_eq!(input.get_i64("count"), Some(5)); - assert_eq!(input.get_string("missing"), None); - } - - #[test] - fn tool_output_to_llm_string() { - let text = ToolOutput::text("hello"); - assert_eq!(text.to_llm_string(), "hello"); - - let empty = ToolOutput::empty(); - assert_eq!(empty.to_llm_string(), "[No output]"); - } -} diff --git a/crates/nvisy-runtime/README.md b/crates/nvisy-runtime/README.md index b8814dd..7cf334b 100644 --- a/crates/nvisy-runtime/README.md +++ b/crates/nvisy-runtime/README.md @@ -1,5 +1,7 @@ # nvisy-runtime +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + Workflow definitions and execution engine for Nvisy pipelines. 
This crate provides the core abstractions for defining and executing diff --git a/crates/nvisy-runtime/src/engine/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs index 9d36e29..f711db8 100644 --- a/crates/nvisy-runtime/src/engine/compiler.rs +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -12,7 +12,9 @@ use std::collections::HashMap; -use nvisy_rig::agent::Agents; +use nvisy_rig::agent::{ + StructuredOutputAgent, TableAgent, TextAnalysisAgent, TextGenerationAgent, VisionAgent, +}; use nvisy_rig::provider::CompletionProvider; use petgraph::graph::{DiGraph, NodeIndex}; @@ -346,25 +348,34 @@ impl<'a> WorkflowCompiler<'a> { ))) } Transformer::Enrich(e) => { - let agents = self.create_agents(&e.provider).await?; - Ok(CompiledTransform::Enrich(EnrichProcessor::new( - agents, + let provider = self.create_completion_provider(&e.provider).await?; + let vision_agent = VisionAgent::new(provider.clone(), false); + let table_agent = TableAgent::new(provider, false); + Ok(CompiledTransform::Enrich(Box::new(EnrichProcessor::new( + vision_agent, + table_agent, e.task.clone(), e.override_prompt.clone(), - ))) + )))) } Transformer::Extract(e) => { - let agents = self.create_agents(&e.provider).await?; - Ok(CompiledTransform::Extract(ExtractProcessor::new( - agents, + let provider = self.create_completion_provider(&e.provider).await?; + let text_analysis_agent = TextAnalysisAgent::new(provider.clone(), false); + let table_agent = TableAgent::new(provider.clone(), false); + let structured_output_agent = StructuredOutputAgent::new(provider, false); + Ok(CompiledTransform::Extract(Box::new(ExtractProcessor::new( + text_analysis_agent, + table_agent, + structured_output_agent, e.task.clone(), e.override_prompt.clone(), - ))) + )))) } Transformer::Derive(d) => { - let agents = self.create_agents(&d.provider).await?; + let provider = self.create_completion_provider(&d.provider).await?; + let agent = TextGenerationAgent::new(provider, false); Ok(CompiledTransform::Derive(DeriveProcessor::new( - agents, + agent, d.task, d.override_prompt.clone(), ))) @@ -384,12 +395,6 @@ impl<'a> WorkflowCompiler<'a> { .await } - /// Creates agents from completion provider parameters. - async fn create_agents(&self, params: &CompletionProviderParams) -> Result { - let provider = self.create_completion_provider(params).await?; - Ok(Agents::new(provider)) - } - /// Creates a completion provider from parameters. async fn create_completion_provider( &self, diff --git a/crates/nvisy-runtime/src/graph/transform/chunk.rs b/crates/nvisy-runtime/src/graph/transform/chunk.rs index a6c47ac..0c632b7 100644 --- a/crates/nvisy-runtime/src/graph/transform/chunk.rs +++ b/crates/nvisy-runtime/src/graph/transform/chunk.rs @@ -1,7 +1,7 @@ //! Chunk processor. use nvisy_dal::AnyDataValue; -use nvisy_rig::agent::Agents; +use nvisy_rig::agent::TextGenerationAgent; use super::Process; use crate::definition::ChunkStrategy; @@ -13,8 +13,8 @@ pub struct ChunkProcessor { strategy: ChunkStrategy, /// Whether to use LLM-powered contextual chunking. contextual_chunking: bool, - /// Agents for contextual chunking (if enabled). - agents: Option, + /// Agent for contextual chunking (if enabled). + agent: Option, } impl ChunkProcessor { @@ -23,16 +23,16 @@ impl ChunkProcessor { Self { strategy, contextual_chunking: false, - agents: None, + agent: None, } } /// Creates a new chunk processor with contextual chunking enabled. 
- pub fn with_contextual_chunking(strategy: ChunkStrategy, agents: Agents) -> Self { + pub fn with_contextual_chunking(strategy: ChunkStrategy, agent: TextGenerationAgent) -> Self { Self { strategy, contextual_chunking: true, - agents: Some(agents), + agent: Some(agent), } } @@ -60,7 +60,7 @@ impl std::fmt::Debug for ChunkProcessor { f.debug_struct("ChunkProcessor") .field("strategy", &self.strategy) .field("contextual_chunking", &self.contextual_chunking) - .field("has_agents", &self.agents.is_some()) + .field("has_agent", &self.agent.is_some()) .finish() } } diff --git a/crates/nvisy-runtime/src/graph/transform/derive.rs b/crates/nvisy-runtime/src/graph/transform/derive.rs index f8f8714..4de85fa 100644 --- a/crates/nvisy-runtime/src/graph/transform/derive.rs +++ b/crates/nvisy-runtime/src/graph/transform/derive.rs @@ -1,7 +1,7 @@ //! Derive processor. use nvisy_dal::AnyDataValue; -use nvisy_rig::agent::Agents; +use nvisy_rig::agent::TextGenerationAgent; use super::Process; use crate::definition::DeriveTask; @@ -9,8 +9,8 @@ use crate::error::Result; /// Processor for generating new content from input. pub struct DeriveProcessor { - /// Agents for derivation tasks. - agents: Agents, + /// Agent for text generation (summarization, titles). + agent: TextGenerationAgent, /// The derivation task to perform. task: DeriveTask, /// Optional prompt override. @@ -19,9 +19,13 @@ pub struct DeriveProcessor { impl DeriveProcessor { /// Creates a new derive processor. - pub fn new(agents: Agents, task: DeriveTask, override_prompt: Option) -> Self { + pub fn new( + agent: TextGenerationAgent, + task: DeriveTask, + override_prompt: Option, + ) -> Self { Self { - agents, + agent, task, override_prompt, } @@ -40,9 +44,9 @@ impl DeriveProcessor { impl Process for DeriveProcessor { async fn process(&self, input: Vec) -> Result> { - // TODO: Implement derivation using agents - // Use self.agents.text_generation_agent for summarization and title generation - let _ = &self.agents; // Suppress unused warning + // TODO: Implement derivation using agent + // Use self.agent for summarization and title generation + let _ = &self.agent; Ok(input) } } diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs index 5bafb1a..4fe6fa9 100644 --- a/crates/nvisy-runtime/src/graph/transform/enrich.rs +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -1,7 +1,7 @@ //! Enrich processor. use nvisy_dal::AnyDataValue; -use nvisy_rig::agent::Agents; +use nvisy_rig::agent::{TableAgent, VisionAgent}; use super::Process; use crate::definition::EnrichTask; @@ -9,8 +9,10 @@ use crate::error::Result; /// Processor for enriching elements with metadata/descriptions. pub struct EnrichProcessor { - /// Agents for enrichment tasks. - agents: Agents, + /// Agent for vision/image tasks. + vision_agent: VisionAgent, + /// Agent for table processing. + table_agent: TableAgent, /// The enrichment task to perform. task: EnrichTask, /// Optional prompt override. @@ -19,9 +21,15 @@ pub struct EnrichProcessor { impl EnrichProcessor { /// Creates a new enrich processor. 
- pub fn new(agents: Agents, task: EnrichTask, override_prompt: Option) -> Self { + pub fn new( + vision_agent: VisionAgent, + table_agent: TableAgent, + task: EnrichTask, + override_prompt: Option, + ) -> Self { Self { - agents, + vision_agent, + table_agent, task, override_prompt, } @@ -41,9 +49,9 @@ impl EnrichProcessor { impl Process for EnrichProcessor { async fn process(&self, input: Vec) -> Result> { // TODO: Implement enrichment using agents - // Use self.agents.vision_agent for image tasks - // Use self.agents.table_agent for table tasks - let _ = &self.agents; // Suppress unused warning + // Use self.vision_agent for image tasks + // Use self.table_agent for table tasks + let _ = (&self.vision_agent, &self.table_agent); // Suppress unused warning Ok(input) } } diff --git a/crates/nvisy-runtime/src/graph/transform/extract.rs b/crates/nvisy-runtime/src/graph/transform/extract.rs index 37aeb63..ee2864a 100644 --- a/crates/nvisy-runtime/src/graph/transform/extract.rs +++ b/crates/nvisy-runtime/src/graph/transform/extract.rs @@ -1,7 +1,7 @@ //! Extract processor. use nvisy_dal::AnyDataValue; -use nvisy_rig::agent::Agents; +use nvisy_rig::agent::{StructuredOutputAgent, TableAgent, TextAnalysisAgent}; use super::Process; use crate::definition::ExtractTask; @@ -9,8 +9,12 @@ use crate::error::Result; /// Processor for extracting structured data or converting formats. pub struct ExtractProcessor { - /// Agents for extraction tasks. - agents: Agents, + /// Agent for text analysis (NER, keywords, classification, sentiment). + text_analysis_agent: TextAnalysisAgent, + /// Agent for table processing. + table_agent: TableAgent, + /// Agent for structured output extraction. + structured_output_agent: StructuredOutputAgent, /// The extraction task to perform. task: ExtractTask, /// Optional prompt override. @@ -19,9 +23,17 @@ pub struct ExtractProcessor { impl ExtractProcessor { /// Creates a new extract processor. - pub fn new(agents: Agents, task: ExtractTask, override_prompt: Option) -> Self { + pub fn new( + text_analysis_agent: TextAnalysisAgent, + table_agent: TableAgent, + structured_output_agent: StructuredOutputAgent, + task: ExtractTask, + override_prompt: Option, + ) -> Self { Self { - agents, + text_analysis_agent, + table_agent, + structured_output_agent, task, override_prompt, } @@ -41,10 +53,14 @@ impl ExtractProcessor { impl Process for ExtractProcessor { async fn process(&self, input: Vec) -> Result> { // TODO: Implement extraction using agents - // Use self.agents.text_analysis_agent for NER, keywords, classification, sentiment - // Use self.agents.table_agent for table conversion - // Use self.agents.structured_output_agent for JSON conversion - let _ = &self.agents; // Suppress unused warning + // Use self.text_analysis_agent for NER, keywords, classification, sentiment + // Use self.table_agent for table conversion + // Use self.structured_output_agent for JSON conversion + let _ = ( + &self.text_analysis_agent, + &self.table_agent, + &self.structured_output_agent, + ); Ok(input) } } diff --git a/crates/nvisy-runtime/src/graph/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs index 9cf77d3..93818d6 100644 --- a/crates/nvisy-runtime/src/graph/transform/mod.rs +++ b/crates/nvisy-runtime/src/graph/transform/mod.rs @@ -45,6 +45,8 @@ pub trait Process: Send + Sync { /// /// Each variant wraps a dedicated processor that encapsulates /// the transform logic and any required external dependencies. +/// +/// Large processor variants are boxed to avoid enum size bloat. 
 #[derive(Debug)]
 pub enum CompiledTransform {
     /// Partition documents into elements.
@@ -54,9 +56,9 @@ pub enum CompiledTransform {
     /// Generate vector embeddings.
     Embedding(EmbeddingProcessor),
     /// Enrich elements with metadata/descriptions.
-    Enrich(EnrichProcessor),
+    Enrich(Box<EnrichProcessor>),
     /// Extract structured data or convert formats.
-    Extract(ExtractProcessor),
+    Extract(Box<ExtractProcessor>),
     /// Generate new content from input.
     Derive(DeriveProcessor),
 }
diff --git a/crates/nvisy-server/README.md b/crates/nvisy-server/README.md
index 3b5df5e..f3ecc9d 100644
--- a/crates/nvisy-server/README.md
+++ b/crates/nvisy-server/README.md
@@ -1,11 +1,10 @@
-# api.nvisy.com/server
+# nvisy-server
+
+[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml)
 
 High-performance HTTP API server for the Nvisy document redaction platform,
 built with Axum and Tokio.
 
-[![rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/)
-[![axum](https://img.shields.io/badge/Axum-0.8+-000000?style=flat-square&logo=rust&logoColor=white)](https://github.com/tokio-rs/axum)
-
 ## Features
 
 - **Async HTTP Server** - Built with Axum web framework on Tokio runtime
diff --git a/crates/nvisy-webhook/README.md b/crates/nvisy-webhook/README.md
index cf498b8..0cf73fd 100644
--- a/crates/nvisy-webhook/README.md
+++ b/crates/nvisy-webhook/README.md
@@ -1,5 +1,7 @@
 # nvisy-webhook
 
+[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml)
+
 Webhook delivery types and traits for nvisy services.
 
## Features From e44e82f37b992694cf78b830ccf3577dd6ccdc3e Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 26 Jan 2026 09:50:39 +0100 Subject: [PATCH 24/28] docs: standardize crate documentation and metadata - Update all license mentions from MIT to Apache 2.0 - Add changelog/license/support sections to all crate READMEs - Add build badge to crates/README.md - Standardize Cargo.toml structure: name, description, readme, keywords, categories first - Add keywords and categories to all crate Cargo.toml files --- LICENSE.txt | 222 ++++++++++++++++++++++++++++--- README.md | 2 +- crates/README.md | 2 + crates/nvisy-cli/Cargo.toml | 6 +- crates/nvisy-cli/README.md | 15 +++ crates/nvisy-core/Cargo.toml | 6 +- crates/nvisy-core/README.md | 15 +++ crates/nvisy-dal/Cargo.toml | 2 + crates/nvisy-dal/README.md | 13 +- crates/nvisy-dal/src/lib.rs | 4 +- crates/nvisy-nats/Cargo.toml | 6 +- crates/nvisy-nats/README.md | 15 +++ crates/nvisy-postgres/Cargo.toml | 6 +- crates/nvisy-postgres/README.md | 15 +++ crates/nvisy-rig/Cargo.toml | 14 +- crates/nvisy-rig/README.md | 13 +- crates/nvisy-runtime/Cargo.toml | 6 +- crates/nvisy-runtime/README.md | 15 +++ crates/nvisy-server/Cargo.toml | 6 +- crates/nvisy-server/README.md | 15 +++ crates/nvisy-webhook/Cargo.toml | 6 +- crates/nvisy-webhook/README.md | 15 +++ 22 files changed, 380 insertions(+), 39 deletions(-) diff --git a/LICENSE.txt b/LICENSE.txt index 8015683..b91b9eb 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,21 +1,201 @@ -MIT License - -Copyright (c) 2025 Nvisy Software - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2026 Nvisy Software + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index d3b48f7..2405be6 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ See [CHANGELOG.md](CHANGELOG.md) for release notes and version history. 
## License -MIT License - see [LICENSE.txt](LICENSE.txt) +Apache 2.0 License - see [LICENSE.txt](LICENSE.txt) ## Support diff --git a/crates/README.md b/crates/README.md index f0892c6..d092687 100644 --- a/crates/README.md +++ b/crates/README.md @@ -1,5 +1,7 @@ # Crates +[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml) + This directory contains the workspace crates for Nvisy Server. ## Core diff --git a/crates/nvisy-cli/Cargo.toml b/crates/nvisy-cli/Cargo.toml index 94afff5..943df77 100644 --- a/crates/nvisy-cli/Cargo.toml +++ b/crates/nvisy-cli/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-cli" +description = "Command-line interface and HTTP server for the Nvisy platform" +readme = "./README.md" +keywords = ["cli", "server", "http", "nvisy", "document"] +categories = ["command-line-utilities", "web-programming::http-server"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-cli/README.md b/crates/nvisy-cli/README.md index d3683a2..3afecd6 100644 --- a/crates/nvisy-cli/README.md +++ b/crates/nvisy-cli/README.md @@ -22,3 +22,18 @@ Command-line interface and HTTP server for the Nvisy platform. - **tls** - HTTPS support with rustls - **dotenv** - Load configuration from `.env` files + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml index 2a5272c..4986871 100644 --- a/crates/nvisy-core/Cargo.toml +++ b/crates/nvisy-core/Cargo.toml @@ -1,11 +1,15 @@ [package] name = "nvisy-core" +description = "Core types and utilities shared across nvisy crates" +readme = "./README.md" +keywords = ["core", "types", "error", "utilities", "nvisy"] +categories = ["rust-patterns", "development-tools"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-core/README.md b/crates/nvisy-core/README.md index de8e9e6..e8bddd5 100644 --- a/crates/nvisy-core/README.md +++ b/crates/nvisy-core/README.md @@ -22,3 +22,18 @@ use nvisy_core::types::{ServiceHealth, ServiceStatus}; ## Features - `schema` - Enable JSON Schema derives for API documentation + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. 
+ +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-dal/Cargo.toml b/crates/nvisy-dal/Cargo.toml index cce2611..76a0578 100644 --- a/crates/nvisy-dal/Cargo.toml +++ b/crates/nvisy-dal/Cargo.toml @@ -4,6 +4,8 @@ name = "nvisy-dal" description = "Data abstraction layer for workflow inputs and outputs" readme = "./README.md" +keywords = ["dal", "storage", "database", "vector", "workflow"] +categories = ["database", "api-bindings"] version = { workspace = true } rust-version = { workspace = true } diff --git a/crates/nvisy-dal/README.md b/crates/nvisy-dal/README.md index 9367536..1c12d87 100644 --- a/crates/nvisy-dal/README.md +++ b/crates/nvisy-dal/README.md @@ -177,6 +177,17 @@ let ctx = Context::new() .with_limit(100); // Maximum items to read ``` +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + ## License -MIT +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-dal/src/lib.rs b/crates/nvisy-dal/src/lib.rs index 3905303..e8c95fd 100644 --- a/crates/nvisy-dal/src/lib.rs +++ b/crates/nvisy-dal/src/lib.rs @@ -13,8 +13,8 @@ pub mod provider; mod error; pub use core::{ - DataInput, DataOutput, InputStream, Provider, ItemSink, ItemStream, ObjectContext, - OutputStream, RelationalContext, VectorContext, + DataInput, DataOutput, InputStream, ItemSink, ItemStream, ObjectContext, OutputStream, + Provider, RelationalContext, VectorContext, }; pub use datatype::{AnyDataValue, DataTypeId}; diff --git a/crates/nvisy-nats/Cargo.toml b/crates/nvisy-nats/Cargo.toml index 7e24721..5f43ba0 100644 --- a/crates/nvisy-nats/Cargo.toml +++ b/crates/nvisy-nats/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-nats" +description = "Task-focused NATS client for the Nvisy platform with comprehensive JetStream support" +readme = "./README.md" +keywords = ["nats", "messaging", "jetstream", "pubsub", "streaming"] +categories = ["network-programming", "api-bindings"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-nats/README.md b/crates/nvisy-nats/README.md index ccc1ab2..d6966d2 100644 --- a/crates/nvisy-nats/README.md +++ b/crates/nvisy-nats/README.md @@ -32,3 +32,18 @@ The crate provides specialized modules for common NATS use cases: All modules maintain type safety through generic parameters and provide access to the underlying NATS client for extensibility. + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. 
+ +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-postgres/Cargo.toml b/crates/nvisy-postgres/Cargo.toml index 8b9194e..b15d4c8 100644 --- a/crates/nvisy-postgres/Cargo.toml +++ b/crates/nvisy-postgres/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-postgres" +description = "Type-safe PostgreSQL database layer for the Nvisy platform with async connection pooling" +readme = "./README.md" +keywords = ["postgres", "database", "orm", "diesel", "async"] +categories = ["database", "development-tools"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-postgres/README.md b/crates/nvisy-postgres/README.md index f7d58ce..98c7851 100644 --- a/crates/nvisy-postgres/README.md +++ b/crates/nvisy-postgres/README.md @@ -30,3 +30,18 @@ make generate-migrations The generated schema is located at `src/schema.rs` and provides type-safe table definitions for Diesel queries. + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-rig/Cargo.toml b/crates/nvisy-rig/Cargo.toml index baccd0b..fe80dd4 100644 --- a/crates/nvisy-rig/Cargo.toml +++ b/crates/nvisy-rig/Cargo.toml @@ -1,18 +1,20 @@ [package] name = "nvisy-rig" +description = "Rig AI framework integration for nvisy" +readme = "./README.md" +keywords = ["rig", "llm", "ai", "client", "rag"] +categories = ["api-bindings", "web-programming::http-client"] + version = { workspace = true } -edition = { workspace = true } rust-version = { workspace = true } +edition = { workspace = true } license = { workspace = true } +publish = { workspace = true } + authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } documentation = { workspace = true } -publish = { workspace = true } - -description = "Rig AI framework integration for nvisy" -keywords = ["rig", "llm", "ai", "client", "rag"] -categories = ["api-bindings", "web-programming::http-client"] [features] ## Default feature set includes Ollama for local development diff --git a/crates/nvisy-rig/README.md b/crates/nvisy-rig/README.md index f8d93e9..2c194d5 100644 --- a/crates/nvisy-rig/README.md +++ b/crates/nvisy-rig/README.md @@ -46,6 +46,17 @@ let service = RigService::new(providers, tools, sessions); - `edit` - Proposed edit operations - `service` - High-level service API +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. 
+ ## License -MIT +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-runtime/Cargo.toml b/crates/nvisy-runtime/Cargo.toml index f0b4bf8..660b2c4 100644 --- a/crates/nvisy-runtime/Cargo.toml +++ b/crates/nvisy-runtime/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-runtime" +description = "Workflow definitions and execution engine for Nvisy pipelines" +readme = "./README.md" +keywords = ["workflow", "runtime", "engine", "pipeline", "execution"] +categories = ["development-tools", "algorithms"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-runtime/README.md b/crates/nvisy-runtime/README.md index 7cf334b..6dac0c7 100644 --- a/crates/nvisy-runtime/README.md +++ b/crates/nvisy-runtime/README.md @@ -44,3 +44,18 @@ let registry = CredentialsRegistry::default(); let ctx = nvisy_dal::core::Context::default(); let result = engine.execute(workflow, registry, ctx).await?; ``` + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-server/Cargo.toml b/crates/nvisy-server/Cargo.toml index b6c0546..b664f48 100644 --- a/crates/nvisy-server/Cargo.toml +++ b/crates/nvisy-server/Cargo.toml @@ -2,12 +2,16 @@ [package] name = "nvisy-server" +description = "High-performance HTTP API server for the Nvisy document redaction platform" +readme = "./README.md" +keywords = ["http", "server", "api", "web", "axum"] +categories = ["web-programming::http-server"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-server/README.md b/crates/nvisy-server/README.md index f3ecc9d..7d89619 100644 --- a/crates/nvisy-server/README.md +++ b/crates/nvisy-server/README.md @@ -28,3 +28,18 @@ When running, the server exposes interactive documentation at: - **Swagger UI**: `/api/swagger` - **Scalar UI**: `/api/scalar` - **OpenAPI JSON**: `/api/openapi.json` + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. 
+ +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-webhook/Cargo.toml b/crates/nvisy-webhook/Cargo.toml index 21df97c..8f7845e 100644 --- a/crates/nvisy-webhook/Cargo.toml +++ b/crates/nvisy-webhook/Cargo.toml @@ -1,11 +1,15 @@ [package] name = "nvisy-webhook" +description = "Webhook delivery types and traits for nvisy services" +readme = "./README.md" +keywords = ["webhook", "http", "delivery", "notifications", "events"] +categories = ["api-bindings", "network-programming"] + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } diff --git a/crates/nvisy-webhook/README.md b/crates/nvisy-webhook/README.md index 0cf73fd..c54f20b 100644 --- a/crates/nvisy-webhook/README.md +++ b/crates/nvisy-webhook/README.md @@ -23,3 +23,18 @@ let service = WebhookService::new(my_provider); let request = WebhookRequest::new(url, event, payload, webhook_id, workspace_id); let response = service.deliver(&request).await?; ``` + +## Changelog + +See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history. + +## License + +Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt) + +## Support + +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) From cbf1349d73ff79676c4c9eafb15734657d927219 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 26 Jan 2026 10:18:07 +0100 Subject: [PATCH 25/28] feat: fix github actions and remove security vulnerabilities ## GitHub Actions Fixes - Fix broken intra-doc links in nvisy-runtime documentation - Update documentation to reference public Engine API instead of private compiler - All CI checks now pass (format, check, clippy, tests, docs) ## Security Improvements - Remove Milvus provider entirely - eliminates 4+ critical security vulnerabilities - Remove milvus-sdk-rust dependency and all related security advisories: - RUSTSEC-2025-0009 (ring AES panic with overflow checking) - RUSTSEC-2025-0010 (unmaintained ring versions) - RUSTSEC-2023-0071 (rsa Marvin Attack timing vulnerability) - RUSTSEC-2024-0336 (rustls infinite loop vulnerability) - Update deny.toml configuration for remaining dependencies - cargo deny check now passes with only duplicate dependency warnings ## Code Quality - Fix clippy warning about large enum variants using Box<> for providers - Keep Qdrant, Pinecone, and pgvector as vector database alternatives - Maintain all functionality while removing security risks ## Documentation Cleanup - Abstract DAL README to remove specific database names and code examples - Make provider documentation more generic and focused on concepts - Remove implementation details that expose internal architecture All 198 tests pass, documentation builds cleanly, and security posture significantly improved. 
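
For context on the `Box<>` change above, here is a minimal, illustrative sketch of the `clippy::large_enum_variant` fix pattern. The type names are simplified placeholders, not the actual provider types; the real change boxes the Qdrant and Pinecone variants of `OutputProvider` further down in this patch.

```rust
// Illustrative sketch only: placeholder types standing in for the real providers.

/// Stand-in for a heavyweight provider (e.g. a client holding large buffers).
struct LargeVectorProvider {
    _buffer: [u8; 1024],
}

/// Stand-in for a small provider.
struct SmallProvider {
    _id: u32,
}

// Before: the enum is as large as its biggest variant, so clippy warns that
// every variant pays for the largest one's size on each move.
#[allow(dead_code)]
enum OutputProviderBefore {
    Large(LargeVectorProvider),
    Small(SmallProvider),
}

// After: boxing the large variant stores only a heap pointer inline, keeping
// the enum itself small — the same pattern applied to the provider enums here.
#[allow(dead_code)]
enum OutputProviderAfter {
    Large(Box<LargeVectorProvider>),
    Small(SmallProvider),
}

fn main() {
    // The boxed enum is roughly pointer-sized instead of ~1 KiB per value.
    println!(
        "before: {} bytes, after: {} bytes",
        std::mem::size_of::<OutputProviderBefore>(),
        std::mem::size_of::<OutputProviderAfter>()
    );
}
```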
--- Cargo.lock | 501 ++++-------------- Cargo.toml | 2 +- crates/nvisy-dal/Cargo.toml | 2 +- crates/nvisy-dal/README.md | 140 +---- crates/nvisy-dal/src/core/vector_context.rs | 2 +- .../nvisy-dal/src/provider/milvus/config.rs | 37 -- crates/nvisy-dal/src/provider/milvus/mod.rs | 204 ------- .../nvisy-dal/src/provider/milvus/output.rs | 60 --- crates/nvisy-dal/src/provider/mod.rs | 4 +- crates/nvisy-runtime/src/definition/mod.rs | 2 +- crates/nvisy-runtime/src/engine/compiler.rs | 4 +- crates/nvisy-runtime/src/graph/mod.rs | 2 +- crates/nvisy-runtime/src/provider/mod.rs | 6 +- crates/nvisy-runtime/src/provider/outputs.rs | 58 +- deny.toml | 11 +- 15 files changed, 142 insertions(+), 893 deletions(-) delete mode 100644 crates/nvisy-dal/src/provider/milvus/config.rs delete mode 100644 crates/nvisy-dal/src/provider/milvus/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/milvus/output.rs diff --git a/Cargo.lock b/Cargo.lock index 3163278..92b3ce0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,7 +83,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -231,9 +231,9 @@ dependencies = [ "portable-atomic", "rand 0.8.5", "regex", - "ring 0.17.14", + "ring", "rustls-native-certs 0.7.3", - "rustls-pemfile 2.2.0", + "rustls-pemfile", "rustls-webpki 0.102.8", "serde", "serde_json", @@ -270,7 +270,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -281,7 +281,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -308,7 +308,7 @@ dependencies = [ "derive_utils", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -553,7 +553,7 @@ checksum = "604fde5e028fea851ce1d8570bbdc034bec850d157f7569d10f347d06808c05c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -571,7 +571,7 @@ dependencies = [ "hyper-util", "pin-project-lite", "rustls 0.23.36", - "rustls-pemfile 2.2.0", + "rustls-pemfile", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", @@ -633,12 +633,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "base64" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" - [[package]] name = "base64" version = "0.21.7" @@ -919,7 +913,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1193,7 +1187,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1227,7 +1221,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn", ] [[package]] @@ -1241,7 +1235,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn", ] [[package]] @@ -1252,7 +1246,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1263,7 +1257,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1338,7 +1332,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1348,7 +1342,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.114", + "syn", ] [[package]] @@ -1370,7 +1364,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.114", + "syn", "unicode-xid", ] @@ -1382,7 +1376,7 @@ checksum = "ccfae181bab5ab6c5478b2ccb69e4c68a02f8c3ec72f6616bfec9dbc599d2ee0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1431,7 +1425,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1444,7 +1438,7 @@ dependencies = [ "dsl_auto_type", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1464,7 +1458,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe2444076b48641147115697648dc743c2c00b61adade0f01ce67133c7babe8c" dependencies = [ - "syn 2.0.114", + "syn", ] [[package]] @@ -1504,7 +1498,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1539,7 +1533,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1689,7 +1683,7 @@ checksum = "f464e1e518bc97a6749590758411784df7dda4f36384e1fb11a58f040c1d0459" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -1791,7 +1785,7 @@ checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" dependencies = [ "futures-core", "futures-sink", - "spin 0.9.8", + "spin", ] [[package]] @@ -1923,7 +1917,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -2679,15 +2673,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.1" @@ -2754,7 +2739,7 @@ checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -2826,7 +2811,7 @@ dependencies = [ "base64 0.22.1", "js-sys", "pem", - "ring 0.17.14", + "ring", "serde", "serde_json", "simple_asn1", @@ -2855,7 +2840,7 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" dependencies = [ - "spin 0.9.8", + "spin", ] [[package]] @@ -2897,12 +2882,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "linux-raw-sys" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -3037,25 +3016,6 @@ dependencies = [ "quote", ] -[[package]] -name = "milvus-sdk-rust" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e291050787c486091140a968f4d90759638a55f0883bcf30acc8f0470efaf0" -dependencies = [ - "anyhow", - "base64 0.21.7", - "prost 0.11.9", - "serde", - "serde_json", - "strum 0.24.1", - 
"strum_macros 0.24.3", - "thiserror 1.0.69", - "tokio", - "tonic 0.8.3", - "tonic-build 0.8.4", -] - [[package]] name = "mime" version = "0.3.17" @@ -3112,16 +3072,10 @@ dependencies = [ "httparse", "memchr", "mime", - "spin 0.9.8", + "spin", "version_check", ] -[[package]] -name = "multimap" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" - [[package]] name = "multimap" version = "0.10.1" @@ -3330,7 +3284,7 @@ dependencies = [ "schemars 0.9.0", "serde", "serde_json", - "strum 0.27.2", + "strum", "thiserror 2.0.18", "tracing", ] @@ -3347,7 +3301,6 @@ dependencies = [ "diesel-async", "futures", "jiff", - "milvus-sdk-rust", "nvisy-core", "opendal", "pgvector", @@ -3405,7 +3358,7 @@ dependencies = [ "schemars 0.9.0", "serde", "serde_json", - "strum 0.27.2", + "strum", "thiserror 2.0.18", "tokio", "tracing", @@ -3432,7 +3385,7 @@ dependencies = [ "serde", "serde_json", "sha2", - "strum 0.27.2", + "strum", "text-splitter", "thiserror 2.0.18", "tokio", @@ -3450,7 +3403,7 @@ dependencies = [ "derive_more", "flate2", "nvisy-rt-core", - "strum 0.27.2", + "strum", "tar", "tempfile", "tokio", @@ -3471,7 +3424,7 @@ dependencies = [ "jiff", "serde", "sha2", - "strum 0.27.2", + "strum", "thiserror 2.0.18", "tokio", "uuid", @@ -3490,7 +3443,7 @@ dependencies = [ "nvisy-rt-core", "serde", "serde_json", - "strum 0.27.2", + "strum", "thiserror 2.0.18", "tokio", "uuid", @@ -3579,7 +3532,7 @@ dependencies = [ "semver", "serde", "serde_json", - "strum 0.27.2", + "strum", "thiserror 2.0.18", "tokio", "tracing", @@ -3617,7 +3570,7 @@ dependencies = [ "serde", "serde_json", "sha2", - "strum 0.27.2", + "strum", "tempfile", "thiserror 2.0.18", "tokio", @@ -3731,7 +3684,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -3947,7 +3900,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -3981,7 +3934,7 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tonic 0.11.0", - "tonic-build 0.11.0", + "tonic-build", "url", "uuid", ] @@ -4127,16 +4080,6 @@ dependencies = [ "yansi", ] -[[package]] -name = "prettyplease" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" -dependencies = [ - "proc-macro2", - "syn 1.0.109", -] - [[package]] name = "prettyplease" version = "0.2.37" @@ -4144,7 +4087,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn", ] [[package]] @@ -4166,7 +4109,7 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -4178,16 +4121,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "prost" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" -dependencies = [ - "bytes", - "prost-derive 0.11.9", -] - [[package]] name = "prost" version = "0.12.6" @@ -4208,28 +4141,6 @@ dependencies = [ "prost-derive 0.13.5", ] -[[package]] -name = "prost-build" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" -dependencies = [ - "bytes", - "heck 0.4.1", - "itertools 0.10.5", - "lazy_static", - "log", - "multimap 0.8.3", - "petgraph 0.6.5", - "prettyplease 0.1.25", - "prost 0.11.9", - "prost-types 0.11.9", - "regex", - "syn 1.0.109", - "tempfile", - "which", -] - [[package]] name = "prost-build" version = "0.12.6" @@ -4240,30 +4151,17 @@ dependencies = [ "heck 0.5.0", "itertools 0.12.1", "log", - "multimap 0.10.1", + "multimap", "once_cell", "petgraph 0.6.5", - "prettyplease 0.2.37", + "prettyplease", "prost 0.12.6", "prost-types 0.12.6", "regex", - "syn 2.0.114", + "syn", "tempfile", ] -[[package]] -name = "prost-derive" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" -dependencies = [ - "anyhow", - "itertools 0.10.5", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "prost-derive" version = "0.12.6" @@ -4274,7 +4172,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -4287,16 +4185,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.114", -] - -[[package]] -name = "prost-types" -version = "0.11.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" -dependencies = [ - "prost 0.11.9", + "syn", ] [[package]] @@ -4379,7 +4268,7 @@ dependencies = [ "getrandom 0.3.4", "lru-slab", "rand 0.9.2", - "ring 0.17.14", + "ring", "rustc-hash", "rustls 0.23.36", "rustls-pki-types", @@ -4531,7 +4420,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -4697,21 +4586,6 @@ dependencies = [ "url", ] -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - [[package]] name = "ring" version = "0.17.14" @@ -4793,19 +4667,6 @@ dependencies = [ "semver", ] -[[package]] -name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags 2.10.0", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", -] - [[package]] name = "rustix" version = "1.1.3" @@ -4815,22 +4676,10 @@ dependencies = [ "bitflags 2.10.0", "errno", "libc", - "linux-raw-sys 0.11.0", + "linux-raw-sys", "windows-sys 0.61.2", ] -[[package]] -name = "rustls" -version = "0.20.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" -dependencies = [ - "log", - "ring 0.16.20", - "sct", - "webpki", -] - [[package]] name = "rustls" version = "0.22.4" @@ -4838,7 +4687,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", - "ring 0.17.14", + "ring", "rustls-pki-types", "rustls-webpki 0.102.8", "subtle", @@ -4854,25 +4703,13 @@ dependencies = [ "aws-lc-rs", "log", "once_cell", - "ring 0.17.14", + "ring", "rustls-pki-types", "rustls-webpki 0.103.9", 
"subtle", "zeroize", ] -[[package]] -name = "rustls-native-certs" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" -dependencies = [ - "openssl-probe 0.1.6", - "rustls-pemfile 1.0.4", - "schannel", - "security-framework 2.11.1", -] - [[package]] name = "rustls-native-certs" version = "0.7.3" @@ -4880,7 +4717,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" dependencies = [ "openssl-probe 0.1.6", - "rustls-pemfile 2.2.0", + "rustls-pemfile", "rustls-pki-types", "schannel", "security-framework 2.11.1", @@ -4898,15 +4735,6 @@ dependencies = [ "security-framework 3.5.1", ] -[[package]] -name = "rustls-pemfile" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" -dependencies = [ - "base64 0.21.7", -] - [[package]] name = "rustls-pemfile" version = "2.2.0" @@ -4932,7 +4760,7 @@ version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ - "ring 0.17.14", + "ring", "rustls-pki-types", "untrusted 0.9.0", ] @@ -4944,7 +4772,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "aws-lc-rs", - "ring 0.17.14", + "ring", "rustls-pki-types", "untrusted 0.9.0", ] @@ -5017,7 +4845,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.114", + "syn", ] [[package]] @@ -5029,7 +4857,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.114", + "syn", ] [[package]] @@ -5064,16 +4892,6 @@ dependencies = [ "sha2", ] -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring 0.17.14", - "untrusted 0.9.0", -] - [[package]] name = "security-framework" version = "2.11.1" @@ -5147,7 +4965,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5158,7 +4976,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5228,7 +5046,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5378,7 +5196,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5401,12 +5219,6 @@ dependencies = [ "windows-sys 0.60.2", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -5485,7 +5297,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.114", + "syn", ] [[package]] @@ -5508,7 +5320,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.114", + "syn", "tokio", "url", ] @@ -5639,32 +5451,13 @@ version = "0.11.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" - [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ - "strum_macros 0.27.2", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", + "strum_macros", ] [[package]] @@ -5676,7 +5469,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5685,17 +5478,6 @@ version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.114" @@ -5730,7 +5512,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5774,7 +5556,7 @@ dependencies = [ "fastrand", "getrandom 0.3.4", "once_cell", - "rustix 1.1.3", + "rustix", "windows-sys 0.61.2", ] @@ -5791,7 +5573,7 @@ dependencies = [ "icu_segmenter", "itertools 0.14.0", "memchr", - "strum 0.27.2", + "strum", "thiserror 2.0.18", ] @@ -5821,7 +5603,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5832,7 +5614,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5945,7 +5727,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -5984,17 +5766,6 @@ dependencies = [ "whoami 2.0.2", ] -[[package]] -name = "tokio-rustls" -version = "0.23.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" -dependencies = [ - "rustls 0.20.9", - "tokio", - "webpki", -] - [[package]] name = "tokio-rustls" version = "0.25.0" @@ -6053,7 +5824,7 @@ dependencies = [ "http 1.4.0", "httparse", "rand 0.8.5", - "ring 0.17.14", + "ring", "rustls-pki-types", "tokio", "tokio-rustls 0.26.4", @@ -6092,41 +5863,6 @@ dependencies = [ "winnow", ] -[[package]] -name = "tonic" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f219fad3b929bef19b1f86fbc0358d35daed8f2cac972037ac0dc10bbb8d5fb" -dependencies = [ - "async-stream", - "async-trait", - "axum 0.6.20", - "base64 0.13.1", - "bytes", - "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.32", - "hyper-timeout 0.4.1", - "percent-encoding", - "pin-project", 
- "prost 0.11.9", - "prost-derive 0.11.9", - "rustls-native-certs 0.6.3", - "rustls-pemfile 1.0.4", - "tokio", - "tokio-rustls 0.23.4", - "tokio-stream", - "tokio-util", - "tower 0.4.13", - "tower-layer", - "tower-service", - "tracing", - "tracing-futures", -] - [[package]] name = "tonic" version = "0.11.0" @@ -6147,7 +5883,7 @@ dependencies = [ "pin-project", "prost 0.12.6", "rustls-native-certs 0.7.3", - "rustls-pemfile 2.2.0", + "rustls-pemfile", "rustls-pki-types", "tokio", "tokio-rustls 0.25.0", @@ -6181,7 +5917,7 @@ dependencies = [ "pin-project", "prost 0.13.5", "rustls-native-certs 0.8.3", - "rustls-pemfile 2.2.0", + "rustls-pemfile", "socket2 0.5.10", "tokio", "tokio-rustls 0.26.4", @@ -6192,30 +5928,17 @@ dependencies = [ "tracing", ] -[[package]] -name = "tonic-build" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" -dependencies = [ - "prettyplease 0.1.25", - "proc-macro2", - "prost-build 0.11.9", - "quote", - "syn 1.0.109", -] - [[package]] name = "tonic-build" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4ef6dd70a610078cb4e338a0f79d06bc759ff1b22d2120c2ff02ae264ba9c2" dependencies = [ - "prettyplease 0.2.37", + "prettyplease", "proc-macro2", - "prost-build 0.12.6", + "prost-build", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -6321,7 +6044,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -6437,7 +6160,7 @@ checksum = "27a7a9b72ba121f6f1f6c3632b85604cac41aedb5ddc70accbebb6cac83de846" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -6578,7 +6301,7 @@ dependencies = [ "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -6699,7 +6422,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", + "syn", "wasm-bindgen-shared", ] @@ -6745,16 +6468,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.22.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" -dependencies = [ - "ring 0.17.14", - "untrusted 0.9.0", -] - [[package]] name = "webpki-roots" version = "0.26.11" @@ -6773,18 +6486,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix 0.38.44", -] - [[package]] name = "whoami" version = "1.6.1" @@ -6806,28 +6507,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-core" version = "0.62.2" @@ -6849,7 +6528,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -6860,7 +6539,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -7155,7 +6834,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", - "rustix 1.1.3", + "rustix", ] [[package]] @@ -7192,7 +6871,7 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", "synstructure", ] @@ -7213,7 +6892,7 @@ checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -7233,7 +6912,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", "synstructure", ] @@ -7254,7 +6933,7 @@ checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] @@ -7288,7 +6967,7 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c2f0f51..39c8c33 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -143,5 +143,5 @@ rig-core = { version = "0.29", default-features = false, features = ["reqwest-ru opendal = { version = "0.53", features = [] } qdrant-client = { version = "1.13", features = [] } pinecone-sdk = { version = "0.1", features = [] } -milvus-sdk-rust = { version = "0.1", features = [] } + prost-types = { version = "0.12", features = [] } diff --git a/crates/nvisy-dal/Cargo.toml b/crates/nvisy-dal/Cargo.toml index 76a0578..01246ac 100644 --- a/crates/nvisy-dal/Cargo.toml +++ b/crates/nvisy-dal/Cargo.toml @@ -50,7 +50,7 @@ opendal = { workspace = true, features = ["services-s3", "services-gcs", "servic # Vector store clients qdrant-client = { workspace = true, features = [] } pinecone-sdk = { workspace = true, features = [] } -milvus-sdk-rust = { workspace = true, features = [] } + prost-types = { workspace = true, features = [] } # Database (for pgvector) diff --git a/crates/nvisy-dal/README.md b/crates/nvisy-dal/README.md index 1c12d87..97e08af 100644 --- a/crates/nvisy-dal/README.md +++ b/crates/nvisy-dal/README.md @@ -6,7 +6,7 @@ Data Abstraction Layer for workflow inputs and outputs. ## Overview -This crate provides a unified interface for reading and writing data across various storage backends. It supports blob storage (S3, GCS, Azure Blob), relational databases (PostgreSQL, MySQL), and vector databases (Qdrant, Pinecone, Milvus, pgvector). +This crate provides a unified interface for reading and writing data across various storage backends. It supports blob storage, relational databases, and vector databases. 
## Modules @@ -27,155 +27,27 @@ All types implement the `DataType` marker trait: - **Graph** - Graph structures with nodes and edges - **Message** - Messages for queue-based systems -## Providers - -### Storage Providers (OpenDAL-based) - -| Provider | Config | Data Type | -|----------|--------|-----------| -| `S3Provider` | `S3Config` | `Blob` | -| `GcsProvider` | `GcsConfig` | `Blob` | -| `AzblobProvider` | `AzblobConfig` | `Blob` | - -### Database Providers (OpenDAL-based) - -| Provider | Config | Data Type | -|----------|--------|-----------| -| `PostgresProvider` | `PostgresConfig` | `Record` | -| `MysqlProvider` | `MysqlConfig` | `Record` | - -### Vector Providers - -| Provider | Config | Data Type | -|----------|--------|-----------| -| `QdrantProvider` | `QdrantConfig` | `Embedding` | -| `PineconeProvider` | `PineconeConfig` | `Embedding` | -| `MilvusProvider` | `MilvusConfig` | `Embedding` | -| `PgVectorProvider` | `PgVectorConfig` | `Embedding` | - ## Streams -The DAL uses wrapped stream types for better ergonomics: - -```rust -use nvisy_dal::stream::{InputStream, OutputStream, ItemStream}; - -// InputStream wraps a BoxStream with optional pagination cursor -let input: InputStream = provider.read(&ctx).await?; -let cursor = input.cursor(); // Get pagination cursor - -// OutputStream wraps a Sink for streaming writes -``` +The DAL uses wrapped stream types for better ergonomics with pagination support and streaming I/O operations. ## Usage -### Storage Example - -```rust -use nvisy_dal::{Context, DataInput, DataOutput}; -use nvisy_dal::provider::{S3Config, S3Provider}; -use nvisy_dal::datatype::Blob; -use futures::StreamExt; - -// Create provider -let config = S3Config::new("my-bucket", "us-east-1") - .with_credentials("access_key", "secret_key"); -let provider = S3Provider::new(&config)?; - -// Read blobs -let ctx = Context::new().with_target("data/"); -let mut stream = provider.read(&ctx).await?; - -while let Some(result) = stream.next().await { - let blob = result?; - println!("Read: {}", blob.path); -} - -// Write blobs -let blob = Blob::new("output/file.txt", b"Hello, world!".to_vec()); -provider.write(&ctx, vec![blob]).await?; -``` - -### Database Example - -```rust -use nvisy_dal::{Context, DataInput, DataOutput}; -use nvisy_dal::provider::{PostgresConfig, PostgresProvider}; -use nvisy_dal::datatype::Record; - -// Create provider -let config = PostgresConfig::new("postgresql://user:pass@localhost/db") - .with_table("my_table"); -let provider = PostgresProvider::new(&config)?; - -// Read records -let ctx = Context::new(); -let stream = provider.read(&ctx).await?; - -// Write records -let record = Record::new() - .set("name", "Alice") - .set("age", 30); -provider.write(&ctx, vec![record]).await?; -``` - -### Vector Example - -```rust -use nvisy_dal::{Context, DataOutput}; -use nvisy_dal::provider::{QdrantConfig, QdrantProvider}; -use nvisy_dal::datatype::Embedding; - -// Create provider -let config = QdrantConfig::new("http://localhost:6334"); -let provider = QdrantProvider::new(&config).await?; - -// Write embeddings -let ctx = Context::new().with_target("my_collection"); -let embedding = Embedding::new("doc1", vec![0.1, 0.2, 0.3]); -provider.write(&ctx, vec![embedding]).await?; - -// Search (provider-specific method) -let results = provider.search( - "my_collection", - vec![0.1, 0.2, 0.3], - 10, - true, // include_vectors - true, // include_metadata - None, // filter -).await?; -``` +The DAL provides a consistent interface across all provider types. 
Create a provider with appropriate credentials and configuration, then use the `DataInput` and `DataOutput` traits for reading and writing data with proper context and stream handling. ## Traits ### DataInput -```rust -#[async_trait] -pub trait DataInput: Send + Sync { - async fn read(&self, ctx: &Context) -> Result>; -} -``` +Provides async read operations that return paginated streams of data. ### DataOutput -```rust -#[async_trait] -pub trait DataOutput: Send + Sync { - async fn write(&self, ctx: &Context, items: Vec) -> Result<()>; -} -``` +Provides async write operations for batches of data items. ## Context -The `Context` struct provides configuration for read/write operations: - -```rust -let ctx = Context::new() - .with_target("my_collection") // Collection, table, bucket prefix, etc. - .with_cursor("abc123") // Pagination cursor - .with_limit(100); // Maximum items to read -``` +The `Context` struct provides configuration for read/write operations including target specification (collection, table, bucket prefix), pagination cursors, and data limits. ## Changelog diff --git a/crates/nvisy-dal/src/core/vector_context.rs b/crates/nvisy-dal/src/core/vector_context.rs index 73ddc22..50987da 100644 --- a/crates/nvisy-dal/src/core/vector_context.rs +++ b/crates/nvisy-dal/src/core/vector_context.rs @@ -1,6 +1,6 @@ //! Context for vector database operations. -/// Context for vector database operations (Qdrant, Pinecone, Milvus, pgvector). +/// Context for vector database operations (Qdrant, Pinecone, pgvector). #[derive(Debug, Clone, Default)] pub struct VectorContext { /// Target collection name. diff --git a/crates/nvisy-dal/src/provider/milvus/config.rs b/crates/nvisy-dal/src/provider/milvus/config.rs deleted file mode 100644 index 1b91a5f..0000000 --- a/crates/nvisy-dal/src/provider/milvus/config.rs +++ /dev/null @@ -1,37 +0,0 @@ -//! Milvus configuration types. - -use serde::{Deserialize, Serialize}; - -/// Default Milvus port. -fn default_port() -> u16 { - 19530 -} - -/// Milvus credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MilvusCredentials { - /// Milvus server host. - pub host: String, - /// Milvus server port. - #[serde(default = "default_port")] - pub port: u16, - /// Username for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub username: Option, - /// Password for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub password: Option, -} - -/// Milvus parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MilvusParams { - /// Collection name. - pub collection: String, - /// Database name. - #[serde(skip_serializing_if = "Option::is_none")] - pub database: Option, - /// Vector dimensions. - #[serde(skip_serializing_if = "Option::is_none")] - pub dimensions: Option, -} diff --git a/crates/nvisy-dal/src/provider/milvus/mod.rs b/crates/nvisy-dal/src/provider/milvus/mod.rs deleted file mode 100644 index e9a7fa6..0000000 --- a/crates/nvisy-dal/src/provider/milvus/mod.rs +++ /dev/null @@ -1,204 +0,0 @@ -//! Milvus vector store provider. 
- -mod config; -mod output; - -use std::borrow::Cow; -use std::collections::HashMap; - -pub use config::{MilvusCredentials, MilvusParams}; -use milvus::client::Client; -use milvus::collection::SearchOption; -use milvus::index::{IndexParams, IndexType, MetricType}; -use milvus::schema::{CollectionSchemaBuilder, FieldSchema}; -use milvus::value::Value; - -use crate::core::Provider; -use crate::error::{Error, Result}; - -/// Milvus provider for vector storage. -pub struct MilvusProvider { - client: Client, - params: MilvusParams, -} - -#[async_trait::async_trait] -impl Provider for MilvusProvider { - type Credentials = MilvusCredentials; - type Params = MilvusParams; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let url = format!("http://{}:{}", credentials.host, credentials.port); - - let client = Client::new(url) - .await - .map_err(|e| Error::connection(e.to_string()))?; - - Ok(Self { client, params }) - } -} - -impl MilvusProvider { - /// Returns the configured collection name. - pub fn collection(&self) -> &str { - &self.params.collection - } - - /// Ensures a collection exists, creating it if necessary. - pub(crate) async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { - let exists = self - .client - .has_collection(name) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - if exists { - return Ok(()); - } - - let mut builder = CollectionSchemaBuilder::new(name, "Vector collection"); - builder.add_field(FieldSchema::new_primary_int64("_id", "primary key", true)); - builder.add_field(FieldSchema::new_varchar("id", "string id", 256)); - builder.add_field(FieldSchema::new_float_vector( - "vector", - "embedding vector", - dimensions as i64, - )); - builder.add_field(FieldSchema::new_varchar("metadata", "json metadata", 65535)); - - let schema = builder - .build() - .map_err(|e| Error::provider(e.to_string()))?; - - self.client - .create_collection(schema, None) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let index_params = IndexParams::new( - "vector_index".to_string(), - IndexType::IvfFlat, - MetricType::L2, - HashMap::from([("nlist".to_string(), "128".to_string())]), - ); - - let collection = self - .client - .get_collection(name) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - collection - .create_index("vector", index_params) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - collection - .load(1) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - Ok(()) - } - - /// Searches for similar vectors. 
- pub async fn search( - &self, - collection: &str, - query: Vec, - limit: usize, - ) -> Result> { - let coll = self - .client - .get_collection(collection) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let mut search_option = SearchOption::new(); - search_option.add_param("nprobe", serde_json::json!(16)); - - let query_value = Value::FloatArray(Cow::Owned(query)); - - let results = coll - .search( - vec![query_value], - "vector", - limit as i32, - MetricType::L2, - vec!["id", "metadata"], - &search_option, - ) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let mut search_results = Vec::new(); - - for result in results { - for i in 0..result.size as usize { - let id = match result.id.get(i) { - Some(Value::String(s)) => s.to_string(), - Some(Value::Long(l)) => l.to_string(), - _ => continue, - }; - - let score = result.score.get(i).copied().unwrap_or(0.0); - - let metadata_str = result - .field - .iter() - .find(|f| f.name == "metadata") - .and_then(|f| f.get(i)) - .and_then(|v| match v { - Value::String(s) => Some(s.to_string()), - _ => None, - }); - - let metadata: HashMap = metadata_str - .and_then(|s| serde_json::from_str(&s).ok()) - .unwrap_or_default(); - - let string_id = result - .field - .iter() - .find(|f| f.name == "id") - .and_then(|f| f.get(i)) - .and_then(|v| match v { - Value::String(s) => Some(s.to_string()), - _ => None, - }) - .unwrap_or(id); - - search_results.push(SearchResult { - id: string_id, - score, - vector: None, - metadata, - }); - } - } - - Ok(search_results) - } -} - -/// Result from a vector similarity search. -#[derive(Debug, Clone)] -pub struct SearchResult { - /// The ID of the matched vector. - pub id: String, - /// Similarity score. - pub score: f32, - /// The vector data, if requested. - pub vector: Option>, - /// Metadata associated with this vector. - pub metadata: HashMap, -} - -impl std::fmt::Debug for MilvusProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MilvusProvider").finish() - } -} diff --git a/crates/nvisy-dal/src/provider/milvus/output.rs b/crates/nvisy-dal/src/provider/milvus/output.rs deleted file mode 100644 index b7315e1..0000000 --- a/crates/nvisy-dal/src/provider/milvus/output.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! Milvus DataOutput implementation. 
- -use async_trait::async_trait; -use milvus::data::FieldColumn; -use milvus::schema::FieldSchema; -use milvus::value::ValueVec; - -use super::MilvusProvider; -use crate::core::DataOutput; -use crate::datatype::Embedding; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for MilvusProvider { - type Item = Embedding; - - async fn write(&self, items: Vec) -> Result<()> { - if items.is_empty() { - return Ok(()); - } - - let collection = self.collection(); - - let dim = items.first().map(|v| v.vector.len()).unwrap_or(0); - - self.ensure_collection(collection, dim).await?; - - let coll = self - .client - .get_collection(collection) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let ids: Vec = items.iter().map(|v| v.id.clone()).collect(); - let embeddings: Vec = items - .iter() - .flat_map(|v| v.vector.iter().copied()) - .collect(); - let metadata: Vec = items - .iter() - .map(|v| serde_json::to_string(&v.metadata).unwrap_or_default()) - .collect(); - - let id_schema = FieldSchema::new_varchar("id", "string id", 256); - let vector_schema = FieldSchema::new_float_vector("vector", "embedding vector", dim as i64); - let metadata_schema = FieldSchema::new_varchar("metadata", "json metadata", 65535); - - let columns = vec![ - FieldColumn::new(&id_schema, ValueVec::String(ids)), - FieldColumn::new(&vector_schema, ValueVec::Float(embeddings)), - FieldColumn::new(&metadata_schema, ValueVec::String(metadata)), - ]; - - coll.insert(columns, None) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/provider/mod.rs b/crates/nvisy-dal/src/provider/mod.rs index f2b01cc..8a51028 100644 --- a/crates/nvisy-dal/src/provider/mod.rs +++ b/crates/nvisy-dal/src/provider/mod.rs @@ -2,7 +2,7 @@ mod azblob; mod gcs; -mod milvus; + mod mysql; mod pgvector; mod pinecone; @@ -14,7 +14,7 @@ mod s3; pub use azblob::{AzblobCredentials, AzblobParams, AzblobProvider}; pub use gcs::{GcsCredentials, GcsParams, GcsProvider}; // Vector database providers -pub use milvus::{MilvusCredentials, MilvusParams, MilvusProvider}; + // Relational database providers pub use mysql::{MysqlCredentials, MysqlParams, MysqlProvider}; pub use pgvector::{ diff --git a/crates/nvisy-runtime/src/definition/mod.rs b/crates/nvisy-runtime/src/definition/mod.rs index 2a2d503..2a148a1 100644 --- a/crates/nvisy-runtime/src/definition/mod.rs +++ b/crates/nvisy-runtime/src/definition/mod.rs @@ -7,7 +7,7 @@ //! - Storage in databases //! //! To execute a workflow, definitions must be compiled into runtime types -//! using the [`crate::engine::compiler`] module. +//! using the [`crate::engine::Engine`]. use std::collections::HashMap; diff --git a/crates/nvisy-runtime/src/engine/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs index f711db8..643f6c3 100644 --- a/crates/nvisy-runtime/src/engine/compiler.rs +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -1,7 +1,7 @@ //! Workflow compiler for transforming definitions into executable graphs. //! -//! The compiler takes a [`WorkflowDefinition`] and a [`CredentialsRegistry`] -//! and produces a [`CompiledGraph`] that can be executed by the engine. +//! The compiler takes workflow definitions and credentials registry +//! and produces compiled graphs that can be executed by the engine. //! //! # Compilation Process //! 
diff --git a/crates/nvisy-runtime/src/graph/mod.rs b/crates/nvisy-runtime/src/graph/mod.rs index b9d6647..81cc5eb 100644 --- a/crates/nvisy-runtime/src/graph/mod.rs +++ b/crates/nvisy-runtime/src/graph/mod.rs @@ -7,7 +7,7 @@ //! - Pre-resolved cache slots //! - Pre-instantiated providers and agents //! -//! To create compiled types, use the [`crate::engine::compiler`] module. +//! To create compiled types, use the [`crate::engine::Engine`]. use std::collections::HashMap; diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs index 5800544..eb09fb2 100644 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ b/crates/nvisy-runtime/src/provider/mod.rs @@ -18,8 +18,8 @@ use derive_more::From; pub use inputs::{InputProvider, InputProviderConfig, InputProviderParams}; // Re-export dal credentials pub use nvisy_dal::provider::{ - AzblobCredentials, GcsCredentials, MilvusCredentials, MysqlCredentials, PgVectorCredentials, - PineconeCredentials, PostgresCredentials, QdrantCredentials, S3Credentials, + AzblobCredentials, GcsCredentials, MysqlCredentials, PgVectorCredentials, PineconeCredentials, + PostgresCredentials, QdrantCredentials, S3Credentials, }; // Re-export rig types pub use nvisy_rig::provider::{ @@ -56,8 +56,6 @@ pub enum ProviderCredentials { Qdrant(QdrantCredentials), /// Pinecone credentials. Pinecone(PineconeCredentials), - /// Milvus credentials. - Milvus(MilvusCredentials), /// pgvector credentials. PgVector(PgVectorCredentials), diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs index aa7d03a..dc654d3 100644 --- a/crates/nvisy-runtime/src/provider/outputs.rs +++ b/crates/nvisy-runtime/src/provider/outputs.rs @@ -8,9 +8,9 @@ use derive_more::From; use futures::Sink; use nvisy_core::Provider; use nvisy_dal::provider::{ - AzblobParams, AzblobProvider, GcsParams, GcsProvider, MilvusParams, MilvusProvider, - MysqlParams, MysqlProvider, PgVectorParams, PgVectorProvider, PineconeParams, PineconeProvider, - PostgresParams, PostgresProvider, QdrantParams, QdrantProvider, S3Params, S3Provider, + AzblobParams, AzblobProvider, GcsParams, GcsProvider, MysqlParams, MysqlProvider, + PgVectorParams, PgVectorProvider, PineconeParams, PineconeProvider, PostgresParams, + PostgresProvider, QdrantParams, QdrantProvider, S3Params, S3Provider, }; use nvisy_dal::{AnyDataValue, DataTypeId}; use serde::{Deserialize, Serialize}; @@ -76,8 +76,6 @@ pub enum OutputProviderParams { Qdrant(QdrantParams), /// Pinecone vector database. Pinecone(PineconeParams), - /// Milvus vector database. - Milvus(MilvusParams), /// pgvector (PostgreSQL extension). 
PgVector(PgVectorParams), } @@ -93,9 +91,7 @@ impl OutputProviderParams { match self { Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, - Self::Qdrant(_) | Self::Pinecone(_) | Self::Milvus(_) | Self::PgVector(_) => { - DataTypeId::Embedding - } + Self::Qdrant(_) | Self::Pinecone(_) | Self::PgVector(_) => DataTypeId::Embedding, } } @@ -127,21 +123,21 @@ impl OutputProviderParams { .await .map_err(|e| Error::Internal(e.to_string()))?, )), - (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => Ok(OutputProvider::Qdrant( - QdrantProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => Ok(OutputProvider::Pinecone( - PineconeProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Milvus(p), ProviderCredentials::Milvus(c)) => Ok(OutputProvider::Milvus( - MilvusProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), + (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => { + Ok(OutputProvider::Qdrant(Box::new( + QdrantProvider::connect(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + ))) + } + (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => { + Ok(OutputProvider::Pinecone(Box::new( + PineconeProvider::connect(p, c) + .await + .map_err(|e| Error::Internal(e.to_string()))?, + ))) + } + (Self::PgVector(p), ProviderCredentials::PgVector(c)) => Ok(OutputProvider::PgVector( PgVectorProvider::connect(p, c) .await @@ -164,9 +160,9 @@ pub enum OutputProvider { Azblob(AzblobProvider), Postgres(PostgresProvider), Mysql(MysqlProvider), - Qdrant(QdrantProvider), - Pinecone(PineconeProvider), - Milvus(MilvusProvider), + Qdrant(Box), + Pinecone(Box), + PgVector(PgVectorProvider), } @@ -176,9 +172,7 @@ impl OutputProvider { match self { Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, - Self::Qdrant(_) | Self::Pinecone(_) | Self::Milvus(_) | Self::PgVector(_) => { - DataTypeId::Embedding - } + Self::Qdrant(_) | Self::Pinecone(_) | Self::PgVector(_) => DataTypeId::Embedding, } } @@ -198,9 +192,9 @@ impl OutputProvider { Self::Azblob(p) => write_data!(p, data, Blob, into_blob), Self::Postgres(p) => write_data!(p, data, Record, into_record), Self::Mysql(p) => write_data!(p, data, Record, into_record), - Self::Qdrant(p) => write_data!(p, data, Embedding, into_embedding), - Self::Pinecone(p) => write_data!(p, data, Embedding, into_embedding), - Self::Milvus(p) => write_data!(p, data, Embedding, into_embedding), + Self::Qdrant(p) => write_data!(**p, data, Embedding, into_embedding), + Self::Pinecone(p) => write_data!(**p, data, Embedding, into_embedding), + Self::PgVector(p) => write_data!(p, data, Embedding, into_embedding), } } diff --git a/deny.toml b/deny.toml index 90db265..67c9163 100644 --- a/deny.toml +++ b/deny.toml @@ -26,11 +26,15 @@ ignore = [ # https://github.com/nats-io/nats.rs/pull/1492 # https://github.com/programatik29/axum-server/pull/178 "RUSTSEC-2025-0134", + + # RSA Marvin Attack timing sidechannel vulnerability - no patch available + # Comes from opendal/sqlx dependencies for cloud storage and MySQL + "RUSTSEC-2023-0071", ] [licenses] # Confidence threshold for detecting a license from a license text (higher = stricter) -confidence-threshold = 0.9 +confidence-threshold = 0.8 # Private licenses are not allowed private = { ignore = false, registries = [] } # Warn if 
an allowed license is not used in the dependency graph @@ -39,6 +43,7 @@ unused-allowed-license = "warn" # List of explicitly allowed licenses (single licenses only) allow = [ "MIT", + "MIT-0", "Apache-2.0", "Apache-2.0 WITH LLVM-exception", "BSD-2-Clause", @@ -87,4 +92,6 @@ unknown-git = "deny" # List of URLs for allowed crate registries allow-registry = ["https://github.com/rust-lang/crates.io-index"] # List of URLs for allowed Git repositories -allow-git = [] +allow-git = [ + "https://github.com/nvisycom/runtime.git", +] From e655424d4c31a34380d6f9bf1b51aecc59e1dc8c Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 26 Jan 2026 17:15:27 +0100 Subject: [PATCH 26/28] feat: add Python packages (nvisy-dal, nvisy-rig) and update dependabot --- .github/dependabot.yml | 45 +- packages/nvisy-dal/py.typed | 0 packages/nvisy-dal/pyproject.toml | 49 + packages/nvisy-dal/pyrightconfig.json | 4 + packages/nvisy-dal/src/nvisy_dal/__init__.py | 12 + .../src/nvisy_dal/_generated/__init__.py | 13 + .../src/nvisy_dal/_generated/contexts.py | 31 + packages/nvisy-dal/src/nvisy_dal/errors.py | 35 + packages/nvisy-dal/src/nvisy_dal/protocols.py | 42 + .../src/nvisy_dal/providers/__init__.py | 1 + packages/nvisy-dal/uv.lock | 1361 +++++++++++++++++ packages/nvisy-rig/py.typed | 0 packages/nvisy-rig/pyproject.toml | 51 + packages/nvisy-rig/pyrightconfig.json | 4 + packages/nvisy-rig/src/nvisy_rig/__init__.py | 1 + .../src/nvisy_rig/_generated/__init__.py | 1 + .../src/nvisy_rig/agents/__init__.py | 1 + packages/nvisy-rig/uv.lock | 858 +++++++++++ 18 files changed, 2507 insertions(+), 2 deletions(-) create mode 100644 packages/nvisy-dal/py.typed create mode 100644 packages/nvisy-dal/pyproject.toml create mode 100644 packages/nvisy-dal/pyrightconfig.json create mode 100644 packages/nvisy-dal/src/nvisy_dal/__init__.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/_generated/__init__.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/_generated/contexts.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/errors.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/protocols.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/providers/__init__.py create mode 100644 packages/nvisy-dal/uv.lock create mode 100644 packages/nvisy-rig/py.typed create mode 100644 packages/nvisy-rig/pyproject.toml create mode 100644 packages/nvisy-rig/pyrightconfig.json create mode 100644 packages/nvisy-rig/src/nvisy_rig/__init__.py create mode 100644 packages/nvisy-rig/src/nvisy_rig/_generated/__init__.py create mode 100644 packages/nvisy-rig/src/nvisy_rig/agents/__init__.py create mode 100644 packages/nvisy-rig/uv.lock diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 6e02f93..189df3d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -16,7 +16,6 @@ updates: prefix-development: "chore(deps-dev)" rebase-strategy: "auto" versioning-strategy: "auto" - # Group patch and minor updates together to reduce PR noise groups: rust-dependencies: patterns: @@ -38,8 +37,50 @@ updates: - "chore" commit-message: prefix: "chore(actions)" - # Group all GitHub Actions updates together to reduce PR noise groups: github-actions: patterns: - "*" + + # Version updates for Python packages + - package-ecosystem: "pip" + directory: "/packages/nvisy-dal" + schedule: + interval: "weekly" + timezone: "Europe/Berlin" + day: "monday" + time: "04:00" + open-pull-requests-limit: 5 + labels: + - "chore" + - "python" + commit-message: + prefix: "chore(deps)" + groups: + python-dependencies: + patterns: + - "*" + 
update-types: + - "minor" + - "patch" + + - package-ecosystem: "pip" + directory: "/packages/nvisy-rig" + schedule: + interval: "weekly" + timezone: "Europe/Berlin" + day: "monday" + time: "04:00" + open-pull-requests-limit: 5 + labels: + - "chore" + - "python" + commit-message: + prefix: "chore(deps)" + groups: + python-dependencies: + patterns: + - "*" + update-types: + - "minor" + - "patch" diff --git a/packages/nvisy-dal/py.typed b/packages/nvisy-dal/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/nvisy-dal/pyproject.toml b/packages/nvisy-dal/pyproject.toml new file mode 100644 index 0000000..c0ce344 --- /dev/null +++ b/packages/nvisy-dal/pyproject.toml @@ -0,0 +1,49 @@ +[project] +name = "nvisy-dal" +version = "0.1.0" +description = "Data abstraction layer for external integrations" +requires-python = ">=3.12" +dependencies = ["pydantic>=2.10"] + +[project.optional-dependencies] +s3 = ["boto3>=1.35", "types-boto3"] +gcs = ["google-cloud-storage>=2.18"] +azure = ["azure-storage-blob>=12.23"] +postgres = ["asyncpg>=0.30"] +mysql = ["aiomysql>=0.2"] +qdrant = ["qdrant-client>=1.12"] +pinecone = ["pinecone-client>=5.0"] +all = ["nvisy-dal[s3,gcs,azure,postgres,mysql,qdrant,pinecone]"] +dev = ["nvisy-dal[all]", "pytest>=8.0", "pytest-asyncio>=0.24", "moto>=5.0"] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/nvisy_dal"] + +[tool.ruff] +target-version = "py312" +line-length = 100 + +[tool.ruff.lint] +select = ["ALL"] +ignore = ["D", "COM812", "ISC001"] + +[tool.ruff.lint.isort] +known-first-party = ["nvisy_dal"] + +[tool.basedpyright] +pythonVersion = "3.12" +typeCheckingMode = "strict" + +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" + +[dependency-groups] +dev = [ + "pyright>=1.1.408", + "ruff>=0.14.14", +] diff --git a/packages/nvisy-dal/pyrightconfig.json b/packages/nvisy-dal/pyrightconfig.json new file mode 100644 index 0000000..8fd8643 --- /dev/null +++ b/packages/nvisy-dal/pyrightconfig.json @@ -0,0 +1,4 @@ +{ + "venvPath": ".", + "venv": ".venv" +} diff --git a/packages/nvisy-dal/src/nvisy_dal/__init__.py b/packages/nvisy-dal/src/nvisy_dal/__init__.py new file mode 100644 index 0000000..0eb72d3 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/__init__.py @@ -0,0 +1,12 @@ +"""Data abstraction layer for external integrations.""" + +from nvisy_dal.errors import DalError, ErrorKind +from nvisy_dal.protocols import DataInput, DataOutput, Provider + +__all__ = [ + "DalError", + "DataInput", + "DataOutput", + "ErrorKind", + "Provider", +] diff --git a/packages/nvisy-dal/src/nvisy_dal/_generated/__init__.py b/packages/nvisy-dal/src/nvisy_dal/_generated/__init__.py new file mode 100644 index 0000000..b405340 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/_generated/__init__.py @@ -0,0 +1,13 @@ +"""Generated types from Rust JSON schemas. + +This module contains Pydantic models generated from the Rust schema definitions. +Do not edit manually - regenerate with `make codegen`. 
+""" + +from nvisy_dal._generated.contexts import ObjectContext, RelationalContext, VectorContext + +__all__ = [ + "ObjectContext", + "RelationalContext", + "VectorContext", +] diff --git a/packages/nvisy-dal/src/nvisy_dal/_generated/contexts.py b/packages/nvisy-dal/src/nvisy_dal/_generated/contexts.py new file mode 100644 index 0000000..cd325f6 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/_generated/contexts.py @@ -0,0 +1,31 @@ +"""Context types for provider operations. + +Generated from Rust schemas. Do not edit manually. +""" + +from pydantic import BaseModel + + +class ObjectContext(BaseModel, frozen=True): + """Context for object storage operations.""" + + prefix: str | None = None + continuation_token: str | None = None + limit: int | None = None + + +class RelationalContext(BaseModel, frozen=True): + """Context for relational database operations.""" + + table: str + cursor: str | None = None + tiebreaker: str | None = None + limit: int | None = None + + +class VectorContext(BaseModel, frozen=True): + """Context for vector store operations.""" + + collection: str + cursor: str | None = None + limit: int | None = None diff --git a/packages/nvisy-dal/src/nvisy_dal/errors.py b/packages/nvisy-dal/src/nvisy_dal/errors.py new file mode 100644 index 0000000..a05f6ac --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/errors.py @@ -0,0 +1,35 @@ +"""Error types for provider operations.""" + +from enum import StrEnum +from typing import final + + +class ErrorKind(StrEnum): + """Classification of provider errors.""" + + CONNECTION = "connection" + NOT_FOUND = "not_found" + INVALID_INPUT = "invalid_input" + TIMEOUT = "timeout" + PROVIDER = "provider" + + +@final +class DalError(Exception): + """Base error for all provider operations.""" + + __slots__ = ("kind", "message", "source") + + def __init__( + self, + message: str, + kind: ErrorKind = ErrorKind.PROVIDER, + source: BaseException | None = None, + ) -> None: + super().__init__(message) + self.message = message + self.kind = kind + self.source = source + + def __repr__(self) -> str: + return f"DalError({self.message!r}, kind={self.kind!r})" diff --git a/packages/nvisy-dal/src/nvisy_dal/protocols.py b/packages/nvisy-dal/src/nvisy_dal/protocols.py new file mode 100644 index 0000000..c8a9a42 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/protocols.py @@ -0,0 +1,42 @@ +"""Core protocols for data providers.""" + +from collections.abc import AsyncIterator, Sequence +from typing import Protocol, Self, TypeVar, runtime_checkable + +T_co = TypeVar("T_co", covariant=True) +T_contra = TypeVar("T_contra", contravariant=True) +Ctx_contra = TypeVar("Ctx_contra", contravariant=True) +Cred_contra = TypeVar("Cred_contra", contravariant=True) +Params_contra = TypeVar("Params_contra", contravariant=True) + + +@runtime_checkable +class DataInput(Protocol[T_co, Ctx_contra]): + """Protocol for reading data from external sources.""" + + async def read(self, ctx: Ctx_contra) -> AsyncIterator[T_co]: + """Yield items from the source based on context.""" + ... + + +@runtime_checkable +class DataOutput(Protocol[T_contra, Ctx_contra]): + """Protocol for writing data to external sinks.""" + + async def write(self, ctx: Ctx_contra, items: Sequence[T_contra]) -> None: + """Write a batch of items to the sink.""" + ... 
+ + +@runtime_checkable +class Provider(Protocol[Cred_contra, Params_contra]): + """Protocol for provider lifecycle management.""" + + @classmethod + async def connect(cls, credentials: Cred_contra, params: Params_contra) -> Self: + """Establish connection to the external service.""" + ... + + async def disconnect(self) -> None: + """Release resources and close connections.""" + ... diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py b/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py new file mode 100644 index 0000000..a4e3347 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py @@ -0,0 +1 @@ +"""Provider implementations for external services.""" diff --git a/packages/nvisy-dal/uv.lock b/packages/nvisy-dal/uv.lock new file mode 100644 index 0000000..b2f9501 --- /dev/null +++ b/packages/nvisy-dal/uv.lock @@ -0,0 +1,1361 @@ +version = 1 +revision = 1 +requires-python = ">=3.12" +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version < '3.13'", +] + +[[package]] +name = "aiomysql" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pymysql" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/29/e0/302aeffe8d90853556f47f3106b89c16cc2ec2a4d269bdfd82e3f4ae12cc/aiomysql-0.3.2.tar.gz", hash = "sha256:72d15ef5cfc34c03468eb41e1b90adb9fd9347b0b589114bd23ead569a02ac1a", size = 108311 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/af/aae0153c3e28712adaf462328f6c7a3c196a1c1c27b491de4377dd3e6b52/aiomysql-0.3.2-py3-none-any.whl", hash = "sha256:c82c5ba04137d7afd5c693a258bea8ead2aad77101668044143a991e04632eb2", size = 71834 }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592 }, +] + +[[package]] +name = "asyncpg" +version = "0.31.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/cc/d18065ce2380d80b1bcce927c24a2642efd38918e33fd724bc4bca904877/asyncpg-0.31.0.tar.gz", hash = "sha256:c989386c83940bfbd787180f2b1519415e2d3d6277a70d9d0f0145ac73500735", size = 993667 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/2a/a6/59d0a146e61d20e18db7396583242e32e0f120693b67a8de43f1557033e2/asyncpg-0.31.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b44c31e1efc1c15188ef183f287c728e2046abb1d26af4d20858215d50d91fad", size = 662042 }, + { url = "https://files.pythonhosted.org/packages/36/01/ffaa189dcb63a2471720615e60185c3f6327716fdc0fc04334436fbb7c65/asyncpg-0.31.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0c89ccf741c067614c9b5fc7f1fc6f3b61ab05ae4aaa966e6fd6b93097c7d20d", size = 638504 }, + { url = "https://files.pythonhosted.org/packages/9f/62/3f699ba45d8bd24c5d65392190d19656d74ff0185f42e19d0bbd973bb371/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:12b3b2e39dc5470abd5e98c8d3373e4b1d1234d9fbdedf538798b2c13c64460a", size = 3426241 }, + { url = "https://files.pythonhosted.org/packages/8c/d1/a867c2150f9c6e7af6462637f613ba67f78a314b00db220cd26ff559d532/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:aad7a33913fb8bcb5454313377cc330fbb19a0cd5faa7272407d8a0c4257b671", size = 3520321 }, + { url = "https://files.pythonhosted.org/packages/7a/1a/cce4c3f246805ecd285a3591222a2611141f1669d002163abef999b60f98/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3df118d94f46d85b2e434fd62c84cb66d5834d5a890725fe625f498e72e4d5ec", size = 3316685 }, + { url = "https://files.pythonhosted.org/packages/40/ae/0fc961179e78cc579e138fad6eb580448ecae64908f95b8cb8ee2f241f67/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5b6efff3c17c3202d4b37189969acf8927438a238c6257f66be3c426beba20", size = 3471858 }, + { url = "https://files.pythonhosted.org/packages/52/b2/b20e09670be031afa4cbfabd645caece7f85ec62d69c312239de568e058e/asyncpg-0.31.0-cp312-cp312-win32.whl", hash = "sha256:027eaa61361ec735926566f995d959ade4796f6a49d3bde17e5134b9964f9ba8", size = 527852 }, + { url = "https://files.pythonhosted.org/packages/b5/f0/f2ed1de154e15b107dc692262395b3c17fc34eafe2a78fc2115931561730/asyncpg-0.31.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d6bdcbc93d608a1158f17932de2321f68b1a967a13e014998db87a72ed3186", size = 597175 }, + { url = "https://files.pythonhosted.org/packages/95/11/97b5c2af72a5d0b9bc3fa30cd4b9ce22284a9a943a150fdc768763caf035/asyncpg-0.31.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c204fab1b91e08b0f47e90a75d1b3c62174dab21f670ad6c5d0f243a228f015b", size = 661111 }, + { url = "https://files.pythonhosted.org/packages/1b/71/157d611c791a5e2d0423f09f027bd499935f0906e0c2a416ce712ba51ef3/asyncpg-0.31.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:54a64f91839ba59008eccf7aad2e93d6e3de688d796f35803235ea1c4898ae1e", size = 636928 }, + { url = "https://files.pythonhosted.org/packages/2e/fc/9e3486fb2bbe69d4a867c0b76d68542650a7ff1574ca40e84c3111bb0c6e/asyncpg-0.31.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e0822b1038dc7253b337b0f3f676cadc4ac31b126c5d42691c39691962e403", size = 3424067 }, + { url = "https://files.pythonhosted.org/packages/12/c6/8c9d076f73f07f995013c791e018a1cd5f31823c2a3187fc8581706aa00f/asyncpg-0.31.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bef056aa502ee34204c161c72ca1f3c274917596877f825968368b2c33f585f4", size = 3518156 }, + { url = "https://files.pythonhosted.org/packages/ae/3b/60683a0baf50fbc546499cfb53132cb6835b92b529a05f6a81471ab60d0c/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:0bfbcc5b7ffcd9b75ab1558f00db2ae07db9c80637ad1b2469c43df79d7a5ae2", size = 3319636 }, + { url = "https://files.pythonhosted.org/packages/50/dc/8487df0f69bd398a61e1792b3cba0e47477f214eff085ba0efa7eac9ce87/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22bc525ebbdc24d1261ecbf6f504998244d4e3be1721784b5f64664d61fbe602", size = 3472079 }, + { url = "https://files.pythonhosted.org/packages/13/a1/c5bbeeb8531c05c89135cb8b28575ac2fac618bcb60119ee9696c3faf71c/asyncpg-0.31.0-cp313-cp313-win32.whl", hash = "sha256:f890de5e1e4f7e14023619399a471ce4b71f5418cd67a51853b9910fdfa73696", size = 527606 }, + { url = "https://files.pythonhosted.org/packages/91/66/b25ccb84a246b470eb943b0107c07edcae51804912b824054b3413995a10/asyncpg-0.31.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc5f2fa9916f292e5c5c8b2ac2813763bcd7f58e130055b4ad8a0531314201ab", size = 596569 }, + { url = "https://files.pythonhosted.org/packages/3c/36/e9450d62e84a13aea6580c83a47a437f26c7ca6fa0f0fd40b6670793ea30/asyncpg-0.31.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6b56b91bb0ffc328c4e3ed113136cddd9deefdf5f79ab448598b9772831df44", size = 660867 }, + { url = "https://files.pythonhosted.org/packages/82/4b/1d0a2b33b3102d210439338e1beea616a6122267c0df459ff0265cd5807a/asyncpg-0.31.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:334dec28cf20d7f5bb9e45b39546ddf247f8042a690bff9b9573d00086e69cb5", size = 638349 }, + { url = "https://files.pythonhosted.org/packages/41/aa/e7f7ac9a7974f08eff9183e392b2d62516f90412686532d27e196c0f0eeb/asyncpg-0.31.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98cc158c53f46de7bb677fd20c417e264fc02b36d901cc2a43bd6cb0dc6dbfd2", size = 3410428 }, + { url = "https://files.pythonhosted.org/packages/6f/de/bf1b60de3dede5c2731e6788617a512bc0ebd9693eac297ee74086f101d7/asyncpg-0.31.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9322b563e2661a52e3cdbc93eed3be7748b289f792e0011cb2720d278b366ce2", size = 3471678 }, + { url = "https://files.pythonhosted.org/packages/46/78/fc3ade003e22d8bd53aaf8f75f4be48f0b460fa73738f0391b9c856a9147/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19857a358fc811d82227449b7ca40afb46e75b33eb8897240c3839dd8b744218", size = 3313505 }, + { url = "https://files.pythonhosted.org/packages/bf/e9/73eb8a6789e927816f4705291be21f2225687bfa97321e40cd23055e903a/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5f8886e850882ff2c2ace5732300e99193823e8107e2c53ef01c1ebfa1e85d", size = 3434744 }, + { url = "https://files.pythonhosted.org/packages/08/4b/f10b880534413c65c5b5862f79b8e81553a8f364e5238832ad4c0af71b7f/asyncpg-0.31.0-cp314-cp314-win32.whl", hash = "sha256:cea3a0b2a14f95834cee29432e4ddc399b95700eb1d51bbc5bfee8f31fa07b2b", size = 532251 }, + { url = "https://files.pythonhosted.org/packages/d3/2d/7aa40750b7a19efa5d66e67fc06008ca0f27ba1bd082e457ad82f59aba49/asyncpg-0.31.0-cp314-cp314-win_amd64.whl", hash = "sha256:04d19392716af6b029411a0264d92093b6e5e8285ae97a39957b9a9c14ea72be", size = 604901 }, + { url = "https://files.pythonhosted.org/packages/ce/fe/b9dfe349b83b9dee28cc42360d2c86b2cdce4cb551a2c2d27e156bcac84d/asyncpg-0.31.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bdb957706da132e982cc6856bb2f7b740603472b54c3ebc77fe60ea3e57e1bd2", size = 702280 }, + { url = 
"https://files.pythonhosted.org/packages/6a/81/e6be6e37e560bd91e6c23ea8a6138a04fd057b08cf63d3c5055c98e81c1d/asyncpg-0.31.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6d11b198111a72f47154fa03b85799f9be63701e068b43f84ac25da0bda9cb31", size = 682931 }, + { url = "https://files.pythonhosted.org/packages/a6/45/6009040da85a1648dd5bc75b3b0a062081c483e75a1a29041ae63a0bf0dc/asyncpg-0.31.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18c83b03bc0d1b23e6230f5bf8d4f217dc9bc08644ce0502a9d91dc9e634a9c7", size = 3581608 }, + { url = "https://files.pythonhosted.org/packages/7e/06/2e3d4d7608b0b2b3adbee0d0bd6a2d29ca0fc4d8a78f8277df04e2d1fd7b/asyncpg-0.31.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e009abc333464ff18b8f6fd146addffd9aaf63e79aa3bb40ab7a4c332d0c5e9e", size = 3498738 }, + { url = "https://files.pythonhosted.org/packages/7d/aa/7d75ede780033141c51d83577ea23236ba7d3a23593929b32b49db8ed36e/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3b1fbcb0e396a5ca435a8826a87e5c2c2cc0c8c68eb6fadf82168056b0e53a8c", size = 3401026 }, + { url = "https://files.pythonhosted.org/packages/ba/7a/15e37d45e7f7c94facc1e9148c0e455e8f33c08f0b8a0b1deb2c5171771b/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8df714dba348efcc162d2adf02d213e5fab1bd9f557e1305633e851a61814a7a", size = 3429426 }, + { url = "https://files.pythonhosted.org/packages/13/d5/71437c5f6ae5f307828710efbe62163974e71237d5d46ebd2869ea052d10/asyncpg-0.31.0-cp314-cp314t-win32.whl", hash = "sha256:1b41f1afb1033f2b44f3234993b15096ddc9cd71b21a42dbd87fc6a57b43d65d", size = 614495 }, + { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062 }, +] + +[[package]] +name = "azure-core" +version = "1.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/1b/e503e08e755ea94e7d3419c9242315f888fc664211c90d032e40479022bf/azure_core-1.38.0.tar.gz", hash = "sha256:8194d2682245a3e4e3151a667c686464c3786fed7918b394d035bdcd61bb5993", size = 363033 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/d8/b8fcba9464f02b121f39de2db2bf57f0b216fe11d014513d666e8634380d/azure_core-1.38.0-py3-none-any.whl", hash = "sha256:ab0c9b2cd71fecb1842d52c965c95285d3cfb38902f6766e4a471f1cd8905335", size = 217825 }, +] + +[[package]] +name = "azure-storage-blob" +version = "12.28.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "cryptography" }, + { name = "isodate" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/24/072ba8e27b0e2d8fec401e9969b429d4f5fc4c8d4f0f05f4661e11f7234a/azure_storage_blob-12.28.0.tar.gz", hash = "sha256:e7d98ea108258d29aa0efbfd591b2e2075fa1722a2fae8699f0b3c9de11eff41", size = 604225 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d8/3a/6ef2047a072e54e1142718d433d50e9514c999a58f51abfff7902f3a72f8/azure_storage_blob-12.28.0-py3-none-any.whl", hash = "sha256:00fb1db28bf6a7b7ecaa48e3b1d5c83bfadacc5a678b77826081304bd87d6461", size = 431499 }, +] + +[[package]] +name = "boto3" +version = "1.42.34" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d0/69/c0d4cc77add3cdf66f8573555d71dc23ba32dfe77df40e1c91385f7a9bdc/boto3-1.42.34.tar.gz", hash = "sha256:75d7443c81a029283442fad138629be1eefaa3e6d430c28118a0f4cdbd57855d", size = 112876 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/55/25c543864abc270f5fdd7814fa7b69fd23de1c40fb3d7993f4b6391f8d3b/boto3-1.42.34-py3-none-any.whl", hash = "sha256:db3fb539e3f806b911ec4ca991f2f8bff333c5f0b87132a82e28b521fc5ec164", size = 140574 }, +] + +[[package]] +name = "botocore" +version = "1.42.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/f0/5702b704844e8920e01ce865cde0da574827163fbd7c0207d351ff6eea2c/botocore-1.42.34.tar.gz", hash = "sha256:92e44747da7890270d8dcc494ecc61fc315438440c55e00dc37a57d402b1bb66", size = 14907713 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/99/226fb4b2d141d7ac59465e3cdd2ca3a9a2917d85e1a3160884a78b097bbb/botocore-1.42.34-py3-none-any.whl", hash = "sha256:94099b5d09d0c4bfa6414fb3cffd54275ce6e51d7ba016f17a0e79f9274f68f7", size = 14579956 }, +] + +[[package]] +name = "botocore-stubs" +version = "1.42.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-awscrt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/1e/024e45fb46a21d085b541ce0ad8f1bef97ce17c5e72d1dc0e4d09d29e399/botocore_stubs-1.42.34.tar.gz", hash = "sha256:f3d1c5b45c2cbe16f63719abe639b23a1eeb3fec9c3ea0a72688585b462e8ce3", size = 42408 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/c8/3845c17b89ff19e2c2474801a6737d1766ee8e80cf38d7d97e1fedc28537/botocore_stubs-1.42.34-py3-none-any.whl", hash = "sha256:afc08661122eff6939d88cd250084ac148e392f8a1a389d51a31a4b9dab59358", size = 66760 }, +] + +[[package]] +name = "certifi" +version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900 }, +] + +[[package]] +name = "cffi" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271 }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", 
hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048 }, + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529 }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097 }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983 }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519 }, + { url = "https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572 }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963 }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361 }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932 }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557 }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762 }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230 }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043 }, + { url = 
"https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446 }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101 }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948 }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422 }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499 }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928 }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302 }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909 }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402 }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780 }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320 }, + { url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487 }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049 }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793 }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300 }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244 }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828 }, + { url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926 }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328 }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650 }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687 }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773 }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013 }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593 }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354 }, + { url = 
"https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480 }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584 }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443 }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437 }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487 }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726 }, + { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425 }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162 }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558 }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497 }, + { url = 
"https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240 }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471 }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864 }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647 }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110 }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839 }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667 }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535 }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816 }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694 }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131 }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390 }, + { url = 
"https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091 }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936 }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180 }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346 }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874 }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076 }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601 }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376 }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825 }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583 }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366 }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300 }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465 }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404 }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092 }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408 }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746 }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889 }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641 }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779 }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035 }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542 }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524 }, + { url = 
"https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395 }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680 }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045 }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687 }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014 }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044 }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940 }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104 }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743 }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "cryptography" +version = "46.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation != 
'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/42/9c391dd801d6cf0d561b5890549d4b27bafcc53b39c31a817e69d87c625b/cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a", size = 7225004 }, + { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667 }, + { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807 }, + { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615 }, + { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800 }, + { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707 }, + { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541 }, + { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464 }, + { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838 }, + { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596 }, + { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782 }, + { url = 
"https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381 }, + { url = "https://files.pythonhosted.org/packages/96/92/8a6a9525893325fc057a01f654d7efc2c64b9de90413adcf605a85744ff4/cryptography-46.0.3-cp311-abi3-win32.whl", hash = "sha256:f260d0d41e9b4da1ed1e0f1ce571f97fe370b152ab18778e9e8f67d6af432018", size = 3055988 }, + { url = "https://files.pythonhosted.org/packages/7e/bf/80fbf45253ea585a1e492a6a17efcb93467701fa79e71550a430c5e60df0/cryptography-46.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:a9a3008438615669153eb86b26b61e09993921ebdd75385ddd748702c5adfddb", size = 3514451 }, + { url = "https://files.pythonhosted.org/packages/2e/af/9b302da4c87b0beb9db4e756386a7c6c5b8003cd0e742277888d352ae91d/cryptography-46.0.3-cp311-abi3-win_arm64.whl", hash = "sha256:5d7f93296ee28f68447397bf5198428c9aeeab45705a55d53a6343455dcb2c3c", size = 2928007 }, + { url = "https://files.pythonhosted.org/packages/f5/e2/a510aa736755bffa9d2f75029c229111a1d02f8ecd5de03078f4c18d91a3/cryptography-46.0.3-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:00a5e7e87938e5ff9ff5447ab086a5706a957137e6e433841e9d24f38a065217", size = 7158012 }, + { url = "https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728 }, + { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078 }, + { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460 }, + { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237 }, + { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344 }, + { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564 }, + { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415 }, + { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = 
"sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457 }, + { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074 }, + { url = "https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569 }, + { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941 }, + { url = "https://files.pythonhosted.org/packages/fd/30/27654c1dbaf7e4a3531fa1fc77986d04aefa4d6d78259a62c9dc13d7ad36/cryptography-46.0.3-cp314-cp314t-win32.whl", hash = "sha256:8a6e050cb6164d3f830453754094c086ff2d0b2f3a897a1d9820f6139a1f0914", size = 3022339 }, + { url = "https://files.pythonhosted.org/packages/f6/30/640f34ccd4d2a1bc88367b54b926b781b5a018d65f404d409aba76a84b1c/cryptography-46.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:760f83faa07f8b64e9c33fc963d790a2edb24efb479e3520c14a45741cd9b2db", size = 3494315 }, + { url = "https://files.pythonhosted.org/packages/ba/8b/88cc7e3bd0a8e7b861f26981f7b820e1f46aa9d26cc482d0feba0ecb4919/cryptography-46.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:516ea134e703e9fe26bcd1277a4b59ad30586ea90c365a87781d7887a646fe21", size = 2919331 }, + { url = "https://files.pythonhosted.org/packages/fd/23/45fe7f376a7df8daf6da3556603b36f53475a99ce4faacb6ba2cf3d82021/cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936", size = 7218248 }, + { url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089 }, + { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029 }, + { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222 }, + { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280 }, + { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958 }, + { url = 
"https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714 }, + { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970 }, + { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236 }, + { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642 }, + { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126 }, + { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573 }, + { url = "https://files.pythonhosted.org/packages/0a/6e/1c8331ddf91ca4730ab3086a0f1be19c65510a33b5a441cb334e7a2d2560/cryptography-46.0.3-cp38-abi3-win32.whl", hash = "sha256:6276eb85ef938dc035d59b87c8a7dc559a232f954962520137529d77b18ff1df", size = 3036695 }, + { url = "https://files.pythonhosted.org/packages/90/45/b0d691df20633eff80955a0fc7695ff9051ffce8b69741444bd9ed7bd0db/cryptography-46.0.3-cp38-abi3-win_amd64.whl", hash = "sha256:416260257577718c05135c55958b674000baef9a1c7d9e8f306ec60d71db850f", size = 3501720 }, + { url = "https://files.pythonhosted.org/packages/e8/cb/2da4cc83f5edb9c3257d09e1e7ab7b23f049c7962cae8d842bbef0a9cec9/cryptography-46.0.3-cp38-abi3-win_arm64.whl", hash = "sha256:d89c3468de4cdc4f08a57e214384d0471911a3830fcdaf7a8cc587e42a866372", size = 2918740 }, +] + +[[package]] +name = "google-api-core" +version = "2.29.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0d/10/05572d33273292bac49c2d1785925f7bc3ff2fe50e3044cf1062c1dde32e/google_api_core-2.29.0.tar.gz", hash = "sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7", size = 177828 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/b6/85c4d21067220b9a78cfb81f516f9725ea6befc1544ec9bd2c1acd97c324/google_api_core-2.29.0-py3-none-any.whl", hash = "sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9", size = 173906 }, +] + +[[package]] +name = "google-auth" +version = "2.47.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1-modules" }, + { name = "rsa" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/60/3c/ec64b9a275ca22fa1cd3b6e77fefcf837b0732c890aa32d2bd21313d9b33/google_auth-2.47.0.tar.gz", hash = "sha256:833229070a9dfee1a353ae9877dcd2dec069a8281a4e72e72f77d4a70ff945da", size = 323719 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/18/79e9008530b79527e0d5f79e7eef08d3b179b7f851cfd3a2f27822fbdfa9/google_auth-2.47.0-py3-none-any.whl", hash = "sha256:c516d68336bfde7cf0da26aab674a36fedcf04b37ac4edd59c597178760c3498", size = 234867 }, +] + +[[package]] +name = "google-cloud-core" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/03/ef0bc99d0e0faf4fdbe67ac445e18cdaa74824fd93cd069e7bb6548cb52d/google_cloud_core-2.5.0.tar.gz", hash = "sha256:7c1b7ef5c92311717bd05301aa1a91ffbc565673d3b0b4163a52d8413a186963", size = 36027 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/20/bfa472e327c8edee00f04beecc80baeddd2ab33ee0e86fd7654da49d45e9/google_cloud_core-2.5.0-py3-none-any.whl", hash = "sha256:67d977b41ae6c7211ee830c7912e41003ea8194bff15ae7d72fd6f51e57acabc", size = 29469 }, +] + +[[package]] +name = "google-cloud-storage" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-cloud-core" }, + { name = "google-crc32c" }, + { name = "google-resumable-media" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/90/4398cecc2704cb066bc7dee6111a5c93c59bcd6fb751f0541315655774a8/google_cloud_storage-3.8.0.tar.gz", hash = "sha256:cc67952dce84ebc9d44970e24647a58260630b7b64d72360cedaf422d6727f28", size = 17273792 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/db/326279870d349fb9592263343dca4ad76088c17c88ba97b0f64c1088276c/google_cloud_storage-3.8.0-py3-none-any.whl", hash = "sha256:78cfeae7cac2ca9441d0d0271c2eb4ebfa21aa4c6944dd0ccac0389e81d955a7", size = 312430 }, +] + +[[package]] +name = "google-crc32c" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300 }, + { url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867 }, + { url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364 }, + { url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740 }, + { url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437 }, + { url = "https://files.pythonhosted.org/packages/d1/db/000f15b41724589b0e7bc24bc7a8967898d8d3bc8caf64c513d91ef1f6c0/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b", size = 31297 }, + { url = "https://files.pythonhosted.org/packages/d7/0d/8ebed0c39c53a7e838e2a486da8abb0e52de135f1b376ae2f0b160eb4c1a/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27", size = 30867 }, + { url = "https://files.pythonhosted.org/packages/ce/42/b468aec74a0354b34c8cbf748db20d6e350a68a2b0912e128cabee49806c/google_crc32c-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa", size = 33344 }, + { url = "https://files.pythonhosted.org/packages/1c/e8/b33784d6fc77fb5062a8a7854e43e1e618b87d5ddf610a88025e4de6226e/google_crc32c-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8", size = 33694 }, + { url = "https://files.pythonhosted.org/packages/92/b1/d3cbd4d988afb3d8e4db94ca953df429ed6db7282ed0e700d25e6c7bfc8d/google_crc32c-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f", size = 34435 }, + { url = "https://files.pythonhosted.org/packages/21/88/8ecf3c2b864a490b9e7010c84fd203ec8cf3b280651106a3a74dd1b0ca72/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697", size = 31301 }, + { url = "https://files.pythonhosted.org/packages/36/c6/f7ff6c11f5ca215d9f43d3629163727a272eabc356e5c9b2853df2bfe965/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651", size = 30868 }, + { url = "https://files.pythonhosted.org/packages/56/15/c25671c7aad70f8179d858c55a6ae8404902abe0cdcf32a29d581792b491/google_crc32c-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2", size = 33381 }, + { url = "https://files.pythonhosted.org/packages/42/fa/f50f51260d7b0ef5d4898af122d8a7ec5a84e2984f676f746445f783705f/google_crc32c-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21", size = 33734 }, + { url = "https://files.pythonhosted.org/packages/08/a5/7b059810934a09fb3ccb657e0843813c1fee1183d3bc2c8041800374aa2c/google_crc32c-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2", size = 34878 }, +] + +[[package]] +name = "google-resumable-media" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-crc32c" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/64/d7/520b62a35b23038ff005e334dba3ffc75fcf583bee26723f1fd8fd4b6919/google_resumable_media-2.8.0.tar.gz", hash = "sha256:f1157ed8b46994d60a1bc432544db62352043113684d4e030ee02e77ebe9a1ae", size = 2163265 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/0b/93afde9cfe012260e9fe1522f35c9b72d6ee222f316586b1f23ecf44d518/google_resumable_media-2.8.0-py3-none-any.whl", hash = "sha256:dd14a116af303845a8d932ddae161a26e86cc229645bc98b39f026f9b1717582", size = 81340 }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.72.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e5/7b/adfd75544c415c487b33061fe7ae526165241c1ea133f9a9125a56b39fd8/googleapis_common_protos-1.72.0.tar.gz", hash = "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5", size = 147433 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515 }, +] + +[[package]] +name = "grpcio" +version = "1.76.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b6/e0/318c1ce3ae5a17894d5791e87aea147587c9e702f24122cc7a5c8bbaeeb1/grpcio-1.76.0.tar.gz", hash = "sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73", size = 12785182 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/05/8e29121994b8d959ffa0afd28996d452f291b48cfc0875619de0bde2c50c/grpcio-1.76.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8", size = 5799718 }, + { url = "https://files.pythonhosted.org/packages/d9/75/11d0e66b3cdf998c996489581bdad8900db79ebd83513e45c19548f1cba4/grpcio-1.76.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280", size = 11825627 }, + { url = "https://files.pythonhosted.org/packages/28/50/2f0aa0498bc188048f5d9504dcc5c2c24f2eb1a9337cd0fa09a61a2e75f0/grpcio-1.76.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4", size = 6359167 }, + { url = "https://files.pythonhosted.org/packages/66/e5/bbf0bb97d29ede1d59d6588af40018cfc345b17ce979b7b45424628dc8bb/grpcio-1.76.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11", size = 7044267 }, + { url = "https://files.pythonhosted.org/packages/f5/86/f6ec2164f743d9609691115ae8ece098c76b894ebe4f7c94a655c6b03e98/grpcio-1.76.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6", size = 6573963 }, + { url = "https://files.pythonhosted.org/packages/60/bc/8d9d0d8505feccfdf38a766d262c71e73639c165b311c9457208b56d92ae/grpcio-1.76.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8", size = 7164484 }, + { url = "https://files.pythonhosted.org/packages/67/e6/5d6c2fc10b95edf6df9b8f19cf10a34263b7fd48493936fffd5085521292/grpcio-1.76.0-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980", size = 8127777 }, + { url = "https://files.pythonhosted.org/packages/3f/c8/dce8ff21c86abe025efe304d9e31fdb0deaaa3b502b6a78141080f206da0/grpcio-1.76.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882", size = 7594014 }, + { url = "https://files.pythonhosted.org/packages/e0/42/ad28191ebf983a5d0ecef90bab66baa5a6b18f2bfdef9d0a63b1973d9f75/grpcio-1.76.0-cp312-cp312-win32.whl", hash = "sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958", size = 3984750 }, + { url = "https://files.pythonhosted.org/packages/9e/00/7bd478cbb851c04a48baccaa49b75abaa8e4122f7d86da797500cccdd771/grpcio-1.76.0-cp312-cp312-win_amd64.whl", hash = "sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347", size = 4704003 }, + { url = "https://files.pythonhosted.org/packages/fc/ed/71467ab770effc9e8cef5f2e7388beb2be26ed642d567697bb103a790c72/grpcio-1.76.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:26ef06c73eb53267c2b319f43e6634c7556ea37672029241a056629af27c10e2", size = 5807716 }, + { url = "https://files.pythonhosted.org/packages/2c/85/c6ed56f9817fab03fa8a111ca91469941fb514e3e3ce6d793cb8f1e1347b/grpcio-1.76.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:45e0111e73f43f735d70786557dc38141185072d7ff8dc1829d6a77ac1471468", size = 11821522 }, + { url = "https://files.pythonhosted.org/packages/ac/31/2b8a235ab40c39cbc141ef647f8a6eb7b0028f023015a4842933bc0d6831/grpcio-1.76.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:83d57312a58dcfe2a3a0f9d1389b299438909a02db60e2f2ea2ae2d8034909d3", size = 6362558 }, + { url = "https://files.pythonhosted.org/packages/bd/64/9784eab483358e08847498ee56faf8ff6ea8e0a4592568d9f68edc97e9e9/grpcio-1.76.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:3e2a27c89eb9ac3d81ec8835e12414d73536c6e620355d65102503064a4ed6eb", size = 7049990 }, + { url = "https://files.pythonhosted.org/packages/2b/94/8c12319a6369434e7a184b987e8e9f3b49a114c489b8315f029e24de4837/grpcio-1.76.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:61f69297cba3950a524f61c7c8ee12e55c486cb5f7db47ff9dcee33da6f0d3ae", size = 6575387 }, + { url = "https://files.pythonhosted.org/packages/15/0f/f12c32b03f731f4a6242f771f63039df182c8b8e2cf8075b245b409259d4/grpcio-1.76.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6a15c17af8839b6801d554263c546c69c4d7718ad4321e3166175b37eaacca77", size = 7166668 }, + { url = "https://files.pythonhosted.org/packages/ff/2d/3ec9ce0c2b1d92dd59d1c3264aaec9f0f7c817d6e8ac683b97198a36ed5a/grpcio-1.76.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:25a18e9810fbc7e7f03ec2516addc116a957f8cbb8cbc95ccc80faa072743d03", size = 8124928 }, + { url = "https://files.pythonhosted.org/packages/1a/74/fd3317be5672f4856bcdd1a9e7b5e17554692d3db9a3b273879dc02d657d/grpcio-1.76.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:931091142fd8cc14edccc0845a79248bc155425eee9a98b2db2ea4f00a235a42", size = 7589983 }, + { url = "https://files.pythonhosted.org/packages/45/bb/ca038cf420f405971f19821c8c15bcbc875505f6ffadafe9ffd77871dc4c/grpcio-1.76.0-cp313-cp313-win32.whl", hash = "sha256:5e8571632780e08526f118f74170ad8d50fb0a48c23a746bef2a6ebade3abd6f", size = 3984727 }, + { url = "https://files.pythonhosted.org/packages/41/80/84087dc56437ced7cdd4b13d7875e7439a52a261e3ab4e06488ba6173b0a/grpcio-1.76.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:f9f7bd5faab55f47231ad8dba7787866b69f5e93bc306e3915606779bbfb4ba8", size = 4702799 }, + { url = "https://files.pythonhosted.org/packages/b4/46/39adac80de49d678e6e073b70204091e76631e03e94928b9ea4ecf0f6e0e/grpcio-1.76.0-cp314-cp314-linux_armv7l.whl", hash = "sha256:ff8a59ea85a1f2191a0ffcc61298c571bc566332f82e5f5be1b83c9d8e668a62", size = 5808417 }, + { url = "https://files.pythonhosted.org/packages/9c/f5/a4531f7fb8b4e2a60b94e39d5d924469b7a6988176b3422487be61fe2998/grpcio-1.76.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:06c3d6b076e7b593905d04fdba6a0525711b3466f43b3400266f04ff735de0cd", size = 11828219 }, + { url = "https://files.pythonhosted.org/packages/4b/1c/de55d868ed7a8bd6acc6b1d6ddc4aa36d07a9f31d33c912c804adb1b971b/grpcio-1.76.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fd5ef5932f6475c436c4a55e4336ebbe47bd3272be04964a03d316bbf4afbcbc", size = 6367826 }, + { url = "https://files.pythonhosted.org/packages/59/64/99e44c02b5adb0ad13ab3adc89cb33cb54bfa90c74770f2607eea629b86f/grpcio-1.76.0-cp314-cp314-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:b331680e46239e090f5b3cead313cc772f6caa7d0fc8de349337563125361a4a", size = 7049550 }, + { url = "https://files.pythonhosted.org/packages/43/28/40a5be3f9a86949b83e7d6a2ad6011d993cbe9b6bd27bea881f61c7788b6/grpcio-1.76.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2229ae655ec4e8999599469559e97630185fdd53ae1e8997d147b7c9b2b72cba", size = 6575564 }, + { url = "https://files.pythonhosted.org/packages/4b/a9/1be18e6055b64467440208a8559afac243c66a8b904213af6f392dc2212f/grpcio-1.76.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:490fa6d203992c47c7b9e4a9d39003a0c2bcc1c9aa3c058730884bbbb0ee9f09", size = 7176236 }, + { url = "https://files.pythonhosted.org/packages/0f/55/dba05d3fcc151ce6e81327541d2cc8394f442f6b350fead67401661bf041/grpcio-1.76.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:479496325ce554792dba6548fae3df31a72cef7bad71ca2e12b0e58f9b336bfc", size = 8125795 }, + { url = "https://files.pythonhosted.org/packages/4a/45/122df922d05655f63930cf42c9e3f72ba20aadb26c100ee105cad4ce4257/grpcio-1.76.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1c9b93f79f48b03ada57ea24725d83a30284a012ec27eab2cf7e50a550cbbbcc", size = 7592214 }, + { url = "https://files.pythonhosted.org/packages/4a/6e/0b899b7f6b66e5af39e377055fb4a6675c9ee28431df5708139df2e93233/grpcio-1.76.0-cp314-cp314-win32.whl", hash = "sha256:747fa73efa9b8b1488a95d0ba1039c8e2dca0f741612d80415b1e1c560febf4e", size = 4062961 }, + { url = "https://files.pythonhosted.org/packages/19/41/0b430b01a2eb38ee887f88c1f07644a1df8e289353b78e82b37ef988fb64/grpcio-1.76.0-cp314-cp314-win_amd64.whl", hash = "sha256:922fa70ba549fce362d2e2871ab542082d66e2aaf0c19480ea453905b01f384e", size = 4834462 }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies 
= [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779 }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357 }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007 }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = 
"sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008 }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 }, +] + +[[package]] +name = "isodate" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 }, +] + +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419 }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615 }, + { url = 
"https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020 }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332 }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947 }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962 }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760 }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529 }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015 }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540 }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105 }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906 }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622 }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029 }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash 
= "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374 }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980 }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990 }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784 }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588 }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041 }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543 }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113 }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911 }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658 }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066 }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639 }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569 }, + { url = 
"https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284 }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801 }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769 }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642 }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612 }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200 }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973 }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619 }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029 }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408 }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005 }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048 }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821 }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606 }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043 }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747 }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341 }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073 }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661 }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069 }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670 }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598 }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261 }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835 }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733 }, + { url = 
"https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672 }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819 }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426 }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146 }, +] + +[[package]] +name = "moto" +version = "5.1.20" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "boto3" }, + { name = "botocore" }, + { name = "cryptography" }, + { name = "jinja2" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "responses" }, + { name = "werkzeug" }, + { name = "xmltodict" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b4/93/6b696aab5174721696a17716a488086e21f7b2547b4c9517f799a9b25e9e/moto-5.1.20.tar.gz", hash = "sha256:6d12d781e26a550d80e4b7e01d5538178e3adec6efbdec870e06e84750f13ec0", size = 8318716 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/2f/f50892fdb28097917b87d358a5fcefd30976289884ff142893edcb0243ba/moto-5.1.20-py3-none-any.whl", hash = "sha256:58c82c8e6b2ef659ef3a562fa415dce14da84bc7a797943245d9a338496ea0ea", size = 6392751 }, +] + +[[package]] +name = "nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438 }, +] + +[[package]] +name = "numpy" +version = "2.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/7f/ec53e32bf10c813604edf07a3682616bd931d026fcde7b6d13195dfb684a/numpy-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d3703409aac693fa82c0aee023a1ae06a6e9d065dba10f5e8e80f642f1e9d0a2", size = 16656888 }, + { url = "https://files.pythonhosted.org/packages/b8/e0/1f9585d7dae8f14864e948fd7fa86c6cb72dee2676ca2748e63b1c5acfe0/numpy-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7211b95ca365519d3596a1d8688a95874cc94219d417504d9ecb2df99fa7bfa8", size = 12373956 }, + { url = 
"https://files.pythonhosted.org/packages/8e/43/9762e88909ff2326f5e7536fa8cb3c49fb03a7d92705f23e6e7f553d9cb3/numpy-2.4.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5adf01965456a664fc727ed69cc71848f28d063217c63e1a0e200a118d5eec9a", size = 5202567 }, + { url = "https://files.pythonhosted.org/packages/4b/ee/34b7930eb61e79feb4478800a4b95b46566969d837546aa7c034c742ef98/numpy-2.4.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26f0bcd9c79a00e339565b303badc74d3ea2bd6d52191eeca5f95936cad107d0", size = 6549459 }, + { url = "https://files.pythonhosted.org/packages/79/e3/5f115fae982565771be994867c89bcd8d7208dbfe9469185497d70de5ddf/numpy-2.4.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0093e85df2960d7e4049664b26afc58b03236e967fb942354deef3208857a04c", size = 14404859 }, + { url = "https://files.pythonhosted.org/packages/d9/7d/9c8a781c88933725445a859cac5d01b5871588a15969ee6aeb618ba99eee/numpy-2.4.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ad270f438cbdd402c364980317fb6b117d9ec5e226fff5b4148dd9aa9fc6e02", size = 16371419 }, + { url = "https://files.pythonhosted.org/packages/a6/d2/8aa084818554543f17cf4162c42f162acbd3bb42688aefdba6628a859f77/numpy-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:297c72b1b98100c2e8f873d5d35fb551fce7040ade83d67dd51d38c8d42a2162", size = 16182131 }, + { url = "https://files.pythonhosted.org/packages/60/db/0425216684297c58a8df35f3284ef56ec4a043e6d283f8a59c53562caf1b/numpy-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cf6470d91d34bf669f61d515499859fa7a4c2f7c36434afb70e82df7217933f9", size = 18295342 }, + { url = "https://files.pythonhosted.org/packages/31/4c/14cb9d86240bd8c386c881bafbe43f001284b7cce3bc01623ac9475da163/numpy-2.4.1-cp312-cp312-win32.whl", hash = "sha256:b6bcf39112e956594b3331316d90c90c90fb961e39696bda97b89462f5f3943f", size = 5959015 }, + { url = "https://files.pythonhosted.org/packages/51/cf/52a703dbeb0c65807540d29699fef5fda073434ff61846a564d5c296420f/numpy-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:e1a27bb1b2dee45a2a53f5ca6ff2d1a7f135287883a1689e930d44d1ff296c87", size = 12310730 }, + { url = "https://files.pythonhosted.org/packages/69/80/a828b2d0ade5e74a9fe0f4e0a17c30fdc26232ad2bc8c9f8b3197cf7cf18/numpy-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:0e6e8f9d9ecf95399982019c01223dc130542960a12edfa8edd1122dfa66a8a8", size = 10312166 }, + { url = "https://files.pythonhosted.org/packages/04/68/732d4b7811c00775f3bd522a21e8dd5a23f77eb11acdeb663e4a4ebf0ef4/numpy-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d797454e37570cfd61143b73b8debd623c3c0952959adb817dd310a483d58a1b", size = 16652495 }, + { url = "https://files.pythonhosted.org/packages/20/ca/857722353421a27f1465652b2c66813eeeccea9d76d5f7b74b99f298e60e/numpy-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82c55962006156aeef1629b953fd359064aa47e4d82cfc8e67f0918f7da3344f", size = 12368657 }, + { url = "https://files.pythonhosted.org/packages/81/0d/2377c917513449cc6240031a79d30eb9a163d32a91e79e0da47c43f2c0c8/numpy-2.4.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:71abbea030f2cfc3092a0ff9f8c8fdefdc5e0bf7d9d9c99663538bb0ecdac0b9", size = 5197256 }, + { url = "https://files.pythonhosted.org/packages/17/39/569452228de3f5de9064ac75137082c6214be1f5c532016549a7923ab4b5/numpy-2.4.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b55aa56165b17aaf15520beb9cbd33c9039810e0d9643dd4379e44294c7303e", size = 6545212 }, + { url = 
"https://files.pythonhosted.org/packages/8c/a4/77333f4d1e4dac4395385482557aeecf4826e6ff517e32ca48e1dafbe42a/numpy-2.4.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0faba4a331195bfa96f93dd9dfaa10b2c7aa8cda3a02b7fd635e588fe821bf5", size = 14402871 }, + { url = "https://files.pythonhosted.org/packages/ba/87/d341e519956273b39d8d47969dd1eaa1af740615394fe67d06f1efa68773/numpy-2.4.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3e3087f53e2b4428766b54932644d148613c5a595150533ae7f00dab2f319a8", size = 16359305 }, + { url = "https://files.pythonhosted.org/packages/32/91/789132c6666288eaa20ae8066bb99eba1939362e8f1a534949a215246e97/numpy-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:49e792ec351315e16da54b543db06ca8a86985ab682602d90c60ef4ff4db2a9c", size = 16181909 }, + { url = "https://files.pythonhosted.org/packages/cf/b8/090b8bd27b82a844bb22ff8fdf7935cb1980b48d6e439ae116f53cdc2143/numpy-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:79e9e06c4c2379db47f3f6fc7a8652e7498251789bf8ff5bd43bf478ef314ca2", size = 18284380 }, + { url = "https://files.pythonhosted.org/packages/67/78/722b62bd31842ff029412271556a1a27a98f45359dea78b1548a3a9996aa/numpy-2.4.1-cp313-cp313-win32.whl", hash = "sha256:3d1a100e48cb266090a031397863ff8a30050ceefd798f686ff92c67a486753d", size = 5957089 }, + { url = "https://files.pythonhosted.org/packages/da/a6/cf32198b0b6e18d4fbfa9a21a992a7fca535b9bb2b0cdd217d4a3445b5ca/numpy-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:92a0e65272fd60bfa0d9278e0484c2f52fe03b97aedc02b357f33fe752c52ffb", size = 12307230 }, + { url = "https://files.pythonhosted.org/packages/44/6c/534d692bfb7d0afe30611320c5fb713659dcb5104d7cc182aff2aea092f5/numpy-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:20d4649c773f66cc2fc36f663e091f57c3b7655f936a4c681b4250855d1da8f5", size = 10313125 }, + { url = "https://files.pythonhosted.org/packages/da/a1/354583ac5c4caa566de6ddfbc42744409b515039e085fab6e0ff942e0df5/numpy-2.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f93bc6892fe7b0663e5ffa83b61aab510aacffd58c16e012bb9352d489d90cb7", size = 12496156 }, + { url = "https://files.pythonhosted.org/packages/51/b0/42807c6e8cce58c00127b1dc24d365305189991f2a7917aa694a109c8d7d/numpy-2.4.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:178de8f87948163d98a4c9ab5bee4ce6519ca918926ec8df195af582de28544d", size = 5324663 }, + { url = "https://files.pythonhosted.org/packages/fe/55/7a621694010d92375ed82f312b2f28017694ed784775269115323e37f5e2/numpy-2.4.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:98b35775e03ab7f868908b524fc0a84d38932d8daf7b7e1c3c3a1b6c7a2c9f15", size = 6645224 }, + { url = "https://files.pythonhosted.org/packages/50/96/9fa8635ed9d7c847d87e30c834f7109fac5e88549d79ef3324ab5c20919f/numpy-2.4.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:941c2a93313d030f219f3a71fd3d91a728b82979a5e8034eb2e60d394a2b83f9", size = 14462352 }, + { url = "https://files.pythonhosted.org/packages/03/d1/8cf62d8bb2062da4fb82dd5d49e47c923f9c0738032f054e0a75342faba7/numpy-2.4.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:529050522e983e00a6c1c6b67411083630de8b57f65e853d7b03d9281b8694d2", size = 16407279 }, + { url = "https://files.pythonhosted.org/packages/86/1c/95c86e17c6b0b31ce6ef219da00f71113b220bcb14938c8d9a05cee0ff53/numpy-2.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2302dc0224c1cbc49bb94f7064f3f923a971bfae45c33870dcbff63a2a550505", size = 16248316 
}, + { url = "https://files.pythonhosted.org/packages/30/b4/e7f5ff8697274c9d0fa82398b6a372a27e5cef069b37df6355ccb1f1db1a/numpy-2.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9171a42fcad32dcf3fa86f0a4faa5e9f8facefdb276f54b8b390d90447cff4e2", size = 18329884 }, + { url = "https://files.pythonhosted.org/packages/37/a4/b073f3e9d77f9aec8debe8ca7f9f6a09e888ad1ba7488f0c3b36a94c03ac/numpy-2.4.1-cp313-cp313t-win32.whl", hash = "sha256:382ad67d99ef49024f11d1ce5dcb5ad8432446e4246a4b014418ba3a1175a1f4", size = 6081138 }, + { url = "https://files.pythonhosted.org/packages/16/16/af42337b53844e67752a092481ab869c0523bc95c4e5c98e4dac4e9581ac/numpy-2.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:62fea415f83ad8fdb6c20840578e5fbaf5ddd65e0ec6c3c47eda0f69da172510", size = 12447478 }, + { url = "https://files.pythonhosted.org/packages/6c/f8/fa85b2eac68ec631d0b631abc448552cb17d39afd17ec53dcbcc3537681a/numpy-2.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a7870e8c5fc11aef57d6fea4b4085e537a3a60ad2cdd14322ed531fdca68d261", size = 10382981 }, + { url = "https://files.pythonhosted.org/packages/1b/a7/ef08d25698e0e4b4efbad8d55251d20fe2a15f6d9aa7c9b30cd03c165e6f/numpy-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:3869ea1ee1a1edc16c29bbe3a2f2a4e515cc3a44d43903ad41e0cacdbaf733dc", size = 16652046 }, + { url = "https://files.pythonhosted.org/packages/8f/39/e378b3e3ca13477e5ac70293ec027c438d1927f18637e396fe90b1addd72/numpy-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e867df947d427cdd7a60e3e271729090b0f0df80f5f10ab7dd436f40811699c3", size = 12378858 }, + { url = "https://files.pythonhosted.org/packages/c3/74/7ec6154f0006910ed1fdbb7591cf4432307033102b8a22041599935f8969/numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:e3bd2cb07841166420d2fa7146c96ce00cb3410664cbc1a6be028e456c4ee220", size = 5207417 }, + { url = "https://files.pythonhosted.org/packages/f7/b7/053ac11820d84e42f8feea5cb81cc4fcd1091499b45b1ed8c7415b1bf831/numpy-2.4.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:f0a90aba7d521e6954670550e561a4cb925713bd944445dbe9e729b71f6cabee", size = 6542643 }, + { url = "https://files.pythonhosted.org/packages/c0/c4/2e7908915c0e32ca636b92e4e4a3bdec4cb1e7eb0f8aedf1ed3c68a0d8cd/numpy-2.4.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d558123217a83b2d1ba316b986e9248a1ed1971ad495963d555ccd75dcb1556", size = 14418963 }, + { url = "https://files.pythonhosted.org/packages/eb/c0/3ed5083d94e7ffd7c404e54619c088e11f2e1939a9544f5397f4adb1b8ba/numpy-2.4.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f44de05659b67d20499cbc96d49f2650769afcb398b79b324bb6e297bfe3844", size = 16363811 }, + { url = "https://files.pythonhosted.org/packages/0e/68/42b66f1852bf525050a67315a4fb94586ab7e9eaa541b1bef530fab0c5dd/numpy-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:69e7419c9012c4aaf695109564e3387f1259f001b4326dfa55907b098af082d3", size = 16197643 }, + { url = "https://files.pythonhosted.org/packages/d2/40/e8714fc933d85f82c6bfc7b998a0649ad9769a32f3494ba86598aaf18a48/numpy-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2ffd257026eb1b34352e749d7cc1678b5eeec3e329ad8c9965a797e08ccba205", size = 18289601 }, + { url = "https://files.pythonhosted.org/packages/80/9a/0d44b468cad50315127e884802351723daca7cf1c98d102929468c81d439/numpy-2.4.1-cp314-cp314-win32.whl", hash = "sha256:727c6c3275ddefa0dc078524a85e064c057b4f4e71ca5ca29a19163c607be745", size = 6005722 }, + { url = 
"https://files.pythonhosted.org/packages/7e/bb/c6513edcce5a831810e2dddc0d3452ce84d208af92405a0c2e58fd8e7881/numpy-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:7d5d7999df434a038d75a748275cd6c0094b0ecdb0837342b332a82defc4dc4d", size = 12438590 }, + { url = "https://files.pythonhosted.org/packages/e9/da/a598d5cb260780cf4d255102deba35c1d072dc028c4547832f45dd3323a8/numpy-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:ce9ce141a505053b3c7bce3216071f3bf5c182b8b28930f14cd24d43932cd2df", size = 10596180 }, + { url = "https://files.pythonhosted.org/packages/de/bc/ea3f2c96fcb382311827231f911723aeff596364eb6e1b6d1d91128aa29b/numpy-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e53170557d37ae404bf8d542ca5b7c629d6efa1117dac6a83e394142ea0a43f", size = 12498774 }, + { url = "https://files.pythonhosted.org/packages/aa/ab/ef9d939fe4a812648c7a712610b2ca6140b0853c5efea361301006c02ae5/numpy-2.4.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:a73044b752f5d34d4232f25f18160a1cc418ea4507f5f11e299d8ac36875f8a0", size = 5327274 }, + { url = "https://files.pythonhosted.org/packages/bd/31/d381368e2a95c3b08b8cf7faac6004849e960f4a042d920337f71cef0cae/numpy-2.4.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:fb1461c99de4d040666ca0444057b06541e5642f800b71c56e6ea92d6a853a0c", size = 6648306 }, + { url = "https://files.pythonhosted.org/packages/c8/e5/0989b44ade47430be6323d05c23207636d67d7362a1796ccbccac6773dd2/numpy-2.4.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:423797bdab2eeefbe608d7c1ec7b2b4fd3c58d51460f1ee26c7500a1d9c9ee93", size = 14464653 }, + { url = "https://files.pythonhosted.org/packages/10/a7/cfbe475c35371cae1358e61f20c5f075badc18c4797ab4354140e1d283cf/numpy-2.4.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52b5f61bdb323b566b528899cc7db2ba5d1015bda7ea811a8bcf3c89c331fa42", size = 16405144 }, + { url = "https://files.pythonhosted.org/packages/f8/a3/0c63fe66b534888fa5177cc7cef061541064dbe2b4b60dcc60ffaf0d2157/numpy-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42d7dd5fa36d16d52a84f821eb96031836fd405ee6955dd732f2023724d0aa01", size = 16247425 }, + { url = "https://files.pythonhosted.org/packages/6b/2b/55d980cfa2c93bd40ff4c290bf824d792bd41d2fe3487b07707559071760/numpy-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7b6b5e28bbd47b7532698e5db2fe1db693d84b58c254e4389d99a27bb9b8f6b", size = 18330053 }, + { url = "https://files.pythonhosted.org/packages/23/12/8b5fc6b9c487a09a7957188e0943c9ff08432c65e34567cabc1623b03a51/numpy-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:5de60946f14ebe15e713a6f22850c2372fa72f4ff9a432ab44aa90edcadaa65a", size = 6152482 }, + { url = "https://files.pythonhosted.org/packages/00/a5/9f8ca5856b8940492fc24fbe13c1bc34d65ddf4079097cf9e53164d094e1/numpy-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:8f085da926c0d491ffff3096f91078cc97ea67e7e6b65e490bc8dcda65663be2", size = 12627117 }, + { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121 }, +] + +[[package]] +name = "nvisy-dal" +version = "0.1.0" +source = { editable = "." 
} +dependencies = [ + { name = "pydantic" }, +] + +[package.optional-dependencies] +all = [ + { name = "aiomysql" }, + { name = "asyncpg" }, + { name = "azure-storage-blob" }, + { name = "boto3" }, + { name = "google-cloud-storage" }, + { name = "pinecone-client" }, + { name = "qdrant-client" }, + { name = "types-boto3" }, +] +azure = [ + { name = "azure-storage-blob" }, +] +dev = [ + { name = "aiomysql" }, + { name = "asyncpg" }, + { name = "azure-storage-blob" }, + { name = "boto3" }, + { name = "google-cloud-storage" }, + { name = "moto" }, + { name = "pinecone-client" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "qdrant-client" }, + { name = "types-boto3" }, +] +gcs = [ + { name = "google-cloud-storage" }, +] +mysql = [ + { name = "aiomysql" }, +] +pinecone = [ + { name = "pinecone-client" }, +] +postgres = [ + { name = "asyncpg" }, +] +qdrant = [ + { name = "qdrant-client" }, +] +s3 = [ + { name = "boto3" }, + { name = "types-boto3" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pyright" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiomysql", marker = "extra == 'mysql'", specifier = ">=0.2" }, + { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30" }, + { name = "azure-storage-blob", marker = "extra == 'azure'", specifier = ">=12.23" }, + { name = "boto3", marker = "extra == 's3'", specifier = ">=1.35" }, + { name = "google-cloud-storage", marker = "extra == 'gcs'", specifier = ">=2.18" }, + { name = "moto", marker = "extra == 'dev'", specifier = ">=5.0" }, + { name = "nvisy-dal", extras = ["all"], marker = "extra == 'dev'" }, + { name = "nvisy-dal", extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone"], marker = "extra == 'all'" }, + { name = "pinecone-client", marker = "extra == 'pinecone'", specifier = ">=5.0" }, + { name = "pydantic", specifier = ">=2.10" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24" }, + { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.12" }, + { name = "types-boto3", marker = "extra == 's3'" }, +] +provides-extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone", "all", "dev"] + +[package.metadata.requires-dev] +dev = [ + { name = "pyright", specifier = ">=1.1.408" }, + { name = "ruff", specifier = ">=0.14.14" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366 }, +] + +[[package]] +name = "pinecone-client" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "pinecone-plugin-interface" }, + { name = "python-dateutil" }, + { name = "typing-extensions" }, + { name = "urllib3", marker = "python_full_version < '4.0'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6c/ab/3ab3b81e8ad82fbfcaa4f446c7f962b18968d61543c8c9e2c38bd777c056/pinecone_client-6.0.0.tar.gz", hash = 
"sha256:f224fc999205e4858c4737c40922bdf42d178b361c8859bc486ec00d45b359a9", size = 7004 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/e4/7780cd631dc6dad0172a245e958b41b28a70779594c0790fa08b952aa97f/pinecone_client-6.0.0-py3-none-any.whl", hash = "sha256:d81a9e73cae441e4ab6dfc9c1d8b51c9895dae2488cda64f3e21b9dfc10c8d94", size = 6654 }, +] + +[[package]] +name = "pinecone-plugin-interface" +version = "0.0.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f4/fb/e8a4063264953ead9e2b24d9b390152c60f042c951c47f4592e9996e57ff/pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846", size = 3370 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/1d/a21fdfcd6d022cb64cef5c2a29ee6691c6c103c4566b41646b080b7536a5/pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8", size = 6249 }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, +] + +[[package]] +name = "portalocker" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/a6/38c8e2f318bf67d338f4d629e93b0b4b9af331f455f0390ea8ce4a099b26/portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968", size = 22424 }, +] + +[[package]] +name = "proto-plus" +version = "1.27.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/89/9cbe2f4bba860e149108b683bc2efec21f14d5f7ed6e25562ad86acbc373/proto_plus-1.27.0.tar.gz", hash = "sha256:873af56dd0d7e91836aee871e5799e1c6f1bda86ac9a983e0bb9f0c266a568c4", size = 56158 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/24/3b7a0818484df9c28172857af32c2397b6d8fcd99d9468bd4684f98ebf0a/proto_plus-1.27.0-py3-none-any.whl", hash = "sha256:1baa7f81cf0f8acb8bc1f6d085008ba4171eaf669629d1b6d1673b21ed1c0a82", size = 50205 }, +] + +[[package]] +name = "protobuf" +version = "6.33.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/b8/cda15d9d46d03d4aa3a67cb6bffe05173440ccf86a9541afaf7ac59a1b6b/protobuf-6.33.4.tar.gz", hash = "sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91", size = 444346 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/be/24ef9f3095bacdf95b458543334d0c4908ccdaee5130420bf064492c325f/protobuf-6.33.4-cp310-abi3-win32.whl", hash = 
"sha256:918966612c8232fc6c24c78e1cd89784307f5814ad7506c308ee3cf86662850d", size = 425612 }, + { url = "https://files.pythonhosted.org/packages/31/ad/e5693e1974a28869e7cd244302911955c1cebc0161eb32dfa2b25b6e96f0/protobuf-6.33.4-cp310-abi3-win_amd64.whl", hash = "sha256:8f11ffae31ec67fc2554c2ef891dcb561dae9a2a3ed941f9e134c2db06657dbc", size = 436962 }, + { url = "https://files.pythonhosted.org/packages/66/15/6ee23553b6bfd82670207ead921f4d8ef14c107e5e11443b04caeb5ab5ec/protobuf-6.33.4-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2fe67f6c014c84f655ee06f6f66213f9254b3a8b6bda6cda0ccd4232c73c06f0", size = 427612 }, + { url = "https://files.pythonhosted.org/packages/2b/48/d301907ce6d0db75f959ca74f44b475a9caa8fcba102d098d3c3dd0f2d3f/protobuf-6.33.4-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:757c978f82e74d75cba88eddec479df9b99a42b31193313b75e492c06a51764e", size = 324484 }, + { url = "https://files.pythonhosted.org/packages/92/1c/e53078d3f7fe710572ab2dcffd993e1e3b438ae71cfc031b71bae44fcb2d/protobuf-6.33.4-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c7c64f259c618f0bef7bee042075e390debbf9682334be2b67408ec7c1c09ee6", size = 339256 }, + { url = "https://files.pythonhosted.org/packages/e8/8e/971c0edd084914f7ee7c23aa70ba89e8903918adca179319ee94403701d5/protobuf-6.33.4-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:3df850c2f8db9934de4cf8f9152f8dc2558f49f298f37f90c517e8e5c84c30e9", size = 323311 }, + { url = "https://files.pythonhosted.org/packages/75/b1/1dc83c2c661b4c62d56cc081706ee33a4fc2835bd90f965baa2663ef7676/protobuf-6.33.4-py3-none-any.whl", hash = "sha256:1fe3730068fcf2e595816a6c34fe66eeedd37d51d0400b72fabc848811fdc1bc", size = 170532 }, +] + +[[package]] +name = "pyasn1" +version = "0.6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/b6/6e630dff89739fcd427e3f72b3d905ce0acb85a45d4ec3e2678718a3487f/pyasn1-0.6.2.tar.gz", hash = "sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b", size = 146586 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/b5/a96872e5184f354da9c84ae119971a0a4c221fe9b27a4d94bd43f2596727/pyasn1-0.6.2-py3-none-any.whl", hash = "sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf", size = 83371 }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259 }, +] + +[[package]] +name = "pycparser" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = 
"sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172 }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580 }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990 }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003 }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200 }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578 }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504 }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816 }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366 }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698 }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603 }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591 }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068 }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908 }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145 }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179 }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403 }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206 }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307 }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258 }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917 }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186 }, + { url = 
"https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164 }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146 }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788 }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133 }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852 }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679 }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766 }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005 }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622 }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725 }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040 }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691 }, + { url = 
"https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897 }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302 }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877 }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680 }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960 }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102 }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039 }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126 }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489 }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288 }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255 }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760 }, + { url = 
"https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092 }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385 }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832 }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585 }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078 }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914 }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560 }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244 }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955 }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906 }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607 }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769 }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, +] + +[[package]] +name = "pymysql" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/ae/1fe3fcd9f959efa0ebe200b8de88b5a5ce3e767e38c7ac32fb179f16a388/pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03", size = 48258 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/4c/ad33b92b9864cbde84f259d5df035a6447f91891f5be77788e2a3892bce3/pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9", size = 45300 }, +] + +[[package]] +name = "pyright" +version = "1.1.408" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nodeenv" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/b2/5db700e52554b8f025faa9c3c624c59f1f6c8841ba81ab97641b54322f16/pyright-1.1.408.tar.gz", hash = "sha256:f28f2321f96852fa50b5829ea492f6adb0e6954568d1caa3f3af3a5f555eb684", size = 4400578 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/82/a2c93e32800940d9573fb28c346772a14778b84ba7524e691b324620ab89/pyright-1.1.408-py3-none-any.whl", hash = "sha256:090b32865f4fdb1e0e6cd82bf5618480d48eecd2eb2e70f960982a3d9a4c17c1", size = 6399144 }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801 }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075 }, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, +] + +[[package]] +name = "pywin32" +version = "311" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543 }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040 }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102 }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700 }, + { url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700 }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318 }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714 }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800 }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658 }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669 }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252 }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081 }, + { url = 
"https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159 }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626 }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613 }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115 }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427 }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090 }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246 }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814 }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809 }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454 }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355 }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175 }, + { url = 
"https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228 }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194 }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429 }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912 }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108 }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641 }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901 }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132 }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261 }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272 }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923 }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062 }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = 
"sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 }, +] + +[[package]] +name = "qdrant-client" +version = "1.16.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "httpx", extra = ["http2"] }, + { name = "numpy" }, + { name = "portalocker" }, + { name = "protobuf" }, + { name = "pydantic" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ca/7d/3cd10e26ae97b35cf856ca1dc67576e42414ae39502c51165bb36bb1dff8/qdrant_client-1.16.2.tar.gz", hash = "sha256:ca4ef5f9be7b5eadeec89a085d96d5c723585a391eb8b2be8192919ab63185f0", size = 331112 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/13/8ce16f808297e16968269de44a14f4fef19b64d9766be1d6ba5ba78b579d/qdrant_client-1.16.2-py3-none-any.whl", hash = "sha256:442c7ef32ae0f005e88b5d3c0783c63d4912b97ae756eb5e052523be682f17d3", size = 377186 }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738 }, +] + +[[package]] +name = "responses" +version = "0.25.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/95/89c054ad70bfef6da605338b009b2e283485835351a9935c7bfbfaca7ffc/responses-0.25.8.tar.gz", hash = "sha256:9374d047a575c8f781b94454db5cab590b6029505f488d12899ddb10a4af1cf4", size = 79320 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1c/4c/cc276ce57e572c102d9542d383b2cfd551276581dc60004cb94fe8774c11/responses-0.25.8-py3-none-any.whl", hash = "sha256:0c710af92def29c8352ceadff0c3fe340ace27cf5af1bbe46fb71275bcd2831c", size = 34769 }, +] + +[[package]] +name = "rsa" +version = "4.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/da/8a/22b7beea3ee0d44b1916c0c1cb0ee3af23b700b6da9f04991899d0c555d4/rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75", size = 29034 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/8d/0133e4eb4beed9e425d9a98ed6e081a55d195481b7632472be1af08d2f6b/rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762", size = 34696 }, +] + +[[package]] +name = "ruff" +version = "0.14.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650 }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245 }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273 }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753 }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052 }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637 }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761 }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701 }, + { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455 }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882 }, + { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549 }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416 }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = 
"sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491 }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525 }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626 }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442 }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486 }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448 }, +] + +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830 }, +] + +[[package]] +name = "six" +version = "1.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 }, +] + +[[package]] +name = "types-awscrt" +version = "0.31.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/97/be/589b7bba42b5681a72bac4d714287afef4e1bb84d07c859610ff631d449e/types_awscrt-0.31.1.tar.gz", hash = "sha256:08b13494f93f45c1a92eb264755fce50ed0d1dc75059abb5e31670feb9a09724", size = 17839 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/fd/ddca80617f230bd833f99b4fb959abebffd8651f520493cae2e96276b1bd/types_awscrt-0.31.1-py3-none-any.whl", hash = "sha256:7e4364ac635f72bd57f52b093883640b1448a6eded0ecbac6e900bf4b1e4777b", size = 42516 }, +] + +[[package]] +name = "types-boto3" +version = "1.42.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore-stubs" }, + { name = "types-s3transfer" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/59/d7/3e2722311c9405cfcb0105d87b3e17a1c6ad6a5caab76d58b73d2983597e/types_boto3-1.42.34.tar.gz", hash = "sha256:86caec7ba201047ec78b170f87442cfe8ce288ce61199ea53bad255b33e8e00b", size = 101284 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/af/93/3edb443030240e0a452bb16420516ccca451fca38f8dc22ac95cf96fea29/types_boto3-1.42.34-py3-none-any.whl", hash = "sha256:a1ec9aad643b0f8455257ba12141f4f4f38875c586f00ff0fc96dfe75aa9d1b5", size = 69674 }, +] + +[[package]] +name = "types-s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/64/42689150509eb3e6e82b33ee3d89045de1592488842ddf23c56957786d05/types_s3transfer-0.16.0.tar.gz", hash = "sha256:b4636472024c5e2b62278c5b759661efeb52a81851cde5f092f24100b1ecb443", size = 13557 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/27/e88220fe6274eccd3bdf95d9382918716d312f6f6cef6a46332d1ee2feff/types_s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:1c0cd111ecf6e21437cb410f5cddb631bfb2263b77ad973e79b9c6d0cb24e0ef", size = 19247 }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611 }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584 }, +] + +[[package]] +name = "werkzeug" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/70/1469ef1d3542ae7c2c7b72bd5e3a4e6ee69d7978fa8a3af05a38eca5becf/werkzeug-3.1.5.tar.gz", hash = "sha256:6a548b0e88955dd07ccb25539d7d0cc97417ee9e179677d22c7041c8f078ce67", size = 864754 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025 }, +] + +[[package]] +name = "xmltodict" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/aa/917ceeed4dbb80d2f04dbd0c784b7ee7bba8ae5a54837ef0e5e062cd3cfb/xmltodict-1.0.2.tar.gz", hash = "sha256:54306780b7c2175a3967cad1db92f218207e5bc1aba697d887807c0fb68b7649", size = 25725 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/20/69a0e6058bc5ea74892d089d64dfc3a62ba78917ec5e2cfa70f7c92ba3a5/xmltodict-1.0.2-py3-none-any.whl", hash = "sha256:62d0fddb0dcbc9f642745d8bbf4d81fd17d6dfaec5a15b5c1876300aad92af0d", size = 13893 }, +] diff --git a/packages/nvisy-rig/py.typed b/packages/nvisy-rig/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/nvisy-rig/pyproject.toml b/packages/nvisy-rig/pyproject.toml new file mode 100644 index 0000000..ed2c6a9 --- /dev/null +++ b/packages/nvisy-rig/pyproject.toml @@ -0,0 +1,51 @@ +[project] +name = "nvisy-rig" +version = "0.1.0" +description = "AI/LLM orchestration layer" +requires-python = ">=3.12" +dependencies = [ + "pydantic>=2.10", + "nvisy-dal", +] + +[project.optional-dependencies] +openai = ["openai>=1.60"] +anthropic = ["anthropic>=0.40"] +cohere = ["cohere>=5.13"] +all = ["nvisy-rig[openai,anthropic,cohere]"] +dev = ["nvisy-rig[all]", "pytest>=8.0", "pytest-asyncio>=0.24"] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/nvisy_rig"] + +[tool.uv.sources] +nvisy-dal = { path = "../nvisy-dal", editable = true } + +[tool.ruff] +target-version = "py312" +line-length = 100 + +[tool.ruff.lint] +select = ["ALL"] +ignore = ["D", "COM812", "ISC001"] + +[tool.ruff.lint.isort] +known-first-party = ["nvisy_rig", "nvisy_dal"] + +[tool.basedpyright] +pythonVersion = "3.12" +typeCheckingMode = "strict" + +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" + +[dependency-groups] +dev = [ + "pyright>=1.1.408", + "ruff>=0.14.14", +] diff --git a/packages/nvisy-rig/pyrightconfig.json b/packages/nvisy-rig/pyrightconfig.json new file mode 100644 index 0000000..8fd8643 --- /dev/null +++ b/packages/nvisy-rig/pyrightconfig.json @@ -0,0 +1,4 @@ +{ + "venvPath": ".", + "venv": ".venv" +} diff --git a/packages/nvisy-rig/src/nvisy_rig/__init__.py b/packages/nvisy-rig/src/nvisy_rig/__init__.py new file mode 100644 index 0000000..bf3ebc2 --- /dev/null +++ b/packages/nvisy-rig/src/nvisy_rig/__init__.py @@ -0,0 +1 @@ +"""AI/LLM orchestration layer.""" diff --git a/packages/nvisy-rig/src/nvisy_rig/_generated/__init__.py b/packages/nvisy-rig/src/nvisy_rig/_generated/__init__.py new file mode 100644 index 0000000..5aa7e6e --- /dev/null +++ b/packages/nvisy-rig/src/nvisy_rig/_generated/__init__.py @@ -0,0 +1 @@ +"""Generated types from Rust JSON schemas.""" diff --git a/packages/nvisy-rig/src/nvisy_rig/agents/__init__.py b/packages/nvisy-rig/src/nvisy_rig/agents/__init__.py new file mode 100644 index 0000000..511d50c --- /dev/null +++ b/packages/nvisy-rig/src/nvisy_rig/agents/__init__.py @@ -0,0 +1 @@ +"""Agent implementations.""" diff --git a/packages/nvisy-rig/uv.lock b/packages/nvisy-rig/uv.lock new file mode 100644 index 0000000..ddb9851 --- /dev/null +++ b/packages/nvisy-rig/uv.lock @@ -0,0 +1,858 @@ 
+version = 1 +revision = 1 +requires-python = ">=3.12" + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, +] + +[[package]] +name = "anthropic" +version = "0.76.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "docstring-parser" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/be/d11abafaa15d6304826438170f7574d750218f49a106c54424a40cef4494/anthropic-0.76.0.tar.gz", hash = "sha256:e0cae6a368986d5cf6df743dfbb1b9519e6a9eee9c6c942ad8121c0b34416ffe", size = 495483 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/70/7b0fd9c1a738f59d3babe2b4212031c34ab7d0fda4ffef15b58a55c5bcea/anthropic-0.76.0-py3-none-any.whl", hash = "sha256:81efa3113901192af2f0fe977d3ec73fdadb1e691586306c4256cd6d5ccc331c", size = 390309 }, +] + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592 }, +] + +[[package]] +name = "certifi" +version = "2026.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425 
}, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162 }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558 }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497 }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240 }, + { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471 }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864 }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647 }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110 }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839 }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667 }, + { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535 }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816 }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694 }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131 }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390 }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091 }, + { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936 }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180 }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346 }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874 }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076 }, + { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601 }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376 }, + { url = 
"https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825 }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583 }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366 }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300 }, + { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465 }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404 }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092 }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408 }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746 }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889 }, + { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641 }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779 }, + { url = 
"https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035 }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542 }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524 }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395 }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680 }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045 }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687 }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014 }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044 }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940 }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104 }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743 }, + { url = 
"https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402 }, +] + +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274 }, +] + +[[package]] +name = "cohere" +version = "5.20.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastavro" }, + { name = "httpx" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "requests" }, + { name = "tokenizers" }, + { name = "types-requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/52/08564d1820970010d30421cd6e36f2e4ca552646504d3fe532eef282c88d/cohere-5.20.2.tar.gz", hash = "sha256:0aa9f3735626b70eedf15c231c61f3a58e7f8bbe5f0509fe7b2e6606c5d420f1", size = 180820 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/10/d76f045eefe42fb3f4e271d17ab41b5e73a3b6de69c98e15ab1cb0c8e6f6/cohere-5.20.2-py3-none-any.whl", hash = "sha256:26156d83bf3e3e4475e4caa1d8c4148475c5b0a253aee6066d83c643e9045be6", size = 318986 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, +] + +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896 }, +] + +[[package]] +name = "fastavro" +version = "1.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/8b/fa2d3287fd2267be6261d0177c6809a7fa12c5600ddb33490c8dc29e77b2/fastavro-1.12.1.tar.gz", hash = "sha256:2f285be49e45bc047ab2f6bed040bb349da85db3f3c87880e4b92595ea093b2b", size = 1025661 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/f0/10bd1a3d08667fa0739e2b451fe90e06df575ec8b8ba5d3135c70555c9bd/fastavro-1.12.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:509818cb24b98a804fc80be9c5fed90f660310ae3d59382fc811bfa187122167", size = 1009057 }, + { url = "https://files.pythonhosted.org/packages/78/ad/0d985bc99e1fa9e74c636658000ba38a5cd7f5ab2708e9c62eaf736ecf1a/fastavro-1.12.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:089e155c0c76e0d418d7e79144ce000524dd345eab3bc1e9c5ae69d500f71b14", size = 3391866 }, + { url = "https://files.pythonhosted.org/packages/0d/9e/b4951dc84ebc34aac69afcbfbb22ea4a91080422ec2bfd2c06076ff1d419/fastavro-1.12.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44cbff7518901c91a82aab476fcab13d102e4999499df219d481b9e15f61af34", size = 3458005 }, + { url = "https://files.pythonhosted.org/packages/af/f8/5a8df450a9f55ca8441f22ea0351d8c77809fc121498b6970daaaf667a21/fastavro-1.12.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a275e48df0b1701bb764b18a8a21900b24cf882263cb03d35ecdba636bbc830b", size = 3295258 }, + { url = "https://files.pythonhosted.org/packages/99/b2/40f25299111d737e58b85696e91138a66c25b7334f5357e7ac2b0e8966f8/fastavro-1.12.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2de72d786eb38be6b16d556b27232b1bf1b2797ea09599507938cdb7a9fe3e7c", size = 3430328 }, + { url = "https://files.pythonhosted.org/packages/e0/07/85157a7c57c5f8b95507d7829b5946561e5ee656ff80e9dd9a757f53ddaf/fastavro-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:9090f0dee63fe022ee9cc5147483366cc4171c821644c22da020d6b48f576b4f", size = 444140 }, + { url = "https://files.pythonhosted.org/packages/bb/57/26d5efef9182392d5ac9f253953c856ccb66e4c549fd3176a1e94efb05c9/fastavro-1.12.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:78df838351e4dff9edd10a1c41d1324131ffecbadefb9c297d612ef5363c049a", size = 1000599 }, + { url = "https://files.pythonhosted.org/packages/33/cb/8ab55b21d018178eb126007a56bde14fd01c0afc11d20b5f2624fe01e698/fastavro-1.12.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:780476c23175d2ae457c52f45b9ffa9d504593499a36cd3c1929662bf5b7b14b", size = 3335933 }, + { url = "https://files.pythonhosted.org/packages/fe/03/9c94ec9bf873eb1ffb0aa694f4e71940154e6e9728ddfdc46046d7e8ced4/fastavro-1.12.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0714b285160fcd515eb0455540f40dd6dac93bdeacdb03f24e8eac3d8aa51f8d", size = 3402066 }, + { url = "https://files.pythonhosted.org/packages/75/c8/cb472347c5a584ccb8777a649ebb28278fccea39d005fc7df19996f41df8/fastavro-1.12.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a8bc2dcec5843d499f2489bfe0747999108f78c5b29295d877379f1972a3d41a", size = 3240038 }, + { url = 
"https://files.pythonhosted.org/packages/e1/77/569ce9474c40304b3a09e109494e020462b83e405545b78069ddba5f614e/fastavro-1.12.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3b1921ac35f3d89090a5816b626cf46e67dbecf3f054131f84d56b4e70496f45", size = 3369398 }, + { url = "https://files.pythonhosted.org/packages/4a/1f/9589e35e9ea68035385db7bdbf500d36b8891db474063fb1ccc8215ee37c/fastavro-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:5aa777b8ee595b50aa084104cd70670bf25a7bbb9fd8bb5d07524b0785ee1699", size = 444220 }, + { url = "https://files.pythonhosted.org/packages/6c/d2/78435fe737df94bd8db2234b2100f5453737cffd29adee2504a2b013de84/fastavro-1.12.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:c3d67c47f177e486640404a56f2f50b165fe892cc343ac3a34673b80cc7f1dd6", size = 1086611 }, + { url = "https://files.pythonhosted.org/packages/b6/be/428f99b10157230ddac77ec8cc167005b29e2bd5cbe228345192bb645f30/fastavro-1.12.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5217f773492bac43dae15ff2931432bce2d7a80be7039685a78d3fab7df910bd", size = 3541001 }, + { url = "https://files.pythonhosted.org/packages/16/08/a2eea4f20b85897740efe44887e1ac08f30dfa4bfc3de8962bdcbb21a5a1/fastavro-1.12.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:469fecb25cba07f2e1bfa4c8d008477cd6b5b34a59d48715e1b1a73f6160097d", size = 3432217 }, + { url = "https://files.pythonhosted.org/packages/87/bb/b4c620b9eb6e9838c7f7e4b7be0762834443adf9daeb252a214e9ad3178c/fastavro-1.12.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d71c8aa841ef65cfab709a22bb887955f42934bced3ddb571e98fdbdade4c609", size = 3366742 }, + { url = "https://files.pythonhosted.org/packages/3d/d1/e69534ccdd5368350646fea7d93be39e5f77c614cca825c990bd9ca58f67/fastavro-1.12.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:b81fc04e85dfccf7c028e0580c606e33aa8472370b767ef058aae2c674a90746", size = 3383743 }, + { url = "https://files.pythonhosted.org/packages/58/54/b7b4a0c3fb5fcba38128542da1b26c4e6d69933c923f493548bdfd63ab6a/fastavro-1.12.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:9445da127751ba65975d8e4bdabf36bfcfdad70fc35b2d988e3950cce0ec0e7c", size = 1001377 }, + { url = "https://files.pythonhosted.org/packages/1e/4f/0e589089c7df0d8f57d7e5293fdc34efec9a3b758a0d4d0c99a7937e2492/fastavro-1.12.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed924233272719b5d5a6a0b4d80ef3345fc7e84fc7a382b6232192a9112d38a6", size = 3320401 }, + { url = "https://files.pythonhosted.org/packages/f9/19/260110d56194ae29d7e423a336fccea8bcd103196d00f0b364b732bdb84e/fastavro-1.12.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3616e2f0e1c9265e92954fa099db79c6e7817356d3ff34f4bcc92699ae99697c", size = 3350894 }, + { url = "https://files.pythonhosted.org/packages/d0/96/58b0411e8be9694d5972bee3167d6c1fd1fdfdf7ce253c1a19a327208f4f/fastavro-1.12.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cb0337b42fd3c047fcf0e9b7597bd6ad25868de719f29da81eabb6343f08d399", size = 3229644 }, + { url = "https://files.pythonhosted.org/packages/5b/db/38660660eac82c30471d9101f45b3acfdcbadfe42d8f7cdb129459a45050/fastavro-1.12.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:64961ab15b74b7c168717bbece5660e0f3d457837c3cc9d9145181d011199fa7", size = 3329704 }, + { url = 
"https://files.pythonhosted.org/packages/9d/a9/1672910f458ecb30b596c9e59e41b7c00309b602a0494341451e92e62747/fastavro-1.12.1-cp314-cp314-win_amd64.whl", hash = "sha256:792356d320f6e757e89f7ac9c22f481e546c886454a6709247f43c0dd7058004", size = 452911 }, + { url = "https://files.pythonhosted.org/packages/dc/8d/2e15d0938ded1891b33eff252e8500605508b799c2e57188a933f0bd744c/fastavro-1.12.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:120aaf82ac19d60a1016afe410935fe94728752d9c2d684e267e5b7f0e70f6d9", size = 3541999 }, + { url = "https://files.pythonhosted.org/packages/a7/1c/6dfd082a205be4510543221b734b1191299e6a1810c452b6bc76dfa6968e/fastavro-1.12.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6a3462934b20a74f9ece1daa49c2e4e749bd9a35fa2657b53bf62898fba80f5", size = 3433972 }, + { url = "https://files.pythonhosted.org/packages/24/90/9de694625a1a4b727b1ad0958d220cab25a9b6cf7f16a5c7faa9ea7b2261/fastavro-1.12.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1f81011d54dd47b12437b51dd93a70a9aa17b61307abf26542fc3c13efbc6c51", size = 3368752 }, + { url = "https://files.pythonhosted.org/packages/fa/93/b44f67589e4d439913dab6720f7e3507b0fa8b8e56d06f6fc875ced26afb/fastavro-1.12.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:43ded16b3f4a9f1a42f5970c2aa618acb23ea59c4fcaa06680bdf470b255e5a8", size = 3386636 }, +] + +[[package]] +name = "filelock" +version = "3.20.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/65/ce7f1b70157833bf3cb851b556a37d4547ceafc158aa9b34b36782f23696/filelock-3.20.3.tar.gz", hash = "sha256:18c57ee915c7ec61cff0ecf7f0f869936c7c30191bb0cf406f1341778d0834e1", size = 19485 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/36/7fb70f04bf00bc646cd5bb45aa9eddb15e19437a28b8fb2b4a5249fac770/filelock-3.20.3-py3-none-any.whl", hash = "sha256:4b0dda527ee31078689fc205ec4f1c1bf7d56cf88b6dc9426c4f230e46c2dce1", size = 16701 }, +] + +[[package]] +name = "fsspec" +version = "2026.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/7d/5df2650c57d47c57232af5ef4b4fdbff182070421e405e0d62c6cdbfaa87/fsspec-2026.1.0.tar.gz", hash = "sha256:e987cb0496a0d81bba3a9d1cee62922fb395e7d4c3b575e57f547953334fe07b", size = 310496 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/c9/97cc5aae1648dcb851958a3ddf73ccd7dbe5650d95203ecb4d7720b4cdbf/fsspec-2026.1.0-py3-none-any.whl", hash = "sha256:cb76aa913c2285a3b49bdd5fc55b1d7c708d7208126b60f2eb8194fe1b4cbdcc", size = 201838 }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 }, +] + +[[package]] +name = "hf-xet" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = 
"sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870 }, + { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584 }, + { url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004 }, + { url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636 }, + { url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448 }, + { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401 }, + { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866 }, + { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861 }, + { url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699 }, + { url = "https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885 }, + { url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550 }, + { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010 }, + { url = 
"https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264 }, + { url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071 }, + { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099 }, + { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178 }, + { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214 }, + { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054 }, + { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812 }, + { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920 }, + { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735 }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + +[[package]] +name = "huggingface-hub" +version = "1.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "shellingham" }, + { name = "tqdm" }, + { name = "typer-slim" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/25/74af9d16cd59ae15b12467a79a84aa0fe24be4aba68fc4da0c1864d49c17/huggingface_hub-1.3.4.tar.gz", hash = "sha256:c20d5484a611b7b7891d272e8fc9f77d5de025b0480bdacfa858efb3780b455f", size = 627683 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/07/3d0c34c345043c6a398a5882e196b2220dc5861adfa18322448b90908f26/huggingface_hub-1.3.4-py3-none-any.whl", hash = "sha256:a0c526e76eb316e96a91e8a1a7a93cf66b0dd210be1a17bd5fc5ae53cba76bfd", size = 536611 }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008 }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 }, +] + +[[package]] +name = "jiter" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/45/9d/e0660989c1370e25848bb4c52d061c71837239738ad937e83edca174c273/jiter-0.12.0.tar.gz", hash = "sha256:64dfcd7d5c168b38d3f9f8bba7fc639edb3418abcc74f22fdbe6b8938293f30b", size = 168294 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/c9/5b9f7b4983f1b542c64e84165075335e8a236fa9e2ea03a0c79780062be8/jiter-0.12.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:305e061fa82f4680607a775b2e8e0bcb071cd2205ac38e6ef48c8dd5ebe1cf37", size = 314449 }, + { url = "https://files.pythonhosted.org/packages/98/6e/e8efa0e78de00db0aee82c0cf9e8b3f2027efd7f8a71f859d8f4be8e98ef/jiter-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c1860627048e302a528333c9307c818c547f214d8659b0705d2195e1a94b274", size = 319855 }, + { url = 
"https://files.pythonhosted.org/packages/20/26/894cd88e60b5d58af53bec5c6759d1292bd0b37a8b5f60f07abf7a63ae5f/jiter-0.12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df37577a4f8408f7e0ec3205d2a8f87672af8f17008358063a4d6425b6081ce3", size = 350171 }, + { url = "https://files.pythonhosted.org/packages/f5/27/a7b818b9979ac31b3763d25f3653ec3a954044d5e9f5d87f2f247d679fd1/jiter-0.12.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fdd787356c1c13a4f40b43c2156276ef7a71eb487d98472476476d803fb2cf", size = 365590 }, + { url = "https://files.pythonhosted.org/packages/ba/7e/e46195801a97673a83746170b17984aa8ac4a455746354516d02ca5541b4/jiter-0.12.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1eb5db8d9c65b112aacf14fcd0faae9913d07a8afea5ed06ccdd12b724e966a1", size = 479462 }, + { url = "https://files.pythonhosted.org/packages/ca/75/f833bfb009ab4bd11b1c9406d333e3b4357709ed0570bb48c7c06d78c7dd/jiter-0.12.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:73c568cc27c473f82480abc15d1301adf333a7ea4f2e813d6a2c7d8b6ba8d0df", size = 378983 }, + { url = "https://files.pythonhosted.org/packages/71/b3/7a69d77943cc837d30165643db753471aff5df39692d598da880a6e51c24/jiter-0.12.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4321e8a3d868919bcb1abb1db550d41f2b5b326f72df29e53b2df8b006eb9403", size = 361328 }, + { url = "https://files.pythonhosted.org/packages/b0/ac/a78f90caf48d65ba70d8c6efc6f23150bc39dc3389d65bbec2a95c7bc628/jiter-0.12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0a51bad79f8cc9cac2b4b705039f814049142e0050f30d91695a2d9a6611f126", size = 386740 }, + { url = "https://files.pythonhosted.org/packages/39/b6/5d31c2cc8e1b6a6bcf3c5721e4ca0a3633d1ab4754b09bc7084f6c4f5327/jiter-0.12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2a67b678f6a5f1dd6c36d642d7db83e456bc8b104788262aaefc11a22339f5a9", size = 520875 }, + { url = "https://files.pythonhosted.org/packages/30/b5/4df540fae4e9f68c54b8dab004bd8c943a752f0b00efd6e7d64aa3850339/jiter-0.12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efe1a211fe1fd14762adea941e3cfd6c611a136e28da6c39272dbb7a1bbe6a86", size = 511457 }, + { url = "https://files.pythonhosted.org/packages/07/65/86b74010e450a1a77b2c1aabb91d4a91dd3cd5afce99f34d75fd1ac64b19/jiter-0.12.0-cp312-cp312-win32.whl", hash = "sha256:d779d97c834b4278276ec703dc3fc1735fca50af63eb7262f05bdb4e62203d44", size = 204546 }, + { url = "https://files.pythonhosted.org/packages/1c/c7/6659f537f9562d963488e3e55573498a442503ced01f7e169e96a6110383/jiter-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:e8269062060212b373316fe69236096aaf4c49022d267c6736eebd66bbbc60bb", size = 205196 }, + { url = "https://files.pythonhosted.org/packages/21/f4/935304f5169edadfec7f9c01eacbce4c90bb9a82035ac1de1f3bd2d40be6/jiter-0.12.0-cp312-cp312-win_arm64.whl", hash = "sha256:06cb970936c65de926d648af0ed3d21857f026b1cf5525cb2947aa5e01e05789", size = 186100 }, + { url = "https://files.pythonhosted.org/packages/3d/a6/97209693b177716e22576ee1161674d1d58029eb178e01866a0422b69224/jiter-0.12.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6cc49d5130a14b732e0612bc76ae8db3b49898732223ef8b7599aa8d9810683e", size = 313658 }, + { url = "https://files.pythonhosted.org/packages/06/4d/125c5c1537c7d8ee73ad3d530a442d6c619714b95027143f1b61c0b4dfe0/jiter-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:37f27a32ce36364d2fa4f7fdc507279db604d27d239ea2e044c8f148410defe1", size = 318605 }, + { url = "https://files.pythonhosted.org/packages/99/bf/a840b89847885064c41a5f52de6e312e91fa84a520848ee56c97e4fa0205/jiter-0.12.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbc0944aa3d4b4773e348cda635252824a78f4ba44328e042ef1ff3f6080d1cf", size = 349803 }, + { url = "https://files.pythonhosted.org/packages/8a/88/e63441c28e0db50e305ae23e19c1d8fae012d78ed55365da392c1f34b09c/jiter-0.12.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da25c62d4ee1ffbacb97fac6dfe4dcd6759ebdc9015991e92a6eae5816287f44", size = 365120 }, + { url = "https://files.pythonhosted.org/packages/0a/7c/49b02714af4343970eb8aca63396bc1c82fa01197dbb1e9b0d274b550d4e/jiter-0.12.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:048485c654b838140b007390b8182ba9774621103bd4d77c9c3f6f117474ba45", size = 479918 }, + { url = "https://files.pythonhosted.org/packages/69/ba/0a809817fdd5a1db80490b9150645f3aae16afad166960bcd562be194f3b/jiter-0.12.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:635e737fbb7315bef0037c19b88b799143d2d7d3507e61a76751025226b3ac87", size = 379008 }, + { url = "https://files.pythonhosted.org/packages/5f/c3/c9fc0232e736c8877d9e6d83d6eeb0ba4e90c6c073835cc2e8f73fdeef51/jiter-0.12.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e017c417b1ebda911bd13b1e40612704b1f5420e30695112efdbed8a4b389ed", size = 361785 }, + { url = "https://files.pythonhosted.org/packages/96/61/61f69b7e442e97ca6cd53086ddc1cf59fb830549bc72c0a293713a60c525/jiter-0.12.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:89b0bfb8b2bf2351fba36bb211ef8bfceba73ef58e7f0c68fb67b5a2795ca2f9", size = 386108 }, + { url = "https://files.pythonhosted.org/packages/e9/2e/76bb3332f28550c8f1eba3bf6e5efe211efda0ddbbaf24976bc7078d42a5/jiter-0.12.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:f5aa5427a629a824a543672778c9ce0c5e556550d1569bb6ea28a85015287626", size = 519937 }, + { url = "https://files.pythonhosted.org/packages/84/d6/fa96efa87dc8bff2094fb947f51f66368fa56d8d4fc9e77b25d7fbb23375/jiter-0.12.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed53b3d6acbcb0fd0b90f20c7cb3b24c357fe82a3518934d4edfa8c6898e498c", size = 510853 }, + { url = "https://files.pythonhosted.org/packages/8a/28/93f67fdb4d5904a708119a6ab58a8f1ec226ff10a94a282e0215402a8462/jiter-0.12.0-cp313-cp313-win32.whl", hash = "sha256:4747de73d6b8c78f2e253a2787930f4fffc68da7fa319739f57437f95963c4de", size = 204699 }, + { url = "https://files.pythonhosted.org/packages/c4/1f/30b0eb087045a0abe2a5c9c0c0c8da110875a1d3be83afd4a9a4e548be3c/jiter-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:e25012eb0c456fcc13354255d0338cd5397cce26c77b2832b3c4e2e255ea5d9a", size = 204258 }, + { url = "https://files.pythonhosted.org/packages/2c/f4/2b4daf99b96bce6fc47971890b14b2a36aef88d7beb9f057fafa032c6141/jiter-0.12.0-cp313-cp313-win_arm64.whl", hash = "sha256:c97b92c54fe6110138c872add030a1f99aea2401ddcdaa21edf74705a646dd60", size = 185503 }, + { url = "https://files.pythonhosted.org/packages/39/ca/67bb15a7061d6fe20b9b2a2fd783e296a1e0f93468252c093481a2f00efa/jiter-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:53839b35a38f56b8be26a7851a48b89bc47e5d88e900929df10ed93b95fea3d6", size = 317965 }, + { url = 
"https://files.pythonhosted.org/packages/18/af/1788031cd22e29c3b14bc6ca80b16a39a0b10e611367ffd480c06a259831/jiter-0.12.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94f669548e55c91ab47fef8bddd9c954dab1938644e715ea49d7e117015110a4", size = 345831 }, + { url = "https://files.pythonhosted.org/packages/05/17/710bf8472d1dff0d3caf4ced6031060091c1320f84ee7d5dcbed1f352417/jiter-0.12.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:351d54f2b09a41600ffea43d081522d792e81dcfb915f6d2d242744c1cc48beb", size = 361272 }, + { url = "https://files.pythonhosted.org/packages/fb/f1/1dcc4618b59761fef92d10bcbb0b038b5160be653b003651566a185f1a5c/jiter-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2a5e90604620f94bf62264e7c2c038704d38217b7465b863896c6d7c902b06c7", size = 204604 }, + { url = "https://files.pythonhosted.org/packages/d9/32/63cb1d9f1c5c6632a783c0052cde9ef7ba82688f7065e2f0d5f10a7e3edb/jiter-0.12.0-cp313-cp313t-win_arm64.whl", hash = "sha256:88ef757017e78d2860f96250f9393b7b577b06a956ad102c29c8237554380db3", size = 185628 }, + { url = "https://files.pythonhosted.org/packages/a8/99/45c9f0dbe4a1416b2b9a8a6d1236459540f43d7fb8883cff769a8db0612d/jiter-0.12.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c46d927acd09c67a9fb1416df45c5a04c27e83aae969267e98fba35b74e99525", size = 312478 }, + { url = "https://files.pythonhosted.org/packages/4c/a7/54ae75613ba9e0f55fcb0bc5d1f807823b5167cc944e9333ff322e9f07dd/jiter-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:774ff60b27a84a85b27b88cd5583899c59940bcc126caca97eb2a9df6aa00c49", size = 318706 }, + { url = "https://files.pythonhosted.org/packages/59/31/2aa241ad2c10774baf6c37f8b8e1f39c07db358f1329f4eb40eba179c2a2/jiter-0.12.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5433fab222fb072237df3f637d01b81f040a07dcac1cb4a5c75c7aa9ed0bef1", size = 351894 }, + { url = "https://files.pythonhosted.org/packages/54/4f/0f2759522719133a9042781b18cc94e335b6d290f5e2d3e6899d6af933e3/jiter-0.12.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f8c593c6e71c07866ec6bfb790e202a833eeec885022296aff6b9e0b92d6a70e", size = 365714 }, + { url = "https://files.pythonhosted.org/packages/dc/6f/806b895f476582c62a2f52c453151edd8a0fde5411b0497baaa41018e878/jiter-0.12.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:90d32894d4c6877a87ae00c6b915b609406819dce8bc0d4e962e4de2784e567e", size = 478989 }, + { url = "https://files.pythonhosted.org/packages/86/6c/012d894dc6e1033acd8db2b8346add33e413ec1c7c002598915278a37f79/jiter-0.12.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:798e46eed9eb10c3adbbacbd3bdb5ecd4cf7064e453d00dbef08802dae6937ff", size = 378615 }, + { url = "https://files.pythonhosted.org/packages/87/30/d718d599f6700163e28e2c71c0bbaf6dace692e7df2592fd793ac9276717/jiter-0.12.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3f1368f0a6719ea80013a4eb90ba72e75d7ea67cfc7846db2ca504f3df0169a", size = 364745 }, + { url = "https://files.pythonhosted.org/packages/8f/85/315b45ce4b6ddc7d7fceca24068543b02bdc8782942f4ee49d652e2cc89f/jiter-0.12.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65f04a9d0b4406f7e51279710b27484af411896246200e461d80d3ba0caa901a", size = 386502 }, + { url = "https://files.pythonhosted.org/packages/74/0b/ce0434fb40c5b24b368fe81b17074d2840748b4952256bab451b72290a49/jiter-0.12.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = 
"sha256:fd990541982a24281d12b67a335e44f117e4c6cbad3c3b75c7dea68bf4ce3a67", size = 519845 }, + { url = "https://files.pythonhosted.org/packages/e8/a3/7a7a4488ba052767846b9c916d208b3ed114e3eb670ee984e4c565b9cf0d/jiter-0.12.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:b111b0e9152fa7df870ecaebb0bd30240d9f7fff1f2003bcb4ed0f519941820b", size = 510701 }, + { url = "https://files.pythonhosted.org/packages/c3/16/052ffbf9d0467b70af24e30f91e0579e13ded0c17bb4a8eb2aed3cb60131/jiter-0.12.0-cp314-cp314-win32.whl", hash = "sha256:a78befb9cc0a45b5a5a0d537b06f8544c2ebb60d19d02c41ff15da28a9e22d42", size = 205029 }, + { url = "https://files.pythonhosted.org/packages/e4/18/3cf1f3f0ccc789f76b9a754bdb7a6977e5d1d671ee97a9e14f7eb728d80e/jiter-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:e1fe01c082f6aafbe5c8faf0ff074f38dfb911d53f07ec333ca03f8f6226debf", size = 204960 }, + { url = "https://files.pythonhosted.org/packages/02/68/736821e52ecfdeeb0f024b8ab01b5a229f6b9293bbdb444c27efade50b0f/jiter-0.12.0-cp314-cp314-win_arm64.whl", hash = "sha256:d72f3b5a432a4c546ea4bedc84cce0c3404874f1d1676260b9c7f048a9855451", size = 185529 }, + { url = "https://files.pythonhosted.org/packages/30/61/12ed8ee7a643cce29ac97c2281f9ce3956eb76b037e88d290f4ed0d41480/jiter-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e6ded41aeba3603f9728ed2b6196e4df875348ab97b28fc8afff115ed42ba7a7", size = 318974 }, + { url = "https://files.pythonhosted.org/packages/2d/c6/f3041ede6d0ed5e0e79ff0de4c8f14f401bbf196f2ef3971cdbe5fd08d1d/jiter-0.12.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a947920902420a6ada6ad51892082521978e9dd44a802663b001436e4b771684", size = 345932 }, + { url = "https://files.pythonhosted.org/packages/d5/5d/4d94835889edd01ad0e2dbfc05f7bdfaed46292e7b504a6ac7839aa00edb/jiter-0.12.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:add5e227e0554d3a52cf390a7635edaffdf4f8fce4fdbcef3cc2055bb396a30c", size = 367243 }, + { url = "https://files.pythonhosted.org/packages/fd/76/0051b0ac2816253a99d27baf3dda198663aff882fa6ea7deeb94046da24e/jiter-0.12.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f9b1cda8fcb736250d7e8711d4580ebf004a46771432be0ae4796944b5dfa5d", size = 479315 }, + { url = "https://files.pythonhosted.org/packages/70/ae/83f793acd68e5cb24e483f44f482a1a15601848b9b6f199dacb970098f77/jiter-0.12.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deeb12a2223fe0135c7ff1356a143d57f95bbf1f4a66584f1fc74df21d86b993", size = 380714 }, + { url = "https://files.pythonhosted.org/packages/b1/5e/4808a88338ad2c228b1126b93fcd8ba145e919e886fe910d578230dabe3b/jiter-0.12.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c596cc0f4cb574877550ce4ecd51f8037469146addd676d7c1a30ebe6391923f", size = 365168 }, + { url = "https://files.pythonhosted.org/packages/0c/d4/04619a9e8095b42aef436b5aeb4c0282b4ff1b27d1db1508df9f5dc82750/jiter-0.12.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ab4c823b216a4aeab3fdbf579c5843165756bd9ad87cc6b1c65919c4715f783", size = 387893 }, + { url = "https://files.pythonhosted.org/packages/17/ea/d3c7e62e4546fdc39197fa4a4315a563a89b95b6d54c0d25373842a59cbe/jiter-0.12.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e427eee51149edf962203ff8db75a7514ab89be5cb623fb9cea1f20b54f1107b", size = 520828 }, + { url = 
"https://files.pythonhosted.org/packages/cc/0b/c6d3562a03fd767e31cb119d9041ea7958c3c80cb3d753eafb19b3b18349/jiter-0.12.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:edb868841f84c111255ba5e80339d386d937ec1fdce419518ce1bd9370fac5b6", size = 511009 }, + { url = "https://files.pythonhosted.org/packages/aa/51/2cb4468b3448a8385ebcd15059d325c9ce67df4e2758d133ab9442b19834/jiter-0.12.0-cp314-cp314t-win32.whl", hash = "sha256:8bbcfe2791dfdb7c5e48baf646d37a6a3dcb5a97a032017741dea9f817dca183", size = 205110 }, + { url = "https://files.pythonhosted.org/packages/b2/c5/ae5ec83dec9c2d1af805fd5fe8f74ebded9c8670c5210ec7820ce0dbeb1e/jiter-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2fa940963bf02e1d8226027ef461e36af472dea85d36054ff835aeed944dd873", size = 205223 }, + { url = "https://files.pythonhosted.org/packages/97/9a/3c5391907277f0e55195550cf3fa8e293ae9ee0c00fb402fec1e38c0c82f/jiter-0.12.0-cp314-cp314t-win_arm64.whl", hash = "sha256:506c9708dd29b27288f9f8f1140c3cb0e3d8ddb045956d7757b1fa0e0f39a473", size = 185564 }, +] + +[[package]] +name = "nodeenv" +version = "1.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/24/bf/d1bda4f6168e0b2e9e5958945e01910052158313224ada5ce1fb2e1113b8/nodeenv-1.10.0.tar.gz", hash = "sha256:996c191ad80897d076bdfba80a41994c2b47c68e224c542b48feba42ba00f8bb", size = 55611 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438 }, +] + +[[package]] +name = "nvisy-dal" +version = "0.1.0" +source = { editable = "../nvisy-dal" } +dependencies = [ + { name = "pydantic" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiomysql", marker = "extra == 'mysql'", specifier = ">=0.2" }, + { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30" }, + { name = "azure-storage-blob", marker = "extra == 'azure'", specifier = ">=12.23" }, + { name = "boto3", marker = "extra == 's3'", specifier = ">=1.35" }, + { name = "google-cloud-storage", marker = "extra == 'gcs'", specifier = ">=2.18" }, + { name = "moto", marker = "extra == 'dev'", specifier = ">=5.0" }, + { name = "nvisy-dal", extras = ["all"], marker = "extra == 'dev'" }, + { name = "nvisy-dal", extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone"], marker = "extra == 'all'" }, + { name = "pinecone-client", marker = "extra == 'pinecone'", specifier = ">=5.0" }, + { name = "pydantic", specifier = ">=2.10" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24" }, + { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.12" }, + { name = "types-boto3", marker = "extra == 's3'" }, +] +provides-extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone", "all", "dev"] + +[package.metadata.requires-dev] +dev = [ + { name = "pyright", specifier = ">=1.1.408" }, + { name = "ruff", specifier = ">=0.14.14" }, +] + +[[package]] +name = "nvisy-rig" +version = "0.1.0" +source = { editable = "." 
} +dependencies = [ + { name = "nvisy-dal" }, + { name = "pydantic" }, +] + +[package.optional-dependencies] +all = [ + { name = "anthropic" }, + { name = "cohere" }, + { name = "openai" }, +] +anthropic = [ + { name = "anthropic" }, +] +cohere = [ + { name = "cohere" }, +] +dev = [ + { name = "anthropic" }, + { name = "cohere" }, + { name = "openai" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, +] +openai = [ + { name = "openai" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pyright" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "anthropic", marker = "extra == 'anthropic'", specifier = ">=0.40" }, + { name = "cohere", marker = "extra == 'cohere'", specifier = ">=5.13" }, + { name = "nvisy-dal", editable = "../nvisy-dal" }, + { name = "nvisy-rig", extras = ["all"], marker = "extra == 'dev'" }, + { name = "nvisy-rig", extras = ["openai", "anthropic", "cohere"], marker = "extra == 'all'" }, + { name = "openai", marker = "extra == 'openai'", specifier = ">=1.60" }, + { name = "pydantic", specifier = ">=2.10" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, + { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24" }, +] +provides-extras = ["openai", "anthropic", "cohere", "all", "dev"] + +[package.metadata.requires-dev] +dev = [ + { name = "pyright", specifier = ">=1.1.408" }, + { name = "ruff", specifier = ">=0.14.14" }, +] + +[[package]] +name = "openai" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/f4/4690ecb5d70023ce6bfcfeabfe717020f654bde59a775058ec6ac4692463/openai-2.15.0.tar.gz", hash = "sha256:42eb8cbb407d84770633f31bf727d4ffb4138711c670565a41663d9439174fba", size = 627383 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879 }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366 }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580 }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990 }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003 }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200 }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578 }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504 }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816 }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366 }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698 }, + { url = 
"https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603 }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591 }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068 }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908 }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145 }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179 }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403 }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206 }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307 }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258 }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917 }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186 }, + { url = 
"https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164 }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146 }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788 }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133 }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852 }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679 }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766 }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005 }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622 }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725 }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040 }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691 }, + { url = 
"https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897 }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302 }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877 }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680 }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960 }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102 }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039 }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126 }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489 }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288 }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255 }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760 }, + { url = 
"https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092 }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385 }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832 }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585 }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078 }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914 }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560 }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244 }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955 }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906 }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607 }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769 }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 }, +] + +[[package]] +name = "pyright" +version = "1.1.408" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nodeenv" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/74/b2/5db700e52554b8f025faa9c3c624c59f1f6c8841ba81ab97641b54322f16/pyright-1.1.408.tar.gz", hash = "sha256:f28f2321f96852fa50b5829ea492f6adb0e6954568d1caa3f3af3a5f555eb684", size = 4400578 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/82/a2c93e32800940d9573fb28c346772a14778b84ba7524e691b324620ab89/pyright-1.1.408-py3-none-any.whl", hash = "sha256:090b32865f4fdb1e0e6cd82bf5618480d48eecd2eb2e70f960982a3d9a4c17c1", size = 6399144 }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801 }, +] + +[[package]] +name = "pytest-asyncio" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063 }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973 }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116 }, + { url = "https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011 }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870 }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089 }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181 }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658 }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003 }, + { url = "https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344 }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669 }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252 }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081 }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159 }, + { url = 
"https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626 }, + { url = "https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613 }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115 }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427 }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090 }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246 }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814 }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809 }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454 }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355 }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175 }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228 }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194 }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429 }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912 }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108 }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641 }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901 }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132 }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261 }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272 }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923 }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062 }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341 }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = 
"sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738 }, +] + +[[package]] +name = "ruff" +version = "0.14.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650 }, + { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245 }, + { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273 }, + { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753 }, + { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052 }, + { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637 }, + { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761 }, + { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701 }, + { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455 }, + { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882 }, + { url = 
"https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549 }, + { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416 }, + { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491 }, + { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525 }, + { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626 }, + { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442 }, + { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486 }, + { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448 }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755 }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + +[[package]] +name = "tokenizers" +version = "0.22.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/97/5dbfabf04c7e348e655e907ed27913e03db0923abb5dfdd120d7b25630e1/tokenizers-0.22.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:544dd704ae7238755d790de45ba8da072e9af3eea688f698b137915ae959281c", size = 3100275 }, + { url = "https://files.pythonhosted.org/packages/2e/47/174dca0502ef88b28f1c9e06b73ce33500eedfac7a7692108aec220464e7/tokenizers-0.22.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:1e418a55456beedca4621dbab65a318981467a2b188e982a23e117f115ce5001", size = 2981472 }, + { url = "https://files.pythonhosted.org/packages/d6/84/7990e799f1309a8b87af6b948f31edaa12a3ed22d11b352eaf4f4b2e5753/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2249487018adec45d6e3554c71d46eb39fa8ea67156c640f7513eb26f318cec7", size = 3290736 }, + { url = "https://files.pythonhosted.org/packages/78/59/09d0d9ba94dcd5f4f1368d4858d24546b4bdc0231c2354aa31d6199f0399/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25b85325d0815e86e0bac263506dd114578953b7b53d7de09a6485e4a160a7dd", size = 3168835 }, + { url = "https://files.pythonhosted.org/packages/47/50/b3ebb4243e7160bda8d34b731e54dd8ab8b133e50775872e7a434e524c28/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfb88f22a209ff7b40a576d5324bf8286b519d7358663db21d6246fb17eea2d5", size = 3521673 }, + { url = "https://files.pythonhosted.org/packages/e0/fa/89f4cb9e08df770b57adb96f8cbb7e22695a4cb6c2bd5f0c4f0ebcf33b66/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c774b1276f71e1ef716e5486f21e76333464f47bece56bbd554485982a9e03e", size = 3724818 }, + { url = "https://files.pythonhosted.org/packages/64/04/ca2363f0bfbe3b3d36e95bf67e56a4c88c8e3362b658e616d1ac185d47f2/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df6c4265b289083bf710dff49bc51ef252f9d5be33a45ee2bed151114a56207b", size = 3379195 }, + { url = "https://files.pythonhosted.org/packages/2e/76/932be4b50ef6ccedf9d3c6639b056a967a86258c6d9200643f01269211ca/tokenizers-0.22.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:369cc9fc8cc10cb24143873a0d95438bb8ee257bb80c71989e3ee290e8d72c67", size = 3274982 }, + { url = "https://files.pythonhosted.org/packages/1d/28/5f9f5a4cc211b69e89420980e483831bcc29dade307955cc9dc858a40f01/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:29c30b83d8dcd061078b05ae0cb94d3c710555fbb44861139f9f83dcca3dc3e4", size = 9478245 }, + { url = "https://files.pythonhosted.org/packages/6c/fb/66e2da4704d6aadebf8cb39f1d6d1957df667ab24cff2326b77cda0dcb85/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:37ae80a28c1d3265bb1f22464c856bd23c02a05bb211e56d0c5301a435be6c1a", size = 9560069 }, + { url = "https://files.pythonhosted.org/packages/16/04/fed398b05caa87ce9b1a1bb5166645e38196081b225059a6edaff6440fac/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:791135ee325f2336f498590eb2f11dc5c295232f288e75c99a36c5dbce63088a", size = 9899263 }, + { url = "https://files.pythonhosted.org/packages/05/a1/d62dfe7376beaaf1394917e0f8e93ee5f67fea8fcf4107501db35996586b/tokenizers-0.22.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:38337540fbbddff8e999d59970f3c6f35a82de10053206a7562f1ea02d046fa5", size = 10033429 }, + { url = "https://files.pythonhosted.org/packages/fd/18/a545c4ea42af3df6effd7d13d250ba77a0a86fb20393143bbb9a92e434d4/tokenizers-0.22.2-cp39-abi3-win32.whl", hash = "sha256:a6bf3f88c554a2b653af81f3204491c818ae2ac6fbc09e76ef4773351292bc92", size = 2502363 }, + { url = "https://files.pythonhosted.org/packages/65/71/0670843133a43d43070abeb1949abfdef12a86d490bea9cd9e18e37c5ff7/tokenizers-0.22.2-cp39-abi3-win_amd64.whl", hash = "sha256:c9ea31edff2968b44a88f97d784c2f16dc0729b8b143ed004699ebca91f05c48", size = 2747786 }, + { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133 }, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, +] + +[[package]] +name = "typer-slim" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444 }, +] + +[[package]] +name = "types-requests" +version = "2.32.4.20260107" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/f3/a0663907082280664d745929205a89d41dffb29e89a50f753af7d57d0a96/types_requests-2.32.4.20260107.tar.gz", hash = "sha256:018a11ac158f801bfa84857ddec1650750e393df8a004a8a9ae2a9bec6fcb24f", size = 23165 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1c/12/709ea261f2bf91ef0a26a9eed20f2623227a8ed85610c1e54c5805692ecb/types_requests-2.32.4.20260107-py3-none-any.whl", hash = "sha256:b703fe72f8ce5b31ef031264fe9395cac8f46a04661a79f7ed31a80fb308730d", size = 20676 }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = 
"sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614 }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611 }, +] + +[[package]] +name = "urllib3" +version = "2.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584 }, +] From ca20700828986e63369991bbc9ddb20f99e3b7f9 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 26 Jan 2026 20:11:25 +0100 Subject: [PATCH 27/28] refactor(dal): restructure datatypes, contexts, and params - Rename Blob to Object for consistency with object storage terminology - Remove builder methods from all datatypes (Object, Document, Embedding, Record, Message, Graph, Node, Edge) - Consolidate context files into core/contexts.rs with AnyContext enum - Create core/params.rs with RelationalParams, ObjectParams, VectorParams - Add bucket to ObjectParams, collection to VectorParams - Move table from PostgresParams to RelationalParams - Consolidate input_stream.rs and output_stream.rs into streams.rs - Add Python generated types (datatypes.py, params.py, contexts.py) - Use JsonValue type alias instead of Any for type safety - Add PyO3 integration layer for Python provider implementations - Fix tests to use struct literal initialization --- .gitignore | 8 + Cargo.lock | 1704 ++--------------- Cargo.toml | 8 +- crates/nvisy-dal/Cargo.toml | 19 +- crates/nvisy-dal/src/core/contexts.rs | 75 + crates/nvisy-dal/src/core/input_stream.rs | 47 - crates/nvisy-dal/src/core/mod.rs | 17 +- crates/nvisy-dal/src/core/object_context.rs | 37 - crates/nvisy-dal/src/core/params.rs | 71 + .../nvisy-dal/src/core/relational_context.rs | 45 - .../src/core/{output_stream.rs => streams.rs} | 45 +- crates/nvisy-dal/src/core/vector_context.rs | 21 - crates/nvisy-dal/src/datatype/document.rs | 28 +- crates/nvisy-dal/src/datatype/embedding.rs | 30 +- crates/nvisy-dal/src/datatype/graph.rs | 82 +- crates/nvisy-dal/src/datatype/message.rs | 37 +- crates/nvisy-dal/src/datatype/mod.rs | 117 +- .../src/datatype/{blob.rs => object.rs} | 40 +- crates/nvisy-dal/src/datatype/record.rs | 44 +- crates/nvisy-dal/src/error.rs | 6 + crates/nvisy-dal/src/lib.rs | 14 +- .../nvisy-dal/src/provider/azblob/config.rs | 26 - crates/nvisy-dal/src/provider/azblob/input.rs | 58 - crates/nvisy-dal/src/provider/azblob/mod.rs | 56 - .../nvisy-dal/src/provider/azblob/output.rs | 23 - crates/nvisy-dal/src/provider/gcs/config.rs | 20 - 
crates/nvisy-dal/src/provider/gcs/input.rs | 58 - crates/nvisy-dal/src/provider/gcs/mod.rs | 48 - crates/nvisy-dal/src/provider/gcs/output.rs | 23 - crates/nvisy-dal/src/provider/mod.rs | 43 +- crates/nvisy-dal/src/provider/mysql/config.rs | 20 - crates/nvisy-dal/src/provider/mysql/input.rs | 64 - crates/nvisy-dal/src/provider/mysql/mod.rs | 48 - crates/nvisy-dal/src/provider/mysql/output.rs | 32 - .../nvisy-dal/src/provider/pgvector/config.rs | 60 - crates/nvisy-dal/src/provider/pgvector/mod.rs | 247 --- .../nvisy-dal/src/provider/pgvector/output.rs | 65 - crates/nvisy-dal/src/provider/pinecone.rs | 77 + .../nvisy-dal/src/provider/pinecone/config.rs | 23 - crates/nvisy-dal/src/provider/pinecone/mod.rs | 215 --- .../nvisy-dal/src/provider/pinecone/output.rs | 47 - crates/nvisy-dal/src/provider/postgres.rs | 109 ++ .../nvisy-dal/src/provider/postgres/config.rs | 20 - .../nvisy-dal/src/provider/postgres/input.rs | 64 - crates/nvisy-dal/src/provider/postgres/mod.rs | 48 - .../nvisy-dal/src/provider/postgres/output.rs | 32 - .../nvisy-dal/src/provider/qdrant/config.rs | 23 - crates/nvisy-dal/src/provider/qdrant/mod.rs | 257 --- .../nvisy-dal/src/provider/qdrant/output.rs | 51 - crates/nvisy-dal/src/provider/s3.rs | 100 + crates/nvisy-dal/src/provider/s3/config.rs | 27 - crates/nvisy-dal/src/provider/s3/input.rs | 58 - crates/nvisy-dal/src/provider/s3/mod.rs | 54 - crates/nvisy-dal/src/provider/s3/output.rs | 23 - crates/nvisy-dal/src/python/error.rs | 92 + crates/nvisy-dal/src/python/loader.rs | 235 +++ crates/nvisy-dal/src/python/mod.rs | 11 + crates/nvisy-dal/src/python/provider.rs | 192 ++ .../src/provider/splitting/metadata.rs | 56 +- crates/nvisy-rig/src/rag/indexer/indexed.rs | 14 +- crates/nvisy-rig/src/rag/searcher/mod.rs | 2 +- .../nvisy-rig/src/rag/searcher/retrieved.rs | 99 +- crates/nvisy-rig/src/rag/vector_store.rs | 16 +- crates/nvisy-runtime/src/definition/input.rs | 19 +- crates/nvisy-runtime/src/definition/mod.rs | 4 +- crates/nvisy-runtime/src/definition/output.rs | 19 +- .../src/definition/transform/derive.rs | 24 +- .../src/definition/transform/embedding.rs | 24 +- .../src/definition/transform/enrich.rs | 24 +- .../src/definition/transform/extract.rs | 24 +- crates/nvisy-runtime/src/engine/compiler.rs | 139 +- crates/nvisy-runtime/src/engine/context.rs | 33 +- .../nvisy-runtime/src/engine/credentials.rs | 101 + crates/nvisy-runtime/src/engine/executor.rs | 2 +- crates/nvisy-runtime/src/engine/mod.rs | 3 + .../nvisy-runtime/src/graph/input/stream.rs | 2 +- .../nvisy-runtime/src/graph/output/stream.rs | 2 +- .../src/graph/route/file_category.rs | 52 +- .../nvisy-runtime/src/graph/route/language.rs | 53 +- crates/nvisy-runtime/src/graph/route/mod.rs | 23 +- .../src/graph/transform/chunk.rs | 2 +- .../src/graph/transform/derive.rs | 2 +- .../src/graph/transform/embedding.rs | 2 +- .../src/graph/transform/enrich.rs | 2 +- .../src/graph/transform/extract.rs | 2 +- .../nvisy-runtime/src/graph/transform/mod.rs | 2 +- .../src/graph/transform/partition.rs | 2 +- crates/nvisy-runtime/src/lib.rs | 2 +- crates/nvisy-runtime/src/provider/ai.rs | 156 -- crates/nvisy-runtime/src/provider/inputs.rs | 233 --- crates/nvisy-runtime/src/provider/mod.rs | 96 - crates/nvisy-runtime/src/provider/outputs.rs | 300 --- crates/nvisy-runtime/src/provider/registry.rs | 58 - .../src/provider/runtime/config.rs | 79 - .../nvisy-runtime/src/provider/runtime/mod.rs | 14 - .../src/provider/runtime/service.rs | 79 - docs/PROVIDERS.md | 321 ++++ docs/README.md | 27 +- docs/VISION.md | 50 - 
packages/nvisy-dal/README.md | 151 ++ packages/nvisy-dal/pyproject.toml | 8 +- .../src/nvisy_dal/_generated/__init__.py | 13 - packages/nvisy-dal/src/nvisy_dal/errors.py | 4 +- .../src/nvisy_dal/generated/__init__.py | 48 + .../{_generated => generated}/contexts.py | 12 +- .../src/nvisy_dal/generated/datatypes.py | 77 + .../src/nvisy_dal/generated/params.py | 42 + .../src/nvisy_dal/providers/__init__.py | 19 +- .../src/nvisy_dal/providers/pinecone.py | 104 + .../src/nvisy_dal/providers/postgres.py | 178 ++ .../nvisy-dal/src/nvisy_dal/providers/s3.py | 242 +++ packages/nvisy-dal/src/nvisy_dal/py.typed | 0 packages/nvisy-dal/uv.lock | 157 +- packages/nvisy-rig/README.md | 72 + .../{_generated => generated}/__init__.py | 0 115 files changed, 3050 insertions(+), 5385 deletions(-) create mode 100644 crates/nvisy-dal/src/core/contexts.rs delete mode 100644 crates/nvisy-dal/src/core/input_stream.rs delete mode 100644 crates/nvisy-dal/src/core/object_context.rs create mode 100644 crates/nvisy-dal/src/core/params.rs delete mode 100644 crates/nvisy-dal/src/core/relational_context.rs rename crates/nvisy-dal/src/core/{output_stream.rs => streams.rs} (56%) delete mode 100644 crates/nvisy-dal/src/core/vector_context.rs rename crates/nvisy-dal/src/datatype/{blob.rs => object.rs} (50%) delete mode 100644 crates/nvisy-dal/src/provider/azblob/config.rs delete mode 100644 crates/nvisy-dal/src/provider/azblob/input.rs delete mode 100644 crates/nvisy-dal/src/provider/azblob/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/azblob/output.rs delete mode 100644 crates/nvisy-dal/src/provider/gcs/config.rs delete mode 100644 crates/nvisy-dal/src/provider/gcs/input.rs delete mode 100644 crates/nvisy-dal/src/provider/gcs/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/gcs/output.rs delete mode 100644 crates/nvisy-dal/src/provider/mysql/config.rs delete mode 100644 crates/nvisy-dal/src/provider/mysql/input.rs delete mode 100644 crates/nvisy-dal/src/provider/mysql/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/mysql/output.rs delete mode 100644 crates/nvisy-dal/src/provider/pgvector/config.rs delete mode 100644 crates/nvisy-dal/src/provider/pgvector/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/pgvector/output.rs create mode 100644 crates/nvisy-dal/src/provider/pinecone.rs delete mode 100644 crates/nvisy-dal/src/provider/pinecone/config.rs delete mode 100644 crates/nvisy-dal/src/provider/pinecone/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/pinecone/output.rs create mode 100644 crates/nvisy-dal/src/provider/postgres.rs delete mode 100644 crates/nvisy-dal/src/provider/postgres/config.rs delete mode 100644 crates/nvisy-dal/src/provider/postgres/input.rs delete mode 100644 crates/nvisy-dal/src/provider/postgres/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/postgres/output.rs delete mode 100644 crates/nvisy-dal/src/provider/qdrant/config.rs delete mode 100644 crates/nvisy-dal/src/provider/qdrant/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/qdrant/output.rs create mode 100644 crates/nvisy-dal/src/provider/s3.rs delete mode 100644 crates/nvisy-dal/src/provider/s3/config.rs delete mode 100644 crates/nvisy-dal/src/provider/s3/input.rs delete mode 100644 crates/nvisy-dal/src/provider/s3/mod.rs delete mode 100644 crates/nvisy-dal/src/provider/s3/output.rs create mode 100644 crates/nvisy-dal/src/python/error.rs create mode 100644 crates/nvisy-dal/src/python/loader.rs create mode 100644 crates/nvisy-dal/src/python/mod.rs create mode 100644 
crates/nvisy-dal/src/python/provider.rs create mode 100644 crates/nvisy-runtime/src/engine/credentials.rs delete mode 100644 crates/nvisy-runtime/src/provider/ai.rs delete mode 100644 crates/nvisy-runtime/src/provider/inputs.rs delete mode 100644 crates/nvisy-runtime/src/provider/mod.rs delete mode 100644 crates/nvisy-runtime/src/provider/outputs.rs delete mode 100644 crates/nvisy-runtime/src/provider/registry.rs delete mode 100644 crates/nvisy-runtime/src/provider/runtime/config.rs delete mode 100644 crates/nvisy-runtime/src/provider/runtime/mod.rs delete mode 100644 crates/nvisy-runtime/src/provider/runtime/service.rs create mode 100644 docs/PROVIDERS.md delete mode 100644 docs/VISION.md create mode 100644 packages/nvisy-dal/README.md delete mode 100644 packages/nvisy-dal/src/nvisy_dal/_generated/__init__.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/generated/__init__.py rename packages/nvisy-dal/src/nvisy_dal/{_generated => generated}/contexts.py (62%) create mode 100644 packages/nvisy-dal/src/nvisy_dal/generated/datatypes.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/generated/params.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/providers/pinecone.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/providers/postgres.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/providers/s3.py create mode 100644 packages/nvisy-dal/src/nvisy_dal/py.typed create mode 100644 packages/nvisy-rig/README.md rename packages/nvisy-rig/src/nvisy_rig/{_generated => generated}/__init__.py (100%) diff --git a/.gitignore b/.gitignore index 412cca2..58dde47 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,14 @@ target/ **/*.rs.bk *.pdb +# Python +__pycache__/ +*.py[cod] +.venv/ +*.egg-info/ +.ruff_cache/ +.pytest_cache/ + # Generated files *.pem *.backup diff --git a/Cargo.lock b/Cargo.lock index 92b3ce0..8c36be0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,12 +58,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6966317188cdfe54c58c0900a195d021294afb3ece9b7073d09e4018dbb1e3a2" dependencies = [ "aide-macros", - "axum 0.8.8", + "axum", "axum-extra 0.10.3", "bytes", "cfg-if", - "http 1.4.0", - "indexmap 2.13.0", + "http", + "indexmap", "schemars 0.9.0", "serde", "serde_json", @@ -101,12 +101,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -220,7 +214,7 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86dde77d8a733a9dbaf865a9eb65c72e09c88f3d14d3dd0d2aecf511920ee4fe" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-util", "memchr", @@ -232,7 +226,7 @@ dependencies = [ "rand 0.8.5", "regex", "ring", - "rustls-native-certs 0.7.3", + "rustls-native-certs", "rustls-pemfile", "rustls-webpki 0.102.8", "serde", @@ -242,7 +236,7 @@ dependencies = [ "thiserror 1.0.69", "time", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tokio-stream", "tokio-util", "tokio-websockets", @@ -284,15 +278,6 @@ dependencies = [ "syn", ] -[[package]] -name = "atoi" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - "num-traits", -] - [[package]] name = "atomic-waker" version = "1.1.2" @@ -340,79 +325,24 
@@ dependencies = [ "fs_extra", ] -[[package]] -name = "axum" -version = "0.6.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" -dependencies = [ - "async-trait", - "axum-core 0.3.4", - "bitflags 1.3.2", - "bytes", - "futures-util", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.32", - "itoa", - "matchit 0.7.3", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper 0.1.2", - "tower 0.4.13", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core 0.4.5", - "bytes", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "itoa", - "matchit 0.7.3", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper 1.0.2", - "tower 0.5.3", - "tower-layer", - "tower-service", -] - [[package]] name = "axum" version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8" dependencies = [ - "axum-core 0.5.6", + "axum-core", "axum-macros", "bytes", "form_urlencoded", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-util", "itoa", - "matchit 0.8.4", + "matchit", "memchr", "mime", "multer", @@ -422,9 +352,9 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_urlencoded", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", - "tower 0.5.3", + "tower", "tower-layer", "tower-service", "tracing", @@ -436,48 +366,11 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f08a543641554404b42acd0d2494df12ca2be034d7b8ee4dbbf7446f940a2ef" dependencies = [ - "axum 0.8.8", + "axum", "client-ip", "serde", ] -[[package]] -name = "axum-core" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http 0.2.12", - "http-body 0.4.6", - "mime", - "rustversion", - "tower-layer", - "tower-service", -] - -[[package]] -name = "axum-core" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper 1.0.2", - "tower-layer", - "tower-service", -] - [[package]] name = "axum-core" version = "0.5.6" @@ -486,12 +379,12 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" dependencies = [ "bytes", "futures-core", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "mime", "pin-project-lite", - "sync_wrapper 1.0.2", + "sync_wrapper", "tower-layer", "tower-service", "tracing", @@ -503,12 +396,12 @@ version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9963ff19f40c6102c76756ef0a46004c0d58957d87259fc9208ff8441c12ab96" dependencies = [ - "axum 0.8.8", - "axum-core 0.5.6", + "axum", + "axum-core", "bytes", 
"futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "mime", "pin-project-lite", @@ -525,15 +418,15 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fef252edff26ddba56bbcdf2ee3307b8129acb86f5749b68990c168a6fcc9c76" dependencies = [ - "axum 0.8.8", - "axum-core 0.5.6", + "axum", + "axum-core", "bytes", "form_urlencoded", "futures-core", "futures-util", "headers", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "mime", "pin-project-lite", @@ -565,16 +458,16 @@ dependencies = [ "arc-swap", "bytes", "fs-err", - "http 1.4.0", - "http-body 1.0.1", - "hyper 1.8.1", + "http", + "http-body", + "hyper", "hyper-util", "pin-project-lite", - "rustls 0.23.36", + "rustls", "rustls-pemfile", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tower-service", ] @@ -585,14 +478,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce2a8627e8d8851f894696b39f2b67807d6375c177361d376173ace306a21e2" dependencies = [ "anyhow", - "axum 0.8.8", + "axum", "bytes", "bytesize", "cookie", "expect-json", - "http 1.4.0", + "http", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-util", "mime", "pretty_assertions", @@ -603,21 +496,10 @@ dependencies = [ "serde_urlencoded", "smallvec", "tokio", - "tower 0.5.3", + "tower", "url", ] -[[package]] -name = "backon" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" -dependencies = [ - "fastrand", - "gloo-timers", - "tokio", -] - [[package]] name = "backtrace" version = "0.3.76" @@ -633,12 +515,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -695,20 +571,11 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" -dependencies = [ - "serde_core", -] [[package]] name = "blake2" @@ -737,15 +604,6 @@ dependencies = [ "hybrid-array", ] -[[package]] -name = "block-padding" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" -dependencies = [ - "generic-array", -] - [[package]] name = "borrow-or-share" version = "0.2.4" @@ -815,15 +673,6 @@ dependencies = [ "libbz2-rs-sys", ] -[[package]] -name = "cbc" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" -dependencies = [ - "cipher", -] - [[package]] name = "cc" version = "1.2.53" @@ -928,7 +777,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31211fc26899744f5b22521fdc971e5f3875991d8880537537470685a0e9552d" dependencies = [ - "http 1.4.0", + "http", ] 
[[package]] @@ -966,41 +815,12 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" -[[package]] -name = "concurrent-queue" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "const-oid" version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" -[[package]] -name = "const-random" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" -dependencies = [ - "getrandom 0.2.17", - "once_cell", - "tiny-keccak", -] - [[package]] name = "constant_time_eq" version = "0.3.1" @@ -1036,16 +856,6 @@ dependencies = [ "libc", ] -[[package]] -name = "core-foundation" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -1085,15 +895,6 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" -[[package]] -name = "crc32c" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" -dependencies = [ - "rustc_version", -] - [[package]] name = "crc32fast" version = "1.5.0" @@ -1103,27 +904,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "crossbeam-queue" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - [[package]] name = "crypto-common" version = "0.1.7" @@ -1386,7 +1166,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e130c806dccc85428c564f2dc5a96e05b6615a27c9a28776bd7761a9af4bb552" dependencies = [ "bigdecimal", - "bitflags 2.10.0", + "bitflags", "byteorder", "diesel_derives", "downcast-rs", @@ -1474,7 +1254,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer 0.10.4", - "const-oid", "crypto-common 0.1.7", "subtle", ] @@ -1501,15 +1280,6 @@ dependencies = [ "syn", ] -[[package]] -name = "dlv-list" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"442039f5147480ba31067cb00ada1adae6892028e40e45fc5de7b7df6dcc1b5f" -dependencies = [ - "const-random", -] - [[package]] name = "dotenvy" version = "0.15.7" @@ -1575,9 +1345,6 @@ name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" -dependencies = [ - "serde", -] [[package]] name = "email_address" @@ -1624,28 +1391,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "etcetera" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" -dependencies = [ - "cfg-if", - "home", - "windows-sys 0.48.0", -] - -[[package]] -name = "event-listener" -version = "5.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] - [[package]] name = "eventsource-stream" version = "0.2.3" @@ -1743,12 +1488,6 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - [[package]] name = "fixedbitset" version = "0.5.7" @@ -1777,17 +1516,6 @@ dependencies = [ "serde", ] -[[package]] -name = "flume" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" -dependencies = [ - "futures-core", - "futures-sink", - "spin", -] - [[package]] name = "fnv" version = "1.0.7" @@ -1800,21 +1528,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.2" @@ -1892,17 +1605,6 @@ dependencies = [ "futures-util", ] -[[package]] -name = "futures-intrusive" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" -dependencies = [ - "futures-core", - "lock_api", - "parking_lot", -] - [[package]] name = "futures-io" version = "0.3.31" @@ -2033,37 +1735,6 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" -[[package]] -name = "gloo-timers" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" -dependencies = [ - "futures-channel", - "futures-core", - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "h2" -version = "0.3.27" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http 0.2.12", - "indexmap 2.13.0", - "slab", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = "h2" version = "0.4.13" @@ -2075,34 +1746,20 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.4.0", - "indexmap 2.13.0", + "http", + "indexmap", "slab", "tokio", "tokio-util", "tracing", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" - [[package]] name = "hashbrown" version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", "foldhash", ] @@ -2112,15 +1769,6 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" -[[package]] -name = "hashlink" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" -dependencies = [ - "hashbrown 0.15.5", -] - [[package]] name = "hdrhistogram" version = "7.5.4" @@ -2137,10 +1785,10 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3314d5adb5d94bcdf56771f2e50dbbc80bb4bdf88967526706205ac9eff24eb" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "headers-core", - "http 1.4.0", + "http", "httpdate", "mime", "sha1", @@ -2152,7 +1800,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" dependencies = [ - "http 1.4.0", + "http", ] [[package]] @@ -2190,43 +1838,14 @@ dependencies = [ ] [[package]] -name = "hkdf" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" -dependencies = [ - "hmac", -] - -[[package]] -name = "hmac" -version = "0.12.1" +name = "hmac" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" dependencies = [ "digest 0.10.7", ] -[[package]] -name = "home" -version = "0.5.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "http" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - [[package]] name = "http" version = "1.4.0" @@ -2237,17 +1856,6 @@ dependencies = [ "itoa", ] -[[package]] -name = "http-body" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" 
-dependencies = [ - "bytes", - "http 0.2.12", - "pin-project-lite", -] - [[package]] name = "http-body" version = "1.0.1" @@ -2255,7 +1863,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.4.0", + "http", ] [[package]] @@ -2266,8 +1874,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "pin-project-lite", ] @@ -2298,30 +1906,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "hyper" -version = "0.14.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2 0.5.10", - "tokio", - "tower-service", - "tracing", - "want", -] - [[package]] name = "hyper" version = "1.8.1" @@ -2332,9 +1916,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2 0.4.13", - "http 1.4.0", - "http-body 1.0.1", + "h2", + "http", + "http-body", "httparse", "httpdate", "itoa", @@ -2351,77 +1935,36 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http 1.4.0", - "hyper 1.8.1", + "http", + "hyper", "hyper-util", - "rustls 0.23.36", + "rustls", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tower-service", "webpki-roots 1.0.5", ] -[[package]] -name = "hyper-timeout" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" -dependencies = [ - "hyper 0.14.32", - "pin-project-lite", - "tokio", - "tokio-io-timeout", -] - -[[package]] -name = "hyper-timeout" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" -dependencies = [ - "hyper 1.8.1", - "hyper-util", - "pin-project-lite", - "tokio", - "tower-service", -] - -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper 1.8.1", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-channel", "futures-core", "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "hyper 1.8.1", + "http", + "http-body", + "hyper", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2", "system-configuration", "tokio", "tower-service", @@ -2607,16 +2150,6 @@ dependencies = [ "icu_properties", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 
0.12.3", -] - [[package]] name = "indexmap" version = "2.13.0" @@ -2629,13 +2162,21 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + [[package]] name = "inout" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ - "block-padding", "generic-array", ] @@ -2673,15 +2214,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -2784,7 +2316,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "161c33c3ec738cfea3288c5c53dfcdb32fd4fc2954de86ea06f71b5a1a40bfcd" dependencies = [ "ahash", - "base64 0.22.1", + "base64", "bytecount", "email_address", "fancy-regex 0.14.0", @@ -2802,21 +2334,6 @@ dependencies = [ "uuid-simd", ] -[[package]] -name = "jsonwebtoken" -version = "9.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" -dependencies = [ - "base64 0.22.1", - "js-sys", - "pem", - "ring", - "serde", - "serde_json", - "simple_asn1", -] - [[package]] name = "jsonwebtoken" version = "10.2.0" @@ -2824,7 +2341,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c76e1c7d7df3e34443b3621b459b066a7b79644f059fc8b2db7070c825fd417e" dependencies = [ "aws-lc-rs", - "base64 0.22.1", + "base64", "getrandom 0.2.17", "js-sys", "pem", @@ -2839,9 +2356,6 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -dependencies = [ - "spin", -] [[package]] name = "libbz2-rs-sys" @@ -2867,21 +2381,11 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d0b95e02c851351f877147b7deea7b1afb1df71b63aa5f8270716e0c5720616" dependencies = [ - "bitflags 2.10.0", + "bitflags", "libc", "redox_syscall 0.7.0", ] -[[package]] -name = "libsqlite3-sys" -version = "0.30.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" -dependencies = [ - "pkg-config", - "vcpkg", -] - [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -2967,12 +2471,6 @@ dependencies = [ "regex-automata", ] -[[package]] -name = "matchit" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" - [[package]] name = "matchit" version = "0.8.4" @@ -2995,6 +2493,15 @@ version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "migrations_internals" version = "2.3.0" @@ -3068,7 +2575,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http 1.4.0", + "http", "httparse", "memchr", "mime", @@ -3076,29 +2583,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "multimap" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" - -[[package]] -name = "native-tls" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe 0.1.6", - "openssl-sys", - "schannel", - "security-framework 2.11.1", - "security-framework-sys", - "tempfile", -] - [[package]] name = "nkeys" version = "0.4.5" @@ -3166,22 +2650,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-bigint-dig" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" -dependencies = [ - "lazy_static", - "libm", - "num-integer", - "num-iter", - "num-traits", - "rand 0.8.5", - "smallvec", - "zeroize", -] - [[package]] name = "num-cmp" version = "0.1.0" @@ -3241,7 +2709,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", - "libm", ] [[package]] @@ -3259,7 +2726,7 @@ name = "nvisy-cli" version = "0.1.0" dependencies = [ "anyhow", - "axum 0.8.8", + "axum", "axum-server", "clap", "dotenvy", @@ -3293,20 +2760,15 @@ dependencies = [ name = "nvisy-dal" version = "0.1.0" dependencies = [ + "async-stream", "async-trait", "bytes", - "deadpool", "derive_more", - "diesel", - "diesel-async", "futures", "jiff", "nvisy-core", - "opendal", - "pgvector", - "pinecone-sdk", - "prost-types 0.12.6", - "qdrant-client", + "pyo3", + "pyo3-async-runtimes", "serde", "serde_json", "thiserror 2.0.18", @@ -3319,7 +2781,7 @@ name = "nvisy-nats" version = "0.1.0" dependencies = [ "async-nats", - "base64 0.22.1", + "base64", "clap", "derive_more", "futures", @@ -3340,7 +2802,7 @@ dependencies = [ name = "nvisy-postgres" version = "0.1.0" dependencies = [ - "base64 0.22.1", + "base64", "bigdecimal", "clap", "deadpool", @@ -3436,7 +2898,7 @@ version = "0.1.0" source = "git+https://github.com/nvisycom/runtime.git?branch=feature%2Fprerelease#1962b90589342d4e9becb14402ef1ee4f95a19cc" dependencies = [ "async-trait", - "base64 0.22.1", + "base64", "bytes", "derive_more", "jiff", @@ -3528,7 +2990,7 @@ dependencies = [ "nvisy-rig", "nvisy-rt-core", "nvisy-rt-engine", - "petgraph 0.8.3", + "petgraph", "semver", "serde", "serde_json", @@ -3547,11 +3009,11 @@ dependencies = [ "anyhow", "argon2", "async-trait", - "axum 0.8.8", + "axum", "axum-client-ip", "axum-extra 0.12.5", "axum-test", - "base64 0.22.1", + "base64", "bigdecimal", "clap", "derive_more", @@ -3559,7 +3021,7 @@ dependencies = [ "futures", "ipnet", "jiff", - "jsonwebtoken 10.2.0", + "jsonwebtoken", "nvisy-nats", "nvisy-postgres", "nvisy-runtime", @@ -3576,7 +3038,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "tower 0.5.3", + "tower", "tower-http", "tracing", "tracing-subscriber", @@ -3630,87 +3092,12 @@ version = "1.70.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" -[[package]] -name = "opendal" -version = "0.53.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f947c4efbca344c1a125753366033c8107f552b2e3f8251815ed1908f116ca3e" -dependencies = [ - "anyhow", - "async-trait", - "backon", - "base64 0.22.1", - "bytes", - "chrono", - "crc32c", - "futures", - "getrandom 0.2.17", - "http 1.4.0", - "http-body 1.0.1", - "log", - "md-5", - "percent-encoding", - "quick-xml", - "reqsign", - "reqwest", - "serde", - "serde_json", - "sha2", - "sqlx", - "tokio", - "uuid", -] - -[[package]] -name = "openssl" -version = "0.10.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" -dependencies = [ - "bitflags 2.10.0", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "openssl-probe" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" -[[package]] -name = "openssl-probe" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" - -[[package]] -name = "openssl-sys" -version = "0.9.111" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "ordered-float" version = "5.1.0" @@ -3720,28 +3107,12 @@ dependencies = [ "num-traits", ] -[[package]] -name = "ordered-multimap" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49203cdcae0030493bad186b28da2fa25645fa276a51b6fec8010d281e02ef79" -dependencies = [ - "dlv-list", - "hashbrown 0.14.5", -] - [[package]] name = "outref" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" -[[package]] -name = "parking" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" - [[package]] name = "parking_lot" version = "0.12.5" @@ -3792,7 +3163,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64 0.22.1", + "base64", "serde_core", ] @@ -3811,25 +3182,15 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset 0.4.2", - "indexmap 2.13.0", -] - [[package]] name = "petgraph" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ - "fixedbitset 0.5.7", + "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.13.0", + "indexmap", "serde", "serde_derive", ] @@ -3916,64 +3277,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] -name = "pinecone-sdk" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f571fcb86d81e70a5de2817a029fa9e52160f66f10d662584b56607ae6c5dab9" -dependencies = [ - "anyhow", - "once_cell", - "prost 0.12.6", - "prost-types 0.12.6", - "rand 0.8.5", - "regex", - "reqwest", - "serde", - "serde_json", - "snafu", - "thiserror 1.0.69", - "tokio", - "tonic 0.11.0", - "tonic-build", - "url", - "uuid", -] - -[[package]] -name = "pkcs1" -version = "0.7.5" +name = "pkcs8" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" dependencies = [ "der", - "pkcs8", - "spki", -] - -[[package]] -name = "pkcs5" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" -dependencies = [ - "aes", - "cbc", - "der", - "pbkdf2", - "scrypt", - "sha2", - "spki", -] - -[[package]] -name = "pkcs8" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" -dependencies = [ - "der", - "pkcs5", - "rand_core 0.6.4", "spki", ] @@ -4004,7 +3313,7 @@ version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ee9dd5fe15055d2b6806f4736aa0c9637217074e224bbec46d4041b91bb9491" dependencies = [ - "base64 0.22.1", + "base64", "byteorder", "bytes", "fallible-iterator", @@ -4080,16 +3389,6 @@ dependencies = [ "yansi", ] -[[package]] -name = "prettyplease" -version = "0.2.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" -dependencies = [ - "proc-macro2", - "syn", -] - [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -4122,120 +3421,77 @@ dependencies = [ ] [[package]] -name = "prost" -version = "0.12.6" +name = "pyo3" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d" dependencies = [ - "bytes", - "prost-derive 0.12.6", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", ] [[package]] -name = "prost" -version = "0.13.5" +name = "pyo3-async-runtimes" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "57ddb5b570751e93cc6777e81fee8087e59cd53b5043292f2a6d59d5bd80fdfd" dependencies = [ - "bytes", - "prost-derive 0.13.5", + "futures", + "once_cell", + "pin-project-lite", + "pyo3", + "tokio", ] [[package]] -name = "prost-build" -version = "0.12.6" +name = "pyo3-build-config" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +checksum = "b455933107de8642b4487ed26d912c2d899dec6114884214a0b3bb3be9261ea6" dependencies = [ - "bytes", - "heck 0.5.0", - "itertools 0.12.1", - "log", - "multimap", - "once_cell", - "petgraph 0.6.5", - "prettyplease", - "prost 0.12.6", - "prost-types 0.12.6", - "regex", - "syn", - "tempfile", + "target-lexicon", ] [[package]] -name = "prost-derive" -version = "0.12.6" +name = "pyo3-ffi" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +checksum = "1c85c9cbfaddf651b1221594209aed57e9e5cff63c4d11d1feead529b872a089" dependencies = [ - "anyhow", - "itertools 0.12.1", - "proc-macro2", - "quote", - "syn", + "libc", + "pyo3-build-config", ] [[package]] -name = "prost-derive" -version = "0.13.5" +name = "pyo3-macros" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "0a5b10c9bf9888125d917fb4d2ca2d25c8df94c7ab5a52e13313a07e050a3b02" dependencies = [ - "anyhow", - "itertools 0.14.0", "proc-macro2", + "pyo3-macros-backend", "quote", "syn", ] [[package]] -name = "prost-types" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" -dependencies = [ - "prost 0.12.6", -] - -[[package]] -name = "prost-types" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" -dependencies = [ - "prost 0.13.5", -] - -[[package]] -name = "qdrant-client" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a76499f3e8385dae785d65a0216e0dfa8fadaddd18038adf04f438631683b26a" -dependencies = [ - "anyhow", - "derive_builder", - "futures", - "futures-util", - "parking_lot", - "prost 0.13.5", - "prost-types 0.13.5", - "reqwest", - "semver", - "serde", - "serde_json", - "thiserror 1.0.69", - "tokio", - "tonic 0.12.3", -] - -[[package]] -name = "quick-xml" -version = "0.37.5" +name = "pyo3-macros-backend" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +checksum = "03b51720d314836e53327f5871d4c0cfb4fb37cc2c4a11cc71907a86342c40f9" dependencies = [ - "memchr", - "serde", + "heck 0.5.0", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", ] [[package]] @@ -4250,8 +3506,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.36", - "socket2 0.6.1", + "rustls", + "socket2", "thiserror 2.0.18", "tokio", "tracing", @@ -4270,7 +3526,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls 0.23.36", + "rustls", "rustls-pki-types", "slab", "thiserror 2.0.18", @@ -4288,7 +3544,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2", "tracing", "windows-sys 0.60.2", ] @@ -4391,7 +3647,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags", ] [[package]] @@ -4400,7 +3656,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"49f3fe0889e69e2ae9e41f4d6c4c0181701d00e4697b356fb1f74173a5e0ee27" dependencies = [ - "bitflags 2.10.0", + "bitflags", ] [[package]] @@ -4466,76 +3722,42 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" -[[package]] -name = "reqsign" -version = "0.16.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" -dependencies = [ - "anyhow", - "async-trait", - "base64 0.22.1", - "chrono", - "form_urlencoded", - "getrandom 0.2.17", - "hex", - "hmac", - "home", - "http 1.4.0", - "jsonwebtoken 9.3.1", - "log", - "percent-encoding", - "quick-xml", - "rand 0.8.5", - "reqwest", - "rsa", - "rust-ini", - "serde", - "serde_json", - "sha1", - "sha2", - "tokio", -] - [[package]] name = "reqwest" version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "encoding_rs", "futures-channel", "futures-core", "futures-util", - "h2 0.4.13", - "http 1.4.0", - "http-body 1.0.1", + "h2", + "http", + "http-body", "http-body-util", - "hyper 1.8.1", + "hyper", "hyper-rustls", - "hyper-tls", "hyper-util", "js-sys", "log", "mime", "mime_guess", - "native-tls", "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.36", + "rustls", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", - "tokio-native-tls", - "tokio-rustls 0.26.4", + "tokio-rustls", "tokio-util", - "tower 0.5.3", + "tower", "tower-http", "tower-service", "url", @@ -4563,14 +3785,14 @@ checksum = "7207790134ee24d87ac3d022c308e1a7c871219d139acf70d13be76c1f6919c5" dependencies = [ "as-any", "async-stream", - "base64 0.22.1", + "base64", "bytes", "eventsource-stream", "fastrand", "futures", "futures-timer", "glob", - "http 1.4.0", + "http", "mime", "mime_guess", "ordered-float", @@ -4600,37 +3822,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rsa" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" -dependencies = [ - "const-oid", - "digest 0.10.7", - "num-bigint-dig", - "num-integer", - "num-traits", - "pkcs1", - "pkcs8", - "rand_core 0.6.4", - "sha2", - "signature", - "spki", - "subtle", - "zeroize", -] - -[[package]] -name = "rust-ini" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" -dependencies = [ - "cfg-if", - "ordered-multimap", -] - [[package]] name = "rust-multipart-rfc7578_2" version = "0.8.0" @@ -4640,7 +3831,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http 1.4.0", + "http", "mime", "rand 0.9.2", "thiserror 2.0.18", @@ -4673,27 +3864,13 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags 2.10.0", + "bitflags", "errno", "libc", "linux-raw-sys", "windows-sys 0.61.2", ] -[[package]] -name = "rustls" -version = "0.22.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" -dependencies = [ - "log", - "ring", - "rustls-pki-types", 
- "rustls-webpki 0.102.8", - "subtle", - "zeroize", -] - [[package]] name = "rustls" version = "0.23.36" @@ -4701,7 +3878,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "aws-lc-rs", - "log", "once_cell", "ring", "rustls-pki-types", @@ -4716,23 +3892,11 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" dependencies = [ - "openssl-probe 0.1.6", + "openssl-probe", "rustls-pemfile", "rustls-pki-types", "schannel", - "security-framework 2.11.1", -] - -[[package]] -name = "rustls-native-certs" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" -dependencies = [ - "openssl-probe 0.2.0", - "rustls-pki-types", - "schannel", - "security-framework 3.5.1", + "security-framework", ] [[package]] @@ -4760,7 +3924,6 @@ version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ - "ring", "rustls-pki-types", "untrusted 0.9.0", ] @@ -4789,15 +3952,6 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" -[[package]] -name = "salsa20" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" -dependencies = [ - "cipher", -] - [[package]] name = "schannel" version = "0.1.28" @@ -4814,7 +3968,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" dependencies = [ "dyn-clone", - "indexmap 2.13.0", + "indexmap", "jiff", "ref-cast", "schemars_derive 0.9.0", @@ -4881,38 +4035,14 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "scrypt" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" -dependencies = [ - "pbkdf2", - "salsa20", - "sha2", -] - [[package]] name = "security-framework" version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.9.4", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework" -version = "3.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" -dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.10.1", + "bitflags", + "core-foundation", "core-foundation-sys", "libc", "security-framework-sys", @@ -4986,7 +4116,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2f2d7ff8a2140333718bb329f5c40fc5f0865b84c426183ce14c97d2ab8154f" dependencies = [ "form_urlencoded", - "indexmap 2.13.0", + "indexmap", "itoa", "ryu", "serde_core", @@ -5031,7 +4161,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"8b417bedc008acbdf6d6b4bc482d29859924114bbe2650b7921fb68a261d0aa6" dependencies = [ - "axum 0.8.8", + "axum", "futures", "percent-encoding", "serde", @@ -5174,40 +4304,6 @@ name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" -dependencies = [ - "serde", -] - -[[package]] -name = "snafu" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" -dependencies = [ - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] [[package]] name = "socket2" @@ -5224,9 +4320,6 @@ name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] [[package]] name = "spki" @@ -5238,196 +4331,6 @@ dependencies = [ "der", ] -[[package]] -name = "sqlx" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" -dependencies = [ - "sqlx-core", - "sqlx-macros", - "sqlx-mysql", - "sqlx-postgres", - "sqlx-sqlite", -] - -[[package]] -name = "sqlx-core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" -dependencies = [ - "base64 0.22.1", - "bytes", - "crc", - "crossbeam-queue", - "either", - "event-listener", - "futures-core", - "futures-intrusive", - "futures-io", - "futures-util", - "hashbrown 0.15.5", - "hashlink", - "indexmap 2.13.0", - "log", - "memchr", - "once_cell", - "percent-encoding", - "rustls 0.23.36", - "serde", - "serde_json", - "sha2", - "smallvec", - "thiserror 2.0.18", - "tokio", - "tokio-stream", - "tracing", - "url", - "webpki-roots 0.26.11", -] - -[[package]] -name = "sqlx-macros" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" -dependencies = [ - "proc-macro2", - "quote", - "sqlx-core", - "sqlx-macros-core", - "syn", -] - -[[package]] -name = "sqlx-macros-core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" -dependencies = [ - "dotenvy", - "either", - "heck 0.5.0", - "hex", - "once_cell", - "proc-macro2", - "quote", - "serde", - "serde_json", - "sha2", - "sqlx-core", - "sqlx-mysql", - "sqlx-postgres", - "sqlx-sqlite", - "syn", - "tokio", - "url", -] - -[[package]] -name = "sqlx-mysql" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" -dependencies = [ - "atoi", - "base64 0.22.1", - "bitflags 2.10.0", - "byteorder", - "bytes", - "crc", - "digest 0.10.7", - "dotenvy", - "either", - 
"futures-channel", - "futures-core", - "futures-io", - "futures-util", - "generic-array", - "hex", - "hkdf", - "hmac", - "itoa", - "log", - "md-5", - "memchr", - "once_cell", - "percent-encoding", - "rand 0.8.5", - "rsa", - "serde", - "sha1", - "sha2", - "smallvec", - "sqlx-core", - "stringprep", - "thiserror 2.0.18", - "tracing", - "whoami 1.6.1", -] - -[[package]] -name = "sqlx-postgres" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" -dependencies = [ - "atoi", - "base64 0.22.1", - "bitflags 2.10.0", - "byteorder", - "crc", - "dotenvy", - "etcetera", - "futures-channel", - "futures-core", - "futures-util", - "hex", - "hkdf", - "hmac", - "home", - "itoa", - "log", - "md-5", - "memchr", - "once_cell", - "rand 0.8.5", - "serde", - "serde_json", - "sha2", - "smallvec", - "sqlx-core", - "stringprep", - "thiserror 2.0.18", - "tracing", - "whoami 1.6.1", -] - -[[package]] -name = "sqlx-sqlite" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" -dependencies = [ - "atoi", - "flume", - "futures-channel", - "futures-core", - "futures-executor", - "futures-intrusive", - "futures-util", - "libsqlite3-sys", - "log", - "percent-encoding", - "serde", - "serde_urlencoded", - "sqlx-core", - "thiserror 2.0.18", - "tracing", - "url", -] - [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -5489,12 +4392,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - [[package]] name = "sync_wrapper" version = "1.0.2" @@ -5521,8 +4418,8 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.10.0", - "core-foundation 0.9.4", + "bitflags", + "core-foundation", "system-configuration-sys", ] @@ -5547,6 +4444,12 @@ dependencies = [ "xattr", ] +[[package]] +name = "target-lexicon" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" + [[package]] name = "tempfile" version = "3.24.0" @@ -5657,15 +4560,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "tinystr" version = "0.8.2" @@ -5701,24 +4595,13 @@ dependencies = [ "bytes", "libc", "mio", - "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.1", + "socket2", "tokio-macros", "windows-sys 0.61.2", ] -[[package]] -name = "tokio-io-timeout" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bd86198d9ee903fedd2f9a2e72014287c0d9167e4ae43b5853007205dda1b76" -dependencies = [ - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-macros" version = "2.6.0" @@ -5730,16 +4613,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - [[package]] name = "tokio-postgres" version = "0.7.16" @@ -5760,21 +4633,10 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.9.2", - "socket2 0.6.1", + "socket2", "tokio", "tokio-util", - "whoami 2.0.2", -] - -[[package]] -name = "tokio-rustls" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" -dependencies = [ - "rustls 0.22.4", - "rustls-pki-types", - "tokio", + "whoami", ] [[package]] @@ -5783,7 +4645,7 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.36", + "rustls", "tokio", ] @@ -5817,17 +4679,17 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f591660438b3038dd04d16c938271c79e7e06260ad2ea2885a4861bfb238605d" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-core", "futures-sink", - "http 1.4.0", + "http", "httparse", "rand 0.8.5", "ring", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.4", + "tokio-rustls", "tokio-util", "webpki-roots 0.26.11", ] @@ -5863,104 +4725,6 @@ dependencies = [ "winnow", ] -[[package]] -name = "tonic" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" -dependencies = [ - "async-stream", - "async-trait", - "axum 0.6.20", - "base64 0.21.7", - "bytes", - "h2 0.3.27", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.32", - "hyper-timeout 0.4.1", - "percent-encoding", - "pin-project", - "prost 0.12.6", - "rustls-native-certs 0.7.3", - "rustls-pemfile", - "rustls-pki-types", - "tokio", - "tokio-rustls 0.25.0", - "tokio-stream", - "tower 0.4.13", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tonic" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" -dependencies = [ - "async-stream", - "async-trait", - "axum 0.7.9", - "base64 0.22.1", - "bytes", - "flate2", - "h2 0.4.13", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "hyper 1.8.1", - "hyper-timeout 0.5.2", - "hyper-util", - "percent-encoding", - "pin-project", - "prost 0.13.5", - "rustls-native-certs 0.8.3", - "rustls-pemfile", - "socket2 0.5.10", - "tokio", - "tokio-rustls 0.26.4", - "tokio-stream", - "tower 0.4.13", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "tonic-build" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4ef6dd70a610078cb4e338a0f79d06bc759ff1b22d2120c2ff02ae264ba9c2" -dependencies = [ - "prettyplease", - "proc-macro2", - "prost-build", - "quote", - "syn", -] - -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - "futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - [[package]] name = "tower" version = "0.5.3" @@ -5970,10 +4734,10 @@ dependencies = [ "futures-core", "futures-util", 
"hdrhistogram", - "indexmap 2.13.0", + "indexmap", "pin-project-lite", "slab", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", "tokio-util", "tower-layer", @@ -5988,13 +4752,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "async-compression", - "base64 0.22.1", - "bitflags 2.10.0", + "base64", + "bitflags", "bytes", "futures-core", "futures-util", - "http 1.4.0", - "http-body 1.0.1", + "http", + "http-body", "http-body-util", "http-range-header", "httpdate", @@ -6005,7 +4769,7 @@ dependencies = [ "pin-project-lite", "tokio", "tokio-util", - "tower 0.5.3", + "tower", "tower-layer", "tower-service", "tracing", @@ -6214,6 +4978,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + [[package]] name = "untrusted" version = "0.7.1" @@ -6361,12 +5131,6 @@ dependencies = [ "wit-bindgen", ] -[[package]] -name = "wasite" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" - [[package]] name = "wasite" version = "1.0.2" @@ -6486,16 +5250,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "whoami" -version = "1.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4a4db5077702ca3015d3d02d74974948aba2ad9e12ab7df718ee64ccd7e97d" -dependencies = [ - "libredox", - "wasite 0.1.0", -] - [[package]] name = "whoami" version = "2.0.2" @@ -6503,7 +5257,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace4d5c7b5ab3d99629156d4e0997edbe98a4beb6d5ba99e2cae830207a81983" dependencies = [ "libredox", - "wasite 1.0.2", + "wasite", "web-sys", ] @@ -6577,15 +5331,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets 0.48.5", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -6613,21 +5358,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", -] - [[package]] name = "windows-targets" version = "0.52.6" @@ -6661,12 +5391,6 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -6679,12 +5403,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" -[[package]] -name = 
"windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -6697,12 +5415,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -6727,12 +5439,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -6745,12 +5451,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -6763,12 +5463,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -6781,12 +5475,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -6985,7 +5673,7 @@ dependencies = [ "generic-array", "getrandom 0.3.4", "hmac", - "indexmap 2.13.0", + "indexmap", "lzma-rust2", "memchr", "pbkdf2", diff --git a/Cargo.toml b/Cargo.toml index 39c8c33..994c5fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -139,9 +139,7 @@ woothee = { version = "0.13", features = [] } # AI/ML frameworks rig-core = { version = "0.29", default-features = false, features = ["reqwest-rustls"] } -# Storage abstractions and providers -opendal = { version = "0.53", features = [] } -qdrant-client = { version = "1.13", features = [] } -pinecone-sdk = { version = "0.1", features = [] } -prost-types = { version = "0.12", features = [] } +# Python interop +pyo3 = { version = "0.27", features = [] } +pyo3-async-runtimes = { version = "0.27", features = [] } diff --git a/crates/nvisy-dal/Cargo.toml b/crates/nvisy-dal/Cargo.toml index 01246ac..2331349 100644 --- a/crates/nvisy-dal/Cargo.toml +++ b/crates/nvisy-dal/Cargo.toml @@ -44,20 +44,13 @@ bytes = { workspace = true, features = [] } uuid = { 
workspace = true, features = ["v4", "v7"] } jiff = { workspace = true, features = ["serde"] } -# Storage (OpenDAL) -opendal = { workspace = true, features = ["services-s3", "services-gcs", "services-azblob", "services-postgresql", "services-mysql"] } +# Python interop +pyo3 = { workspace = true, features = ["auto-initialize"] } +pyo3-async-runtimes = { workspace = true, features = ["tokio-runtime"] } +async-stream = { workspace = true } -# Vector store clients -qdrant-client = { workspace = true, features = [] } -pinecone-sdk = { workspace = true, features = [] } - -prost-types = { workspace = true, features = [] } - -# Database (for pgvector) -diesel = { workspace = true, features = ["postgres"] } -diesel-async = { workspace = true, features = ["postgres", "deadpool"] } -pgvector = { workspace = true, features = ["diesel"] } -deadpool = { workspace = true, features = [] } +[features] +default = [] [dev-dependencies] tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/crates/nvisy-dal/src/core/contexts.rs b/crates/nvisy-dal/src/core/contexts.rs new file mode 100644 index 0000000..cf31741 --- /dev/null +++ b/crates/nvisy-dal/src/core/contexts.rs @@ -0,0 +1,75 @@ +//! Context types for data operations. +//! +//! Contexts carry state from previous runs to enable pagination and resumption. + +use derive_more::From; +use serde::{Deserialize, Serialize}; + +/// Context for object storage operations (S3, GCS, Azure Blob). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ObjectContext { + /// Path prefix for listing objects. + pub prefix: Option, + /// Continuation token for pagination. + pub token: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +/// Context for relational database operations (Postgres, MySQL). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct RelationalContext { + /// Last seen cursor value (for keyset pagination). + pub cursor: Option, + /// Tiebreaker value for resolving cursor conflicts. + pub tiebreaker: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +/// Context for vector database operations (Qdrant, Pinecone, pgvector). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct VectorContext { + /// Continuation token or offset for pagination. + pub token: Option, + /// Maximum number of items to read. + pub limit: Option, +} + +/// Type-erased context for runtime dispatch. +#[derive(Debug, Clone, Default, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyContext { + /// No context / empty state. + #[default] + None, + /// Object storage context. + Object(ObjectContext), + /// Relational database context. + Relational(RelationalContext), + /// Vector database context. + Vector(VectorContext), +} + +impl AnyContext { + /// Returns the limit if set in any context type. + pub fn limit(&self) -> Option { + match self { + Self::None => None, + Self::Object(ctx) => ctx.limit, + Self::Relational(ctx) => ctx.limit, + Self::Vector(ctx) => ctx.limit, + } + } + + /// Sets the limit on the inner context. 
+ pub fn with_limit(mut self, limit: usize) -> Self { + match &mut self { + Self::None => {} + Self::Object(ctx) => ctx.limit = Some(limit), + Self::Relational(ctx) => ctx.limit = Some(limit), + Self::Vector(ctx) => ctx.limit = Some(limit), + } + self + } +} diff --git a/crates/nvisy-dal/src/core/input_stream.rs b/crates/nvisy-dal/src/core/input_stream.rs deleted file mode 100644 index 5b015eb..0000000 --- a/crates/nvisy-dal/src/core/input_stream.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Input stream types for reading data. - -use std::pin::Pin; -use std::task::{Context, Poll}; - -use futures::Stream; -use futures::stream::BoxStream; - -use crate::Result; - -/// A boxed stream of items with a lifetime. -pub type ItemStream<'a, T> = BoxStream<'a, Result>; - -/// Input stream wrapper for reading data. -pub struct InputStream { - stream: ItemStream<'static, T>, -} - -impl InputStream { - /// Creates a new input stream. - pub fn new(stream: ItemStream<'static, T>) -> Self { - Self { stream } - } - - /// Consumes the stream and returns the inner boxed stream. - pub fn into_inner(self) -> ItemStream<'static, T> { - self.stream - } -} - -impl Stream for InputStream { - type Item = Result; - - fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - Pin::new(&mut self.stream).poll_next(cx) - } - - fn size_hint(&self) -> (usize, Option) { - self.stream.size_hint() - } -} - -impl std::fmt::Debug for InputStream { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("InputStream").finish_non_exhaustive() - } -} diff --git a/crates/nvisy-dal/src/core/mod.rs b/crates/nvisy-dal/src/core/mod.rs index 3703504..1462750 100644 --- a/crates/nvisy-dal/src/core/mod.rs +++ b/crates/nvisy-dal/src/core/mod.rs @@ -1,18 +1,13 @@ //! Core types and traits for data operations. -mod input_stream; -mod object_context; -mod output_stream; -mod relational_context; -mod vector_context; +mod contexts; +mod params; +mod streams; -pub use input_stream::{InputStream, ItemStream}; -// Re-export IntoProvider from nvisy-core +pub use contexts::{AnyContext, ObjectContext, RelationalContext, VectorContext}; pub use nvisy_core::Provider; -pub use object_context::ObjectContext; -pub use output_stream::{ItemSink, OutputStream}; -pub use relational_context::RelationalContext; -pub use vector_context::VectorContext; +pub use params::{DistanceMetric, ObjectParams, RelationalParams, VectorParams}; +pub use streams::{InputStream, ItemSink, ItemStream, OutputStream}; use crate::Result; diff --git a/crates/nvisy-dal/src/core/object_context.rs b/crates/nvisy-dal/src/core/object_context.rs deleted file mode 100644 index ae92795..0000000 --- a/crates/nvisy-dal/src/core/object_context.rs +++ /dev/null @@ -1,37 +0,0 @@ -//! Context for object storage operations. - -/// Context for object storage operations (S3, GCS, Azure Blob). -#[derive(Debug, Clone, Default)] -pub struct ObjectContext { - /// Path prefix for listing objects. - pub prefix: Option, - /// Continuation token for pagination. - pub token: Option, - /// Maximum number of items to read. - pub limit: Option, -} - -impl ObjectContext { - /// Creates a new empty context. - pub fn new() -> Self { - Self::default() - } - - /// Sets the prefix. - pub fn with_prefix(mut self, prefix: impl Into) -> Self { - self.prefix = Some(prefix.into()); - self - } - - /// Sets the continuation token. - pub fn with_token(mut self, token: impl Into) -> Self { - self.token = Some(token.into()); - self - } - - /// Sets the limit. 
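For reference, the new `AnyContext` enum above uses serde's adjacently tagged layout (`tag = "type"`, `content = "data"`, snake_case names). The following is a minimal, self-contained sketch of that wire shape, not part of the patch; `ObjectContext` is re-declared locally and its field types are assumed to be `Option<String>` / `Option<usize>` based on the doc comments.

```rust
// Sketch of the adjacently tagged JSON layout used by AnyContext.
// Types are re-declared locally for illustration only.
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct ObjectContext {
    prefix: Option<String>,
    token: Option<String>,
    limit: Option<usize>,
}

#[derive(Serialize, Deserialize)]
#[serde(tag = "type", content = "data", rename_all = "snake_case")]
enum AnyContext {
    None,
    Object(ObjectContext),
}

fn main() -> serde_json::Result<()> {
    let ctx = AnyContext::Object(ObjectContext {
        prefix: Some("exports/2026/".into()),
        token: None,
        limit: Some(500),
    });
    // {"type":"object","data":{"prefix":"exports/2026/","token":null,"limit":500}}
    println!("{}", serde_json::to_string(&ctx)?);
    // Unit variants carry only the tag: {"type":"none"}
    println!("{}", serde_json::to_string(&AnyContext::None)?);
    Ok(())
}
```

Persisting contexts as tagged JSON is what lets a stored run resume with the right context kind without extra type bookkeeping.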
- pub fn with_limit(mut self, limit: usize) -> Self { - self.limit = Some(limit); - self - } -} diff --git a/crates/nvisy-dal/src/core/params.rs b/crates/nvisy-dal/src/core/params.rs new file mode 100644 index 0000000..2f34efe --- /dev/null +++ b/crates/nvisy-dal/src/core/params.rs @@ -0,0 +1,71 @@ +//! Parameter types for provider configuration. +//! +//! Params define how providers operate (columns, batch sizes, etc.), +//! while contexts carry runtime state (cursors, tokens, limits). + +use serde::{Deserialize, Serialize}; + +/// Common parameters for relational database operations. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct RelationalParams { + /// Target table name. + #[serde(skip_serializing_if = "Option::is_none")] + pub table: Option, + /// Column to use for cursor-based pagination (e.g., "id", "created_at"). + #[serde(skip_serializing_if = "Option::is_none")] + pub cursor_column: Option, + /// Column to use as tiebreaker when cursor values are not unique (e.g., "id"). + #[serde(skip_serializing_if = "Option::is_none")] + pub tiebreaker_column: Option, + /// Default batch size for bulk operations. + #[serde(default = "default_batch_size")] + pub batch_size: usize, +} + +/// Common parameters for object storage operations. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ObjectParams { + /// Bucket name (S3 bucket, GCS bucket, Azure container). + #[serde(skip_serializing_if = "Option::is_none")] + pub bucket: Option, + /// Default prefix for object keys. + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option, + /// Default batch size for bulk operations. + #[serde(default = "default_batch_size")] + pub batch_size: usize, +} + +/// Common parameters for vector database operations. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct VectorParams { + /// Collection or index name (Pinecone index, Qdrant collection). + #[serde(skip_serializing_if = "Option::is_none")] + pub collection: Option, + /// Dimension of vectors (required for some providers). + #[serde(skip_serializing_if = "Option::is_none")] + pub dimension: Option, + /// Distance metric for similarity search. + #[serde(default)] + pub metric: DistanceMetric, + /// Default batch size for bulk operations. + #[serde(default = "default_batch_size")] + pub batch_size: usize, +} + +/// Distance metric for vector similarity search. +#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DistanceMetric { + /// Cosine similarity (default). + #[default] + Cosine, + /// Euclidean distance (L2). + Euclidean, + /// Dot product. + DotProduct, +} + +fn default_batch_size() -> usize { + 1000 +} diff --git a/crates/nvisy-dal/src/core/relational_context.rs b/crates/nvisy-dal/src/core/relational_context.rs deleted file mode 100644 index db0744d..0000000 --- a/crates/nvisy-dal/src/core/relational_context.rs +++ /dev/null @@ -1,45 +0,0 @@ -//! Context for relational database operations. - -/// Context for relational database operations (Postgres, MySQL). -#[derive(Debug, Clone, Default)] -pub struct RelationalContext { - /// Target table name. - pub table: Option, - /// Last seen cursor value (for keyset pagination). - pub cursor: Option, - /// Tiebreaker value for resolving cursor conflicts. - pub tiebreaker: Option, - /// Maximum number of items to read. - pub limit: Option, -} - -impl RelationalContext { - /// Creates a new empty context. 
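The `#[serde(default = "default_batch_size")]` attribute in the new params.rs means a stored config may omit `batch_size` and still deserialize with the 1000-item fallback. A self-contained sketch of that behaviour, with `RelationalParams` re-declared locally and trimmed to the relevant fields:

```rust
// Sketch of the batch_size default in params.rs; local re-declaration only.
use serde::Deserialize;

fn default_batch_size() -> usize {
    1000
}

#[derive(Debug, Deserialize)]
struct RelationalParams {
    table: Option<String>,
    cursor_column: Option<String>,
    #[serde(default = "default_batch_size")]
    batch_size: usize,
}

fn main() -> serde_json::Result<()> {
    // batch_size is absent from the stored config...
    let params: RelationalParams =
        serde_json::from_str(r#"{ "table": "events", "cursor_column": "created_at" }"#)?;
    // ...so it falls back to the default of 1000.
    assert_eq!(params.batch_size, 1000);
    println!("{params:?}");
    Ok(())
}
```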
- pub fn new() -> Self { - Self::default() - } - - /// Sets the table name. - pub fn with_table(mut self, table: impl Into) -> Self { - self.table = Some(table.into()); - self - } - - /// Sets the cursor value. - pub fn with_cursor(mut self, cursor: impl Into) -> Self { - self.cursor = Some(cursor.into()); - self - } - - /// Sets the tiebreaker value. - pub fn with_tiebreaker(mut self, tiebreaker: impl Into) -> Self { - self.tiebreaker = Some(tiebreaker.into()); - self - } - - /// Sets the limit. - pub fn with_limit(mut self, limit: usize) -> Self { - self.limit = Some(limit); - self - } -} diff --git a/crates/nvisy-dal/src/core/output_stream.rs b/crates/nvisy-dal/src/core/streams.rs similarity index 56% rename from crates/nvisy-dal/src/core/output_stream.rs rename to crates/nvisy-dal/src/core/streams.rs index a1676a4..07f1aa5 100644 --- a/crates/nvisy-dal/src/core/output_stream.rs +++ b/crates/nvisy-dal/src/core/streams.rs @@ -1,15 +1,54 @@ -//! Output stream types for writing data. +//! Stream types for reading and writing data. use std::pin::Pin; use std::task::{Context, Poll}; -use futures::Sink; +use futures::stream::BoxStream; +use futures::{Sink, Stream}; -use crate::Error; +use crate::{Error, Result}; + +/// A boxed stream of items with a lifetime. +pub type ItemStream<'a, T> = BoxStream<'a, Result>; /// A boxed sink for items with a lifetime. pub type ItemSink<'a, T> = Pin + Send + 'a>>; +/// Input stream wrapper for reading data. +pub struct InputStream { + stream: ItemStream<'static, T>, +} + +impl InputStream { + /// Creates a new input stream. + pub fn new(stream: ItemStream<'static, T>) -> Self { + Self { stream } + } + + /// Consumes the stream and returns the inner boxed stream. + pub fn into_inner(self) -> ItemStream<'static, T> { + self.stream + } +} + +impl Stream for InputStream { + type Item = Result; + + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + Pin::new(&mut self.stream).poll_next(cx) + } + + fn size_hint(&self) -> (usize, Option) { + self.stream.size_hint() + } +} + +impl std::fmt::Debug for InputStream { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("InputStream").finish_non_exhaustive() + } +} + /// Output stream wrapper for writing data. /// /// Wraps a boxed sink for streaming writes. diff --git a/crates/nvisy-dal/src/core/vector_context.rs b/crates/nvisy-dal/src/core/vector_context.rs deleted file mode 100644 index 50987da..0000000 --- a/crates/nvisy-dal/src/core/vector_context.rs +++ /dev/null @@ -1,21 +0,0 @@ -//! Context for vector database operations. - -/// Context for vector database operations (Qdrant, Pinecone, pgvector). -#[derive(Debug, Clone, Default)] -pub struct VectorContext { - /// Target collection name. - pub collection: Option, -} - -impl VectorContext { - /// Creates a new empty context. - pub fn new() -> Self { - Self::default() - } - - /// Sets the collection name. - pub fn with_collection(mut self, collection: impl Into) -> Self { - self.collection = Some(collection.into()); - self - } -} diff --git a/crates/nvisy-dal/src/datatype/document.rs b/crates/nvisy-dal/src/datatype/document.rs index 7354346..f3389da 100644 --- a/crates/nvisy-dal/src/datatype/document.rs +++ b/crates/nvisy-dal/src/datatype/document.rs @@ -1,6 +1,7 @@ //! Document data type for JSON documents. use serde::{Deserialize, Serialize}; +use serde_json::Value; use super::{DataType, Metadata}; @@ -10,33 +11,10 @@ pub struct Document { /// Unique identifier. 
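The merged streams.rs keeps `ItemStream` as an alias for a boxed stream of results, with `InputStream` as a thin wrapper around it. Below is a standalone sketch of the same boxed-stream pattern using plain `futures` types; a local `String` error stands in for the crate's own error type.

```rust
// Sketch of the ItemStream/InputStream pattern: erase the concrete stream
// type so providers with different internals can share one read() signature.
use futures::stream::{self, BoxStream, StreamExt};

type Item = u32;
type ItemStream<'a, T> = BoxStream<'a, Result<T, String>>;

fn numbers() -> ItemStream<'static, Item> {
    let items: Vec<Result<Item, String>> =
        vec![Ok(1), Ok(2), Err("broken item".to_string())];
    // .boxed() (Box::pin under the hood) performs the type erasure.
    stream::iter(items).boxed()
}

fn main() {
    futures::executor::block_on(async {
        let mut input = numbers();
        while let Some(item) = input.next().await {
            match item {
                Ok(n) => println!("read {n}"),
                Err(e) => eprintln!("stream error: {e}"),
            }
        }
    });
}
```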
pub id: String, /// Document content as JSON. - pub content: serde_json::Value, + pub content: Value, /// Additional metadata. #[serde(default)] pub metadata: Metadata, } -impl Document { - /// Creates a new document. - pub fn new(id: impl Into, content: serde_json::Value) -> Self { - Self { - id: id.into(), - content, - metadata: Metadata::new(), - } - } - - /// Sets metadata. - pub fn with_metadata(mut self, metadata: Metadata) -> Self { - self.metadata = metadata; - self - } -} - -impl DataType for Document { - const TYPE_ID: &'static str = "document"; - - fn data_type_id() -> super::DataTypeId { - super::DataTypeId::Document - } -} +impl DataType for Document {} diff --git a/crates/nvisy-dal/src/datatype/embedding.rs b/crates/nvisy-dal/src/datatype/embedding.rs index 0e9b152..b467741 100644 --- a/crates/nvisy-dal/src/datatype/embedding.rs +++ b/crates/nvisy-dal/src/datatype/embedding.rs @@ -16,32 +16,4 @@ pub struct Embedding { pub metadata: Metadata, } -impl Embedding { - /// Creates a new embedding. - pub fn new(id: impl Into, vector: Vec) -> Self { - Self { - id: id.into(), - vector, - metadata: Metadata::new(), - } - } - - /// Sets metadata. - pub fn with_metadata(mut self, metadata: Metadata) -> Self { - self.metadata = metadata; - self - } - - /// Returns the vector dimensions. - pub fn dimensions(&self) -> usize { - self.vector.len() - } -} - -impl DataType for Embedding { - const TYPE_ID: &'static str = "embedding"; - - fn data_type_id() -> super::DataTypeId { - super::DataTypeId::Embedding - } -} +impl DataType for Embedding {} diff --git a/crates/nvisy-dal/src/datatype/graph.rs b/crates/nvisy-dal/src/datatype/graph.rs index beb222e..47533eb 100644 --- a/crates/nvisy-dal/src/datatype/graph.rs +++ b/crates/nvisy-dal/src/datatype/graph.rs @@ -17,32 +17,7 @@ pub struct Graph { pub edges: Vec, } -impl Graph { - /// Creates a new empty graph. - pub fn new() -> Self { - Self::default() - } - - /// Adds a node. - pub fn with_node(mut self, node: Node) -> Self { - self.nodes.push(node); - self - } - - /// Adds an edge. - pub fn with_edge(mut self, edge: Edge) -> Self { - self.edges.push(edge); - self - } -} - -impl DataType for Graph { - const TYPE_ID: &'static str = "graph"; - - fn data_type_id() -> super::DataTypeId { - super::DataTypeId::Graph - } -} +impl DataType for Graph {} /// A node in a graph. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -57,33 +32,6 @@ pub struct Node { pub properties: HashMap, } -impl Node { - /// Creates a new node. - pub fn new(id: impl Into) -> Self { - Self { - id: id.into(), - labels: Vec::new(), - properties: HashMap::new(), - } - } - - /// Adds a label. - pub fn with_label(mut self, label: impl Into) -> Self { - self.labels.push(label.into()); - self - } - - /// Sets a property. - pub fn with_property( - mut self, - key: impl Into, - value: impl Into, - ) -> Self { - self.properties.insert(key.into(), value.into()); - self - } -} - /// An edge in a graph. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Edge { @@ -99,31 +47,3 @@ pub struct Edge { #[serde(default)] pub properties: HashMap, } - -impl Edge { - /// Creates a new edge. - pub fn new( - id: impl Into, - from: impl Into, - to: impl Into, - label: impl Into, - ) -> Self { - Self { - id: id.into(), - from: from.into(), - to: to.into(), - label: label.into(), - properties: HashMap::new(), - } - } - - /// Sets a property. 
- pub fn with_property( - mut self, - key: impl Into, - value: impl Into, - ) -> Self { - self.properties.insert(key.into(), value.into()); - self - } -} diff --git a/crates/nvisy-dal/src/datatype/message.rs b/crates/nvisy-dal/src/datatype/message.rs index 21ca7a0..189ac07 100644 --- a/crates/nvisy-dal/src/datatype/message.rs +++ b/crates/nvisy-dal/src/datatype/message.rs @@ -24,42 +24,7 @@ pub struct Message { pub timestamp: Option, } -impl Message { - /// Creates a new message. - pub fn new(id: impl Into, payload: impl Into) -> Self { - Self { - id: id.into(), - payload: payload.into(), - headers: HashMap::new(), - timestamp: None, - } - } - - /// Sets a header. - pub fn with_header(mut self, key: impl Into, value: impl Into) -> Self { - self.headers.insert(key.into(), value.into()); - self - } - - /// Sets the timestamp. - pub fn with_timestamp(mut self, timestamp: Timestamp) -> Self { - self.timestamp = Some(timestamp); - self - } - - /// Tries to deserialize the payload as JSON. - pub fn payload_json(&self) -> Result { - serde_json::from_slice(&self.payload) - } -} - -impl DataType for Message { - const TYPE_ID: &'static str = "message"; - - fn data_type_id() -> super::DataTypeId { - super::DataTypeId::Message - } -} +impl DataType for Message {} mod serde_bytes { use bytes::Bytes; diff --git a/crates/nvisy-dal/src/datatype/mod.rs b/crates/nvisy-dal/src/datatype/mod.rs index 46591fe..a3bced8 100644 --- a/crates/nvisy-dal/src/datatype/mod.rs +++ b/crates/nvisy-dal/src/datatype/mod.rs @@ -1,137 +1,44 @@ //! Data types for the DAL. -mod blob; mod document; mod embedding; mod graph; mod message; +mod object; mod record; use std::collections::HashMap; -pub use blob::Blob; use derive_more::From; +use serde::{Deserialize, Serialize}; + pub use document::Document; pub use embedding::Embedding; pub use graph::{Edge, Graph, Node}; pub use message::Message; +pub use object::Object; pub use record::Record; -use serde::{Deserialize, Serialize}; /// Metadata associated with data items. pub type Metadata = HashMap; /// Marker trait for data types that can be read/written through the DAL. -pub trait DataType: Send + Sync + 'static { - /// Unique type identifier. - const TYPE_ID: &'static str; - - /// Returns the corresponding DataTypeId. - fn data_type_id() -> DataTypeId; -} - -/// Data type identifier for runtime type checking and JSON schema. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum DataTypeId { - Blob, - Document, - Embedding, - Graph, - Record, - Message, -} - -impl DataTypeId { - /// Returns the string identifier for this type. - pub const fn as_str(&self) -> &'static str { - match self { - Self::Blob => "blob", - Self::Document => "document", - Self::Embedding => "embedding", - Self::Graph => "graph", - Self::Record => "record", - Self::Message => "message", - } - } -} - -impl std::fmt::Display for DataTypeId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(self.as_str()) - } -} +pub trait DataType: Send + Sync + 'static {} /// Type-erased data value for runtime dispatch. #[derive(Debug, Clone, From, Serialize, Deserialize)] #[serde(tag = "type", content = "data", rename_all = "snake_case")] pub enum AnyDataValue { - Blob(Blob), + /// Object storage item (S3, GCS, etc.). + Object(Object), + /// JSON document. Document(Document), + /// Vector embedding. Embedding(Embedding), + /// Graph with nodes and edges. Graph(Graph), + /// Relational record/row. 
Record(Record), + /// Queue/stream message. Message(Message), } - -impl AnyDataValue { - /// Returns the type identifier for this value. - pub const fn type_id(&self) -> DataTypeId { - match self { - Self::Blob(_) => DataTypeId::Blob, - Self::Document(_) => DataTypeId::Document, - Self::Embedding(_) => DataTypeId::Embedding, - Self::Graph(_) => DataTypeId::Graph, - Self::Record(_) => DataTypeId::Record, - Self::Message(_) => DataTypeId::Message, - } - } - - /// Attempts to extract a Blob value. - pub fn into_blob(self) -> Option { - match self { - Self::Blob(v) => Some(v), - _ => None, - } - } - - /// Attempts to extract a Document value. - pub fn into_document(self) -> Option { - match self { - Self::Document(v) => Some(v), - _ => None, - } - } - - /// Attempts to extract an Embedding value. - pub fn into_embedding(self) -> Option { - match self { - Self::Embedding(v) => Some(v), - _ => None, - } - } - - /// Attempts to extract a Graph value. - pub fn into_graph(self) -> Option { - match self { - Self::Graph(v) => Some(v), - _ => None, - } - } - - /// Attempts to extract a Record value. - pub fn into_record(self) -> Option { - match self { - Self::Record(v) => Some(v), - _ => None, - } - } - - /// Attempts to extract a Message value. - pub fn into_message(self) -> Option { - match self { - Self::Message(v) => Some(v), - _ => None, - } - } -} diff --git a/crates/nvisy-dal/src/datatype/blob.rs b/crates/nvisy-dal/src/datatype/object.rs similarity index 50% rename from crates/nvisy-dal/src/datatype/blob.rs rename to crates/nvisy-dal/src/datatype/object.rs index e6f23a1..009b07f 100644 --- a/crates/nvisy-dal/src/datatype/blob.rs +++ b/crates/nvisy-dal/src/datatype/object.rs @@ -1,14 +1,14 @@ -//! Blob data type for files and objects. +//! Object data type for files and binary objects. use bytes::Bytes; use serde::{Deserialize, Serialize}; use super::{DataType, Metadata}; -/// A blob representing a file or object. +/// An object representing a file or binary data (S3, GCS, Azure Blob). #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Blob { - /// Path or key identifying this blob. +pub struct Object { + /// Path or key identifying this object. pub path: String, /// Raw binary data. #[serde(with = "serde_bytes")] @@ -21,37 +21,7 @@ pub struct Blob { pub metadata: Metadata, } -impl Blob { - /// Creates a new blob. - pub fn new(path: impl Into, data: impl Into) -> Self { - Self { - path: path.into(), - data: data.into(), - content_type: None, - metadata: Metadata::new(), - } - } - - /// Sets the content type. - pub fn with_content_type(mut self, content_type: impl Into) -> Self { - self.content_type = Some(content_type.into()); - self - } - - /// Sets metadata. - pub fn with_metadata(mut self, metadata: Metadata) -> Self { - self.metadata = metadata; - self - } -} - -impl DataType for Blob { - const TYPE_ID: &'static str = "blob"; - - fn data_type_id() -> super::DataTypeId { - super::DataTypeId::Blob - } -} +impl DataType for Object {} mod serde_bytes { use bytes::Bytes; diff --git a/crates/nvisy-dal/src/datatype/record.rs b/crates/nvisy-dal/src/datatype/record.rs index 246fbd1..8255a00 100644 --- a/crates/nvisy-dal/src/datatype/record.rs +++ b/crates/nvisy-dal/src/datatype/record.rs @@ -3,51 +3,15 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; +use serde_json::Value; use super::DataType; /// A record representing a row in a relational table. 
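The slimmed-down `AnyDataValue` keeps its `derive_more::From` derive, so call sites can still lift a concrete data type into the type-erased enum with `.into()`. A minimal sketch of that conversion, assuming `derive_more` with the `from` feature; the types are re-declared locally and the real enum carries the full Object/Document/Embedding/Graph/Record/Message set.

```rust
// Sketch of the derive_more::From conversions on AnyDataValue.
use std::collections::HashMap;

use derive_more::From;
use serde_json::{Value, json};

#[derive(Debug, Default)]
struct Record {
    columns: HashMap<String, Value>,
}

#[derive(Debug, From)]
enum AnyDataValue {
    Record(Record),
    Document(Value), // stand-in for the crate's Document variant
}

fn main() {
    let mut columns = HashMap::new();
    columns.insert("id".to_string(), json!(42));
    columns.insert("email".to_string(), json!("user@example.com"));

    // From<Record> and From<Value> are generated by the derive.
    let row: AnyDataValue = Record { columns }.into();
    let doc: AnyDataValue = json!({ "title": "hello" }).into();
    println!("{row:?}\n{doc:?}");
}
```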
-#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct Record { /// Column values keyed by column name. - pub columns: HashMap, + pub columns: HashMap, } -impl Record { - /// Creates a new empty record. - pub fn new() -> Self { - Self { - columns: HashMap::new(), - } - } - - /// Creates a record from columns. - pub fn from_columns(columns: HashMap) -> Self { - Self { columns } - } - - /// Sets a column value. - pub fn set(mut self, column: impl Into, value: impl Into) -> Self { - self.columns.insert(column.into(), value.into()); - self - } - - /// Gets a column value. - pub fn get(&self, column: &str) -> Option<&serde_json::Value> { - self.columns.get(column) - } -} - -impl Default for Record { - fn default() -> Self { - Self::new() - } -} - -impl DataType for Record { - const TYPE_ID: &'static str = "record"; - - fn data_type_id() -> super::DataTypeId { - super::DataTypeId::Record - } -} +impl DataType for Record {} diff --git a/crates/nvisy-dal/src/error.rs b/crates/nvisy-dal/src/error.rs index 5b4b9b0..4be3f5e 100644 --- a/crates/nvisy-dal/src/error.rs +++ b/crates/nvisy-dal/src/error.rs @@ -96,3 +96,9 @@ impl From for nvisy_core::Error { .with_source(err) } } + +impl From for Error { + fn from(err: serde_json::Error) -> Self { + Error::new(ErrorKind::InvalidInput, err.to_string()) + } +} diff --git a/crates/nvisy-dal/src/lib.rs b/crates/nvisy-dal/src/lib.rs index e8c95fd..bfe6a7e 100644 --- a/crates/nvisy-dal/src/lib.rs +++ b/crates/nvisy-dal/src/lib.rs @@ -2,6 +2,12 @@ //! //! This crate provides a unified interface for reading and writing data //! across various storage backends. +//! +//! # Architecture +//! +//! The DAL is split into two parts: +//! - **Rust**: Streaming, observability, unified interface, server integration +//! - **Python**: Provider implementations, client libraries, external integrations #![forbid(unsafe_code)] #![cfg_attr(docsrs, feature(doc_cfg))] @@ -10,12 +16,14 @@ pub mod core; pub mod datatype; pub mod provider; +mod python; + mod error; pub use core::{ - DataInput, DataOutput, InputStream, ItemSink, ItemStream, ObjectContext, OutputStream, - Provider, RelationalContext, VectorContext, + AnyContext, DataInput, DataOutput, InputStream, ItemSink, ItemStream, ObjectContext, + OutputStream, Provider, RelationalContext, VectorContext, }; -pub use datatype::{AnyDataValue, DataTypeId}; +pub use datatype::{AnyDataValue, DataType, Document, Embedding, Graph, Message, Object, Record}; pub use error::{BoxError, Error, ErrorKind, Result}; diff --git a/crates/nvisy-dal/src/provider/azblob/config.rs b/crates/nvisy-dal/src/provider/azblob/config.rs deleted file mode 100644 index 61b13c9..0000000 --- a/crates/nvisy-dal/src/provider/azblob/config.rs +++ /dev/null @@ -1,26 +0,0 @@ -//! Azure Blob Storage configuration types. - -use serde::{Deserialize, Serialize}; - -/// Azure Blob Storage credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct AzblobCredentials { - /// Storage account name. - pub account_name: String, - /// Account key for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub account_key: Option, - /// SAS token for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub sas_token: Option, -} - -/// Azure Blob Storage parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct AzblobParams { - /// Container name. - pub container: String, - /// Path prefix within the container. 
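The new `From<serde_json::Error> for Error` impl in error.rs exists so DAL code can use `?` directly on serde_json results instead of sprinkling `map_err`. A self-contained sketch of the effect, with stand-in `Error`/`ErrorKind` types that only mirror the crate's shape:

```rust
// Sketch of what the From<serde_json::Error> impl buys call sites.
#[derive(Debug)]
enum ErrorKind {
    InvalidInput,
}

#[derive(Debug)]
struct Error {
    kind: ErrorKind,
    message: String,
}

impl From<serde_json::Error> for Error {
    fn from(err: serde_json::Error) -> Self {
        Error {
            kind: ErrorKind::InvalidInput,
            message: err.to_string(),
        }
    }
}

fn parse_columns(raw: &str) -> Result<serde_json::Value, Error> {
    // Without the From impl this line would need an explicit map_err(...).
    let value = serde_json::from_str(raw)?;
    Ok(value)
}

fn main() {
    println!("{:?}", parse_columns(r#"{"id": 1}"#));
    println!("{:?}", parse_columns("not json"));
}
```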
- #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} diff --git a/crates/nvisy-dal/src/provider/azblob/input.rs b/crates/nvisy-dal/src/provider/azblob/input.rs deleted file mode 100644 index 2ca1b21..0000000 --- a/crates/nvisy-dal/src/provider/azblob/input.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Azure Blob DataInput implementation. - -use async_trait::async_trait; -use futures::StreamExt; - -use super::AzblobProvider; -use crate::core::{DataInput, InputStream, ObjectContext}; -use crate::datatype::Blob; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataInput for AzblobProvider { - type Context = ObjectContext; - type Item = Blob; - - async fn read(&self, ctx: &ObjectContext) -> Result> { - let prefix = ctx.prefix.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let path = entry.path().to_string(); - if path.ends_with('/') { - return None; - } - - match op.read(&path).await { - Ok(data) => { - let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await - && let Some(ct) = meta.content_type() - { - blob = blob.with_content_type(ct); - } - Some(Ok(blob)) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} diff --git a/crates/nvisy-dal/src/provider/azblob/mod.rs b/crates/nvisy-dal/src/provider/azblob/mod.rs deleted file mode 100644 index 2c8d215..0000000 --- a/crates/nvisy-dal/src/provider/azblob/mod.rs +++ /dev/null @@ -1,56 +0,0 @@ -//! Azure Blob Storage provider. - -mod config; -mod input; -mod output; - -pub use config::{AzblobCredentials, AzblobParams}; -use opendal::{Operator, services}; - -use crate::core::Provider; -use crate::error::Error; - -/// Azure Blob Storage provider for blob storage. -#[derive(Clone)] -pub struct AzblobProvider { - operator: Operator, -} - -#[async_trait::async_trait] -impl Provider for AzblobProvider { - type Credentials = AzblobCredentials; - type Params = AzblobParams; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let mut builder = services::Azblob::default() - .account_name(&credentials.account_name) - .container(¶ms.container); - - if let Some(ref account_key) = credentials.account_key { - builder = builder.account_key(account_key); - } - - if let Some(ref sas_token) = credentials.sas_token { - builder = builder.sas_token(sas_token); - } - - if let Some(ref prefix) = params.prefix { - builder = builder.root(prefix); - } - - let operator = Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| Error::connection(e.to_string()))?; - - Ok(Self { operator }) - } -} - -impl std::fmt::Debug for AzblobProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("AzblobProvider").finish() - } -} diff --git a/crates/nvisy-dal/src/provider/azblob/output.rs b/crates/nvisy-dal/src/provider/azblob/output.rs deleted file mode 100644 index 49fa07e..0000000 --- a/crates/nvisy-dal/src/provider/azblob/output.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Azure Blob DataOutput implementation. 
- -use async_trait::async_trait; - -use super::AzblobProvider; -use crate::core::DataOutput; -use crate::datatype::Blob; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for AzblobProvider { - type Item = Blob; - - async fn write(&self, items: Vec) -> Result<()> { - for blob in items { - self.operator - .write(&blob.path, blob.data) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/provider/gcs/config.rs b/crates/nvisy-dal/src/provider/gcs/config.rs deleted file mode 100644 index edcabc6..0000000 --- a/crates/nvisy-dal/src/provider/gcs/config.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Google Cloud Storage configuration types. - -use serde::{Deserialize, Serialize}; - -/// Google Cloud Storage credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct GcsCredentials { - /// Service account credentials JSON. - pub credentials_json: String, -} - -/// Google Cloud Storage parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct GcsParams { - /// Bucket name. - pub bucket: String, - /// Path prefix within the bucket. - #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} diff --git a/crates/nvisy-dal/src/provider/gcs/input.rs b/crates/nvisy-dal/src/provider/gcs/input.rs deleted file mode 100644 index c709bb8..0000000 --- a/crates/nvisy-dal/src/provider/gcs/input.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! GCS DataInput implementation. - -use async_trait::async_trait; -use futures::StreamExt; - -use super::GcsProvider; -use crate::core::{DataInput, InputStream, ObjectContext}; -use crate::datatype::Blob; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataInput for GcsProvider { - type Context = ObjectContext; - type Item = Blob; - - async fn read(&self, ctx: &ObjectContext) -> Result> { - let prefix = ctx.prefix.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let path = entry.path().to_string(); - if path.ends_with('/') { - return None; - } - - match op.read(&path).await { - Ok(data) => { - let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await - && let Some(ct) = meta.content_type() - { - blob = blob.with_content_type(ct); - } - Some(Ok(blob)) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} diff --git a/crates/nvisy-dal/src/provider/gcs/mod.rs b/crates/nvisy-dal/src/provider/gcs/mod.rs deleted file mode 100644 index 04f1ac4..0000000 --- a/crates/nvisy-dal/src/provider/gcs/mod.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! Google Cloud Storage provider. - -mod config; -mod input; -mod output; - -pub use config::{GcsCredentials, GcsParams}; -use opendal::{Operator, services}; - -use crate::core::Provider; -use crate::error::Error; - -/// Google Cloud Storage provider for blob storage. 
-#[derive(Clone)] -pub struct GcsProvider { - operator: Operator, -} - -#[async_trait::async_trait] -impl Provider for GcsProvider { - type Credentials = GcsCredentials; - type Params = GcsParams; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let mut builder = services::Gcs::default() - .bucket(¶ms.bucket) - .credential(&credentials.credentials_json); - - if let Some(ref prefix) = params.prefix { - builder = builder.root(prefix); - } - - let operator = Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| Error::connection(e.to_string()))?; - - Ok(Self { operator }) - } -} - -impl std::fmt::Debug for GcsProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("GcsProvider").finish() - } -} diff --git a/crates/nvisy-dal/src/provider/gcs/output.rs b/crates/nvisy-dal/src/provider/gcs/output.rs deleted file mode 100644 index 73c79bd..0000000 --- a/crates/nvisy-dal/src/provider/gcs/output.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! GCS DataOutput implementation. - -use async_trait::async_trait; - -use super::GcsProvider; -use crate::core::DataOutput; -use crate::datatype::Blob; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for GcsProvider { - type Item = Blob; - - async fn write(&self, items: Vec) -> Result<()> { - for blob in items { - self.operator - .write(&blob.path, blob.data) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/provider/mod.rs b/crates/nvisy-dal/src/provider/mod.rs index 8a51028..3c970c3 100644 --- a/crates/nvisy-dal/src/provider/mod.rs +++ b/crates/nvisy-dal/src/provider/mod.rs @@ -1,26 +1,27 @@ -//! Data providers for various storage backends. +//! Provider implementations for external services. +//! +//! Each provider module exports credentials and params types +//! along with the main provider struct. +//! +//! Data types for input/output are in the `datatype` module: +//! - `Record` for PostgreSQL rows +//! - `Object` for S3 objects +//! - `Embedding` for Pinecone vectors +//! +//! Context types for pagination are in the `core` module: +//! - `RelationalContext` for relational databases +//! - `ObjectContext` for object storage +//! - `VectorContext` for vector databases +//! +//! Available providers: +//! - `postgres`: PostgreSQL relational database +//! - `s3`: AWS S3 / MinIO object storage +//! 
- `pinecone`: Pinecone vector database -mod azblob; -mod gcs; - -mod mysql; -mod pgvector; mod pinecone; mod postgres; -mod qdrant; mod s3; -// Object storage providers -pub use azblob::{AzblobCredentials, AzblobParams, AzblobProvider}; -pub use gcs::{GcsCredentials, GcsParams, GcsProvider}; -// Vector database providers - -// Relational database providers -pub use mysql::{MysqlCredentials, MysqlParams, MysqlProvider}; -pub use pgvector::{ - DistanceMetric, IndexType, PgVectorCredentials, PgVectorParams, PgVectorProvider, -}; -pub use pinecone::{PineconeCredentials, PineconeParams, PineconeProvider}; -pub use postgres::{PostgresCredentials, PostgresParams, PostgresProvider}; -pub use qdrant::{QdrantCredentials, QdrantParams, QdrantProvider}; -pub use s3::{S3Credentials, S3Params, S3Provider}; +pub use self::pinecone::{PineconeCredentials, PineconeParams, PineconeProvider}; +pub use self::postgres::{PostgresCredentials, PostgresParams, PostgresProvider}; +pub use self::s3::{S3Credentials, S3Params, S3Provider}; diff --git a/crates/nvisy-dal/src/provider/mysql/config.rs b/crates/nvisy-dal/src/provider/mysql/config.rs deleted file mode 100644 index 92c9e12..0000000 --- a/crates/nvisy-dal/src/provider/mysql/config.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! MySQL configuration types. - -use serde::{Deserialize, Serialize}; - -/// MySQL credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct MysqlCredentials { - /// Connection string (e.g., "mysql://user:pass@host:3306/db"). - pub connection_string: String, -} - -/// MySQL parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct MysqlParams { - /// Table name. - pub table: String, - /// Database name. - #[serde(skip_serializing_if = "Option::is_none")] - pub database: Option, -} diff --git a/crates/nvisy-dal/src/provider/mysql/input.rs b/crates/nvisy-dal/src/provider/mysql/input.rs deleted file mode 100644 index a058128..0000000 --- a/crates/nvisy-dal/src/provider/mysql/input.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! MySQL DataInput implementation. 
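The rewritten provider module docs describe the same two-step flow the removed providers used: connect with `(params, credentials)`, then `read(&ctx)` to obtain a stream. The following is a simplified, self-contained sketch of that pairing; the traits are re-declared here with a plain `String` error and toy associated types, while the real `Provider`/`DataInput` signatures live in `core/mod.rs` and differ in detail.

```rust
// Sketch of the Provider / DataInput pairing described in provider/mod.rs.
use async_trait::async_trait;
use futures::stream::{self, BoxStream, StreamExt};

type Result<T> = std::result::Result<T, String>;

#[async_trait]
trait Provider: Sized {
    type Credentials;
    type Params;
    async fn connect(params: Self::Params, credentials: Self::Credentials) -> Result<Self>;
}

#[async_trait]
trait DataInput {
    type Context;
    type Item;
    async fn read(&self, ctx: &Self::Context) -> Result<BoxStream<'static, Result<Self::Item>>>;
}

struct MemoryProvider {
    rows: Vec<String>,
}

#[async_trait]
impl Provider for MemoryProvider {
    type Credentials = ();
    type Params = Vec<String>;
    async fn connect(params: Self::Params, _credentials: ()) -> Result<Self> {
        Ok(Self { rows: params })
    }
}

#[async_trait]
impl DataInput for MemoryProvider {
    type Context = usize; // stand-in for a context: just a read limit
    type Item = String;
    async fn read(&self, limit: &usize) -> Result<BoxStream<'static, Result<String>>> {
        let rows: Vec<Result<String>> =
            self.rows.iter().take(*limit).cloned().map(Ok).collect();
        Ok(stream::iter(rows).boxed())
    }
}

fn main() -> Result<()> {
    futures::executor::block_on(async {
        let provider =
            MemoryProvider::connect(vec!["a".into(), "b".into(), "c".into()], ()).await?;
        let mut input = provider.read(&2).await?;
        while let Some(row) = input.next().await {
            println!("row: {}", row?);
        }
        Ok(())
    })
}
```

Splitting params from credentials keeps the sensitive half out of serialized configs, which is the design choice the remaining postgres/s3/pinecone providers preserve.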
- -use std::collections::HashMap; - -use async_trait::async_trait; -use futures::StreamExt; - -use super::MysqlProvider; -use crate::core::{DataInput, InputStream, RelationalContext}; -use crate::datatype::Record; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataInput for MysqlProvider { - type Context = RelationalContext; - type Item = Record; - - async fn read(&self, ctx: &RelationalContext) -> Result> { - let prefix = ctx.table.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let key = entry.path().to_string(); - match op.read(&key).await { - Ok(data) => { - let value: serde_json::Value = - serde_json::from_slice(&data.to_bytes()) - .unwrap_or(serde_json::json!({})); - - let columns: HashMap = - if let serde_json::Value::Object(map) = value { - map.into_iter().collect() - } else { - let mut cols = HashMap::new(); - cols.insert("_key".to_string(), serde_json::json!(key)); - cols.insert("_value".to_string(), value); - cols - }; - - Some(Ok(Record::from_columns(columns))) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} diff --git a/crates/nvisy-dal/src/provider/mysql/mod.rs b/crates/nvisy-dal/src/provider/mysql/mod.rs deleted file mode 100644 index e5cfa8f..0000000 --- a/crates/nvisy-dal/src/provider/mysql/mod.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! MySQL provider. - -mod config; -mod input; -mod output; - -pub use config::{MysqlCredentials, MysqlParams}; -use opendal::{Operator, services}; - -use crate::core::Provider; -use crate::error::Error; - -/// MySQL provider for relational data. -#[derive(Clone)] -pub struct MysqlProvider { - operator: Operator, -} - -#[async_trait::async_trait] -impl Provider for MysqlProvider { - type Credentials = MysqlCredentials; - type Params = MysqlParams; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let mut builder = services::Mysql::default() - .connection_string(&credentials.connection_string) - .table(¶ms.table); - - if let Some(ref database) = params.database { - builder = builder.root(database); - } - - let operator = Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| Error::connection(e.to_string()))?; - - Ok(Self { operator }) - } -} - -impl std::fmt::Debug for MysqlProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MysqlProvider").finish() - } -} diff --git a/crates/nvisy-dal/src/provider/mysql/output.rs b/crates/nvisy-dal/src/provider/mysql/output.rs deleted file mode 100644 index 568668e..0000000 --- a/crates/nvisy-dal/src/provider/mysql/output.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! MySQL DataOutput implementation. 
- -use async_trait::async_trait; - -use super::MysqlProvider; -use crate::core::DataOutput; -use crate::datatype::Record; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for MysqlProvider { - type Item = Record; - - async fn write(&self, items: Vec) -> Result<()> { - for record in items { - let key = record - .get("_key") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) - .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); - - let value = - serde_json::to_vec(&record.columns).map_err(|e| Error::provider(e.to_string()))?; - - self.operator - .write(&key, value) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/provider/pgvector/config.rs b/crates/nvisy-dal/src/provider/pgvector/config.rs deleted file mode 100644 index 5d6b7bb..0000000 --- a/crates/nvisy-dal/src/provider/pgvector/config.rs +++ /dev/null @@ -1,60 +0,0 @@ -//! pgvector configuration types. - -use serde::{Deserialize, Serialize}; - -/// pgvector credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PgVectorCredentials { - /// PostgreSQL connection URL. - pub connection_url: String, -} - -/// pgvector parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PgVectorParams { - /// Table name for vectors. - pub table: String, - /// Vector dimensions. - pub dimensions: usize, - /// Distance metric. - #[serde(default)] - pub distance_metric: DistanceMetric, - /// Index type for similarity search. - #[serde(default)] - pub index_type: IndexType, -} - -/// Distance metric for pgvector. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum DistanceMetric { - /// L2 (Euclidean) distance. - #[default] - L2, - /// Inner product (dot product). - InnerProduct, - /// Cosine distance. - Cosine, -} - -impl DistanceMetric { - /// Returns the pgvector operator for this metric. - pub fn operator(&self) -> &'static str { - match self { - Self::L2 => "<->", - Self::InnerProduct => "<#>", - Self::Cosine => "<=>", - } - } -} - -/// Index type for pgvector. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum IndexType { - /// IVFFlat index (faster build, good recall). - #[default] - IvfFlat, - /// HNSW index (slower build, better recall). - Hnsw, -} diff --git a/crates/nvisy-dal/src/provider/pgvector/mod.rs b/crates/nvisy-dal/src/provider/pgvector/mod.rs deleted file mode 100644 index 28ced98..0000000 --- a/crates/nvisy-dal/src/provider/pgvector/mod.rs +++ /dev/null @@ -1,247 +0,0 @@ -//! PostgreSQL pgvector provider. - -mod config; -mod output; - -use std::collections::HashMap; - -pub use config::{DistanceMetric, IndexType, PgVectorCredentials, PgVectorParams}; -use diesel::prelude::*; -use diesel::sql_types::{Float, Integer, Text}; -use diesel_async::pooled_connection::AsyncDieselConnectionManager; -use diesel_async::pooled_connection::deadpool::Pool; -use diesel_async::{AsyncPgConnection, RunQueryDsl}; - -use crate::core::Provider; -use crate::error::{Error, Result}; - -/// pgvector provider for vector storage using PostgreSQL. 
-pub struct PgVectorProvider { - pool: Pool, - params: PgVectorParams, -} - -#[async_trait::async_trait] -impl Provider for PgVectorProvider { - type Credentials = PgVectorCredentials; - type Params = PgVectorParams; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let manager = - AsyncDieselConnectionManager::::new(&credentials.connection_url); - - let pool = Pool::builder(manager) - .build() - .map_err(|e| Error::connection(e.to_string()))?; - - { - let mut conn = pool - .get() - .await - .map_err(|e| Error::connection(e.to_string()))?; - - diesel::sql_query("CREATE EXTENSION IF NOT EXISTS vector") - .execute(&mut conn) - .await - .map_err(|e| { - Error::provider(format!("Failed to create vector extension: {}", e)) - })?; - } - - Ok(Self { pool, params }) - } -} - -impl PgVectorProvider { - /// Returns the configured table name. - pub fn table(&self) -> &str { - &self.params.table - } - - pub(crate) async fn get_conn( - &self, - ) -> Result>> { - self.pool - .get() - .await - .map_err(|e| Error::connection(e.to_string())) - } - - pub(crate) fn distance_operator(&self) -> &'static str { - self.params.distance_metric.operator() - } - - /// Ensures a collection (table) exists, creating it if necessary. - pub(crate) async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { - let mut conn = self.get_conn().await?; - - let create_table = format!( - r#" - CREATE TABLE IF NOT EXISTS {} ( - id VARCHAR(256) PRIMARY KEY, - vector vector({}), - metadata JSONB DEFAULT '{{}}'::jsonb, - created_at TIMESTAMPTZ DEFAULT NOW() - ) - "#, - name, dimensions - ); - - diesel::sql_query(&create_table) - .execute(&mut conn) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let index_name = format!("{}_vector_idx", name); - let operator = self.distance_operator(); - - let create_index = match self.params.index_type { - IndexType::IvfFlat => { - format!( - r#" - CREATE INDEX IF NOT EXISTS {} ON {} - USING ivfflat (vector {}) - WITH (lists = 100) - "#, - index_name, name, operator - ) - } - IndexType::Hnsw => { - format!( - r#" - CREATE INDEX IF NOT EXISTS {} ON {} - USING hnsw (vector {}) - WITH (m = 16, ef_construction = 64) - "#, - index_name, name, operator - ) - } - }; - - diesel::sql_query(&create_index) - .execute(&mut conn) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - Ok(()) - } - - /// Searches for similar vectors. 
- pub async fn search( - &self, - collection: &str, - query: Vec, - limit: usize, - include_vectors: bool, - ) -> Result> { - let mut conn = self.get_conn().await?; - - let operator = self.distance_operator(); - let vector_str = format!( - "[{}]", - query - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ); - - let vector_column = if include_vectors { - ", vector::text as vector_data" - } else { - "" - }; - - let score_expr = match self.params.distance_metric { - DistanceMetric::L2 => format!("vector {} $1::vector", operator), - DistanceMetric::InnerProduct => format!("-(vector {} $1::vector)", operator), - DistanceMetric::Cosine => format!("1 - (vector {} $1::vector)", operator), - }; - - let search_query = format!( - r#" - SELECT id, {} as score{}, metadata::text as metadata_json - FROM {} - ORDER BY vector {} $1::vector - LIMIT $2 - "#, - score_expr, vector_column, collection, operator - ); - - let results: Vec = diesel::sql_query(&search_query) - .bind::(&vector_str) - .bind::(limit as i32) - .load(&mut conn) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let search_results = results - .into_iter() - .map(|row| { - let metadata: HashMap = - serde_json::from_str(&row.metadata_json).unwrap_or_default(); - - let vector = if include_vectors { - row.vector_data.and_then(|v| parse_vector(&v).ok()) - } else { - None - }; - - SearchResult { - id: row.id, - score: row.score, - vector, - metadata, - } - }) - .collect(); - - Ok(search_results) - } -} - -/// Result from a vector similarity search. -#[derive(Debug, Clone)] -pub struct SearchResult { - /// The ID of the matched vector. - pub id: String, - /// Similarity score. - pub score: f32, - /// The vector data, if requested. - pub vector: Option>, - /// Metadata associated with this vector. - pub metadata: HashMap, -} - -impl std::fmt::Debug for PgVectorProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PgVectorProvider").finish() - } -} - -fn parse_vector(s: &str) -> Result> { - let trimmed = s.trim_start_matches('[').trim_end_matches(']'); - trimmed - .split(',') - .map(|s| { - s.trim() - .parse::() - .map_err(|e| Error::provider(e.to_string())) - }) - .collect() -} - -#[derive(QueryableByName)] -struct SearchRow { - #[diesel(sql_type = Text)] - id: String, - #[diesel(sql_type = Float)] - score: f32, - #[diesel(sql_type = Text)] - metadata_json: String, - #[diesel(sql_type = diesel::sql_types::Nullable)] - vector_data: Option, -} diff --git a/crates/nvisy-dal/src/provider/pgvector/output.rs b/crates/nvisy-dal/src/provider/pgvector/output.rs deleted file mode 100644 index 5c29557..0000000 --- a/crates/nvisy-dal/src/provider/pgvector/output.rs +++ /dev/null @@ -1,65 +0,0 @@ -//! pgvector DataOutput implementation. 
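For reference, the removed `PgVectorProvider::search` above derives both the `ORDER BY` operator and the reported score from the configured `DistanceMetric`. A condensed sketch of that mapping (illustrative only; the enum itself is deleted in this hunk):

```rust
fn main() {
    // (metric, ORDER BY operator, score expression returned to callers)
    let conventions = [
        ("l2", "<->", "vector <-> $1::vector"),
        ("inner_product", "<#>", "-(vector <#> $1::vector)"),
        ("cosine", "<=>", "1 - (vector <=> $1::vector)"),
    ];
    for (metric, operator, score) in conventions {
        println!("{metric}: ORDER BY vector {operator} $1::vector  =>  score = {score}");
    }
}
```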
- -use async_trait::async_trait; -use diesel::sql_types::Text; -use diesel_async::RunQueryDsl; - -use super::PgVectorProvider; -use crate::core::DataOutput; -use crate::datatype::Embedding; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for PgVectorProvider { - type Item = Embedding; - - async fn write(&self, items: Vec) -> Result<()> { - if items.is_empty() { - return Ok(()); - } - - let table = self.table(); - - let dimensions = <[_]>::first(&items) - .map(|v| v.vector.len()) - .ok_or_else(|| Error::invalid_input("No embeddings provided"))?; - - self.ensure_collection(table, dimensions).await?; - - let mut conn = self.get_conn().await?; - - for v in items { - let vector_str = format!( - "[{}]", - v.vector - .iter() - .map(|f| f.to_string()) - .collect::>() - .join(",") - ); - let metadata_json = - serde_json::to_string(&v.metadata).unwrap_or_else(|_| "{}".to_string()); - - let upsert_query = format!( - r#" - INSERT INTO {} (id, vector, metadata) - VALUES ($1, $2::vector, $3::jsonb) - ON CONFLICT (id) DO UPDATE SET - vector = EXCLUDED.vector, - metadata = EXCLUDED.metadata - "#, - table - ); - - diesel::sql_query(&upsert_query) - .bind::(&v.id) - .bind::(&vector_str) - .bind::(&metadata_json) - .execute(&mut conn) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/provider/pinecone.rs b/crates/nvisy-dal/src/provider/pinecone.rs new file mode 100644 index 0000000..f2a23f3 --- /dev/null +++ b/crates/nvisy-dal/src/provider/pinecone.rs @@ -0,0 +1,77 @@ +//! Pinecone vector database provider. +//! +//! Provides vector upsert operations for the Pinecone vector database. + +use serde::{Deserialize, Serialize}; + +use crate::Result; +use crate::core::{DataOutput, Provider}; +use crate::datatype::Embedding; +use crate::python::{PyDataOutput, PyProvider, PyProviderLoader}; + +/// Credentials for Pinecone connection. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PineconeCredentials { + /// Pinecone API key. + pub api_key: String, +} + +/// Parameters for Pinecone operations. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PineconeParams { + /// Index name. + pub index_name: String, + /// Namespace within the index. + pub namespace: String, +} + +/// Pinecone provider for vector upsert operations. +pub struct PineconeProvider { + inner: PyProvider, + output: PyDataOutput, +} + +impl PineconeProvider { + /// Disconnects from Pinecone. 
+ pub async fn disconnect(self) -> Result<()> { + self.inner.disconnect().await + } +} + +#[async_trait::async_trait] +impl Provider for PineconeProvider { + type Credentials = PineconeCredentials; + type Params = PineconeParams; + + async fn connect( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let loader = PyProviderLoader::new().map_err(crate::Error::from)?; + let creds_json = serde_json::to_value(&credentials).map_err(crate::Error::from)?; + let params_json = serde_json::to_value(¶ms).map_err(crate::Error::from)?; + + let inner = loader + .load("pinecone", creds_json, params_json) + .await + .map_err(crate::Error::from)?; + let output = PyDataOutput::new(PyProvider::new(inner.clone_py_object())); + + Ok(Self { inner, output }) + } +} + +#[async_trait::async_trait] +impl DataOutput for PineconeProvider { + type Item = Embedding; + + async fn write(&self, items: Vec) -> Result<()> { + self.output.write(items).await + } +} + +impl std::fmt::Debug for PineconeProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PineconeProvider").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/provider/pinecone/config.rs b/crates/nvisy-dal/src/provider/pinecone/config.rs deleted file mode 100644 index 1d58997..0000000 --- a/crates/nvisy-dal/src/provider/pinecone/config.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Pinecone configuration types. - -use serde::{Deserialize, Serialize}; - -/// Pinecone credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PineconeCredentials { - /// Pinecone API key. - pub api_key: String, -} - -/// Pinecone parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PineconeParams { - /// Index name. - pub index: String, - /// Namespace. - #[serde(skip_serializing_if = "Option::is_none")] - pub namespace: Option, - /// Vector dimensions. - #[serde(skip_serializing_if = "Option::is_none")] - pub dimensions: Option, -} diff --git a/crates/nvisy-dal/src/provider/pinecone/mod.rs b/crates/nvisy-dal/src/provider/pinecone/mod.rs deleted file mode 100644 index 010c648..0000000 --- a/crates/nvisy-dal/src/provider/pinecone/mod.rs +++ /dev/null @@ -1,215 +0,0 @@ -//! Pinecone vector store provider. - -mod config; -mod output; - -use std::collections::{BTreeMap, HashMap}; - -pub use config::{PineconeCredentials, PineconeParams}; -use pinecone_sdk::models::{Kind, Metadata, Namespace, Value as PineconeValue}; -use pinecone_sdk::pinecone::PineconeClientConfig; -use pinecone_sdk::pinecone::data::Index; -use tokio::sync::Mutex; - -use crate::core::Provider; -use crate::error::{Error, Result}; - -/// Pinecone provider for vector storage. 
-pub struct PineconeProvider { - index: Mutex, - params: PineconeParams, -} - -#[async_trait::async_trait] -impl Provider for PineconeProvider { - type Credentials = PineconeCredentials; - type Params = PineconeParams; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let client_config = PineconeClientConfig { - api_key: Some(credentials.api_key), - ..Default::default() - }; - - let client = client_config - .client() - .map_err(|e| Error::connection(e.to_string()))?; - - let index_description = client - .describe_index(¶ms.index) - .await - .map_err(|e| Error::connection(format!("Failed to describe index: {}", e)))?; - - let host = &index_description.host; - - let index = client - .index(host) - .await - .map_err(|e| Error::connection(format!("Failed to connect to index: {}", e)))?; - - Ok(Self { - index: Mutex::new(index), - params, - }) - } -} - -impl PineconeProvider { - pub(crate) fn get_namespace(&self, collection: &str) -> Namespace { - if collection.is_empty() { - self.params - .namespace - .as_ref() - .map(|ns| Namespace::from(ns.as_str())) - .unwrap_or_default() - } else { - Namespace::from(collection) - } - } - - /// Returns the configured namespace. - pub fn namespace(&self) -> Option<&str> { - self.params.namespace.as_deref() - } - - /// Searches for similar vectors. - pub async fn search( - &self, - collection: &str, - query: Vec, - limit: usize, - include_vectors: bool, - include_metadata: bool, - filter: Option<&serde_json::Value>, - ) -> Result> { - let namespace = self.get_namespace(collection); - - let filter_metadata: Option = filter.and_then(|f| { - if let serde_json::Value::Object(obj) = f { - let map: HashMap = obj.clone().into_iter().collect(); - Some(hashmap_to_metadata(map)) - } else { - None - } - }); - - let mut index = self.index.lock().await; - let response = index - .query_by_value( - query, - None, - limit as u32, - &namespace, - filter_metadata, - Some(include_vectors), - Some(include_metadata), - ) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let results = response - .matches - .into_iter() - .map(|m| { - let metadata = m.metadata.map(metadata_to_hashmap).unwrap_or_default(); - - SearchResult { - id: m.id, - score: m.score, - vector: Some(m.values), - metadata, - } - }) - .collect(); - - Ok(results) - } -} - -/// Result from a vector similarity search. -#[derive(Debug, Clone)] -pub struct SearchResult { - /// The ID of the matched vector. - pub id: String, - /// Similarity score. - pub score: f32, - /// The vector data, if requested. - pub vector: Option>, - /// Metadata associated with this vector. 
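A connect-and-write sketch for the Python-backed `PineconeProvider` added in `provider/pinecone.rs`. The module paths, the Tokio runtime, the use of `anyhow` for brevity, and the `Embedding` struct literal (its `id`, `vector`, and `metadata` fields are inferred from how other providers in this crate use the type) are assumptions; the `nvisy_dal` Python package must be importable at runtime:

```rust
use std::collections::HashMap;

use nvisy_dal::core::{DataOutput, Provider};
use nvisy_dal::datatype::Embedding;
use nvisy_dal::provider::pinecone::{PineconeCredentials, PineconeParams, PineconeProvider};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let credentials = PineconeCredentials {
        api_key: std::env::var("PINECONE_API_KEY")?,
    };
    let params = PineconeParams {
        index_name: "documents".to_string(),
        namespace: "default".to_string(),
    };

    // connect() loads the Python-side provider through PyProviderLoader.
    let provider = PineconeProvider::connect(params, credentials).await?;

    let embedding = Embedding {
        id: "chunk-1".to_string(),
        vector: vec![0.1, 0.2, 0.3],
        metadata: HashMap::new(),
    };
    provider.write(vec![embedding]).await?;

    provider.disconnect().await?;
    Ok(())
}
```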
- pub metadata: HashMap, -} - -impl std::fmt::Debug for PineconeProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PineconeProvider").finish() - } -} - -fn metadata_to_hashmap(metadata: Metadata) -> HashMap { - metadata - .fields - .into_iter() - .map(|(k, v)| (k, pinecone_value_to_json(v))) - .collect() -} - -pub(crate) fn hashmap_to_metadata(map: HashMap) -> Metadata { - let fields: BTreeMap = map - .into_iter() - .map(|(k, v)| (k, json_to_pinecone_value(v))) - .collect(); - - Metadata { fields } -} - -fn pinecone_value_to_json(value: PineconeValue) -> serde_json::Value { - match value.kind { - Some(Kind::NullValue(_)) => serde_json::Value::Null, - Some(Kind::NumberValue(n)) => serde_json::Value::Number( - serde_json::Number::from_f64(n).unwrap_or(serde_json::Number::from(0)), - ), - Some(Kind::StringValue(s)) => serde_json::Value::String(s), - Some(Kind::BoolValue(b)) => serde_json::Value::Bool(b), - Some(Kind::StructValue(s)) => { - let map: serde_json::Map = s - .fields - .into_iter() - .map(|(k, v)| (k, pinecone_value_to_json(v))) - .collect(); - serde_json::Value::Object(map) - } - Some(Kind::ListValue(list)) => { - let arr: Vec = list - .values - .into_iter() - .map(pinecone_value_to_json) - .collect(); - serde_json::Value::Array(arr) - } - None => serde_json::Value::Null, - } -} - -fn json_to_pinecone_value(value: serde_json::Value) -> PineconeValue { - let kind = match value { - serde_json::Value::Null => Some(Kind::NullValue(0)), - serde_json::Value::Bool(b) => Some(Kind::BoolValue(b)), - serde_json::Value::Number(n) => Some(Kind::NumberValue(n.as_f64().unwrap_or(0.0))), - serde_json::Value::String(s) => Some(Kind::StringValue(s)), - serde_json::Value::Array(arr) => Some(Kind::ListValue(prost_types::ListValue { - values: arr.into_iter().map(json_to_pinecone_value).collect(), - })), - serde_json::Value::Object(obj) => { - let fields: BTreeMap = obj - .into_iter() - .map(|(k, v)| (k, json_to_pinecone_value(v))) - .collect(); - Some(Kind::StructValue(prost_types::Struct { fields })) - } - }; - - PineconeValue { kind } -} diff --git a/crates/nvisy-dal/src/provider/pinecone/output.rs b/crates/nvisy-dal/src/provider/pinecone/output.rs deleted file mode 100644 index 39629e1..0000000 --- a/crates/nvisy-dal/src/provider/pinecone/output.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Pinecone DataOutput implementation. - -use async_trait::async_trait; -use pinecone_sdk::models::Vector; - -use super::{PineconeProvider, hashmap_to_metadata}; -use crate::core::DataOutput; -use crate::datatype::Embedding; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for PineconeProvider { - type Item = Embedding; - - async fn write(&self, items: Vec) -> Result<()> { - let namespace = self - .namespace() - .map(pinecone_sdk::models::Namespace::from) - .unwrap_or_default(); - - let pinecone_vectors: Vec = items - .into_iter() - .map(|v| { - let metadata = if v.metadata.is_empty() { - None - } else { - Some(hashmap_to_metadata(v.metadata)) - }; - - Vector { - id: v.id, - values: v.vector, - sparse_values: None, - metadata, - } - }) - .collect(); - - let mut index = self.index.lock().await; - index - .upsert(&pinecone_vectors, &namespace) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/provider/postgres.rs b/crates/nvisy-dal/src/provider/postgres.rs new file mode 100644 index 0000000..13076c3 --- /dev/null +++ b/crates/nvisy-dal/src/provider/postgres.rs @@ -0,0 +1,109 @@ +//! 
PostgreSQL provider. +//! +//! Provides relational data operations using a connection pool. + +use serde::{Deserialize, Serialize}; + +use crate::Result; +use crate::core::{ + DataInput, DataOutput, InputStream, Provider, RelationalContext, RelationalParams, +}; +use crate::datatype::Record; +use crate::python::{PyDataInput, PyDataOutput, PyProvider, PyProviderLoader}; + +/// Credentials for PostgreSQL connection. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PostgresCredentials { + /// Database host. + pub host: String, + /// Database port. + pub port: u16, + /// Database user. + pub user: String, + /// Database password. + pub password: String, + /// Database name. + pub database: String, +} + +/// Parameters for PostgreSQL operations. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PostgresParams { + /// Schema name (defaults to "public"). + #[serde(default = "default_schema")] + pub schema: String, + /// Relational parameters (table, pagination). + #[serde(flatten)] + pub relational: RelationalParams, +} + +fn default_schema() -> String { + "public".to_string() +} + +/// PostgreSQL provider for relational data operations. +pub struct PostgresProvider { + inner: PyProvider, + input: PyDataInput, + output: PyDataOutput, +} + +impl PostgresProvider { + /// Disconnects from the database. + pub async fn disconnect(self) -> Result<()> { + self.inner.disconnect().await + } +} + +#[async_trait::async_trait] +impl Provider for PostgresProvider { + type Credentials = PostgresCredentials; + type Params = PostgresParams; + + async fn connect( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let loader = PyProviderLoader::new().map_err(crate::Error::from)?; + let creds_json = serde_json::to_value(&credentials).map_err(crate::Error::from)?; + let params_json = serde_json::to_value(¶ms).map_err(crate::Error::from)?; + + let inner = loader + .load("postgres", creds_json, params_json) + .await + .map_err(crate::Error::from)?; + let input = PyDataInput::new(PyProvider::new(inner.clone_py_object())); + let output = PyDataOutput::new(PyProvider::new(inner.clone_py_object())); + + Ok(Self { + inner, + input, + output, + }) + } +} + +#[async_trait::async_trait] +impl DataInput for PostgresProvider { + type Item = Record; + type Context = RelationalContext; + + async fn read(&self, ctx: &Self::Context) -> Result> { + self.input.read(ctx).await + } +} + +#[async_trait::async_trait] +impl DataOutput for PostgresProvider { + type Item = Record; + + async fn write(&self, items: Vec) -> Result<()> { + self.output.write(items).await + } +} + +impl std::fmt::Debug for PostgresProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PostgresProvider").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/provider/postgres/config.rs b/crates/nvisy-dal/src/provider/postgres/config.rs deleted file mode 100644 index 33b12aa..0000000 --- a/crates/nvisy-dal/src/provider/postgres/config.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! PostgreSQL configuration types. - -use serde::{Deserialize, Serialize}; - -/// PostgreSQL credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PostgresCredentials { - /// Connection string (e.g., "postgresql://user:pass@host:5432/db"). - pub connection_string: String, -} - -/// PostgreSQL parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct PostgresParams { - /// Table name. 
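A read-side sketch for the new `PostgresProvider`. Credentials mirror the fields defined above; `PostgresParams` is deserialized from JSON because `RelationalParams` is flattened into it, and the `table` key, the `Default` impl for `RelationalContext`, and the module paths are assumptions:

```rust
use futures::StreamExt;
use nvisy_dal::core::{DataInput, Provider, RelationalContext};
use nvisy_dal::provider::postgres::{PostgresCredentials, PostgresProvider};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let credentials = PostgresCredentials {
        host: "localhost".to_string(),
        port: 5432,
        user: "nvisy".to_string(),
        password: std::env::var("PGPASSWORD")?,
        database: "nvisy".to_string(),
    };
    // `relational` is #[serde(flatten)]-ed, so its fields sit next to `schema`.
    let params = serde_json::from_value(serde_json::json!({
        "schema": "public",
        "table": "documents" // assumed RelationalParams field
    }))?;

    let provider = PostgresProvider::connect(params, credentials).await?;

    let ctx = RelationalContext::default(); // assumed Default impl
    let mut records = provider.read(&ctx).await?;
    let mut count = 0usize;
    while let Some(record) = records.next().await {
        let _record = record?; // a Record with column values
        count += 1;
    }
    println!("read {count} records");

    provider.disconnect().await?;
    Ok(())
}
```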
- pub table: String, - /// Schema name. - #[serde(skip_serializing_if = "Option::is_none")] - pub schema: Option, -} diff --git a/crates/nvisy-dal/src/provider/postgres/input.rs b/crates/nvisy-dal/src/provider/postgres/input.rs deleted file mode 100644 index a7f549b..0000000 --- a/crates/nvisy-dal/src/provider/postgres/input.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! PostgreSQL DataInput implementation. - -use std::collections::HashMap; - -use async_trait::async_trait; -use futures::StreamExt; - -use super::PostgresProvider; -use crate::core::{DataInput, InputStream, RelationalContext}; -use crate::datatype::Record; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataInput for PostgresProvider { - type Context = RelationalContext; - type Item = Record; - - async fn read(&self, ctx: &RelationalContext) -> Result> { - let prefix = ctx.table.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let key = entry.path().to_string(); - match op.read(&key).await { - Ok(data) => { - let value: serde_json::Value = - serde_json::from_slice(&data.to_bytes()) - .unwrap_or(serde_json::json!({})); - - let columns: HashMap = - if let serde_json::Value::Object(map) = value { - map.into_iter().collect() - } else { - let mut cols = HashMap::new(); - cols.insert("_key".to_string(), serde_json::json!(key)); - cols.insert("_value".to_string(), value); - cols - }; - - Some(Ok(Record::from_columns(columns))) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} diff --git a/crates/nvisy-dal/src/provider/postgres/mod.rs b/crates/nvisy-dal/src/provider/postgres/mod.rs deleted file mode 100644 index afc319c..0000000 --- a/crates/nvisy-dal/src/provider/postgres/mod.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! PostgreSQL provider. - -mod config; -mod input; -mod output; - -pub use config::{PostgresCredentials, PostgresParams}; -use opendal::{Operator, services}; - -use crate::core::Provider; -use crate::error::Error; - -/// PostgreSQL provider for relational data. -#[derive(Clone)] -pub struct PostgresProvider { - operator: Operator, -} - -#[async_trait::async_trait] -impl Provider for PostgresProvider { - type Credentials = PostgresCredentials; - type Params = PostgresParams; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let mut builder = services::Postgresql::default() - .connection_string(&credentials.connection_string) - .table(¶ms.table); - - if let Some(ref schema) = params.schema { - builder = builder.root(schema); - } - - let operator = Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| Error::connection(e.to_string()))?; - - Ok(Self { operator }) - } -} - -impl std::fmt::Debug for PostgresProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PostgresProvider").finish() - } -} diff --git a/crates/nvisy-dal/src/provider/postgres/output.rs b/crates/nvisy-dal/src/provider/postgres/output.rs deleted file mode 100644 index 7382302..0000000 --- a/crates/nvisy-dal/src/provider/postgres/output.rs +++ /dev/null @@ -1,32 +0,0 @@ -//! 
PostgreSQL DataOutput implementation. - -use async_trait::async_trait; - -use super::PostgresProvider; -use crate::core::DataOutput; -use crate::datatype::Record; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for PostgresProvider { - type Item = Record; - - async fn write(&self, items: Vec) -> Result<()> { - for record in items { - let key = record - .get("_key") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()) - .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()); - - let value = - serde_json::to_vec(&record.columns).map_err(|e| Error::provider(e.to_string()))?; - - self.operator - .write(&key, value) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/provider/qdrant/config.rs b/crates/nvisy-dal/src/provider/qdrant/config.rs deleted file mode 100644 index 176d5fb..0000000 --- a/crates/nvisy-dal/src/provider/qdrant/config.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! Qdrant configuration types. - -use serde::{Deserialize, Serialize}; - -/// Qdrant credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct QdrantCredentials { - /// Qdrant server URL (e.g., "http://localhost:6334"). - pub url: String, - /// API key for authentication. - #[serde(skip_serializing_if = "Option::is_none")] - pub api_key: Option, -} - -/// Qdrant parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct QdrantParams { - /// Collection name. - pub collection: String, - /// Vector dimensions. - #[serde(skip_serializing_if = "Option::is_none")] - pub dimensions: Option, -} diff --git a/crates/nvisy-dal/src/provider/qdrant/mod.rs b/crates/nvisy-dal/src/provider/qdrant/mod.rs deleted file mode 100644 index 7e8405f..0000000 --- a/crates/nvisy-dal/src/provider/qdrant/mod.rs +++ /dev/null @@ -1,257 +0,0 @@ -//! Qdrant vector store provider. - -mod config; -mod output; - -use std::collections::HashMap; - -pub use config::{QdrantCredentials, QdrantParams}; -use qdrant_client::Qdrant; -use qdrant_client::qdrant::vectors_config::Config as VectorsConfig; -use qdrant_client::qdrant::with_payload_selector::SelectorOptions; -use qdrant_client::qdrant::with_vectors_selector::SelectorOptions as VectorsSelectorOptions; -use qdrant_client::qdrant::{ - Condition, CreateCollectionBuilder, Distance, Filter, PointId, SearchPointsBuilder, - VectorParamsBuilder, -}; - -use crate::core::Provider; -use crate::error::{Error, Result}; - -/// Qdrant provider for vector storage. -pub struct QdrantProvider { - client: Qdrant, - params: QdrantParams, -} - -#[async_trait::async_trait] -impl Provider for QdrantProvider { - type Credentials = QdrantCredentials; - type Params = QdrantParams; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let client = Qdrant::from_url(&credentials.url) - .api_key(credentials.api_key) - .build() - .map_err(|e| Error::connection(e.to_string()))?; - - Ok(Self { client, params }) - } -} - -impl QdrantProvider { - /// Ensures a collection exists, creating it if necessary. 
- pub(crate) async fn ensure_collection(&self, name: &str, dimensions: usize) -> Result<()> { - let exists = self - .client - .collection_exists(name) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - if !exists { - let vectors_config = VectorsConfig::Params( - VectorParamsBuilder::new(dimensions as u64, Distance::Cosine).build(), - ); - - self.client - .create_collection( - CreateCollectionBuilder::new(name).vectors_config(vectors_config), - ) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - - Ok(()) - } - - /// Returns the configured collection name. - pub fn collection(&self) -> &str { - &self.params.collection - } - - /// Searches for similar vectors. - pub async fn search( - &self, - collection: &str, - query: Vec, - limit: usize, - include_vectors: bool, - include_metadata: bool, - filter: Option<&serde_json::Value>, - ) -> Result> { - let mut search = SearchPointsBuilder::new(collection, query, limit as u64); - - if include_vectors { - search = search.with_vectors(VectorsSelectorOptions::Enable(true)); - } - - if include_metadata { - search = search.with_payload(SelectorOptions::Enable(true)); - } - - if let Some(filter_json) = filter - && let Some(conditions) = parse_filter(filter_json) - { - search = search.filter(Filter::must(conditions)); - } - - let response = self - .client - .search_points(search) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let results = response - .result - .into_iter() - .map(|point| { - let id = extract_point_id(point.id).unwrap_or_default(); - let vector = extract_vector(point.vectors); - - let metadata: HashMap = point - .payload - .into_iter() - .map(|(k, v)| (k, qdrant_value_to_json(v))) - .collect(); - - SearchResult { - id, - score: point.score, - vector, - metadata, - } - }) - .collect(); - - Ok(results) - } -} - -/// Result from a vector similarity search. -#[derive(Debug, Clone)] -pub struct SearchResult { - /// The ID of the matched vector. - pub id: String, - /// Similarity score. - pub score: f32, - /// The vector data, if requested. - pub vector: Option>, - /// Metadata associated with this vector. 
- pub metadata: HashMap, -} - -impl std::fmt::Debug for QdrantProvider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("QdrantProvider").finish() - } -} - -fn extract_vector(vectors: Option) -> Option> { - use qdrant_client::qdrant::vectors_output::VectorsOptions; - - vectors.and_then(|v| match v.vectors_options { - #[allow(deprecated)] - Some(VectorsOptions::Vector(vec)) => Some(vec.data), - _ => None, - }) -} - -fn extract_point_id(id: Option) -> Option { - use qdrant_client::qdrant::point_id::PointIdOptions; - - match id { - Some(PointId { - point_id_options: Some(id), - }) => match id { - PointIdOptions::Num(n) => Some(n.to_string()), - PointIdOptions::Uuid(s) => Some(s), - }, - _ => None, - } -} - -pub(crate) fn json_to_qdrant_value(value: serde_json::Value) -> qdrant_client::qdrant::Value { - use qdrant_client::qdrant::value::Kind; - - let kind = match value { - serde_json::Value::Null => Kind::NullValue(0), - serde_json::Value::Bool(b) => Kind::BoolValue(b), - serde_json::Value::Number(n) => { - if let Some(i) = n.as_i64() { - Kind::IntegerValue(i) - } else if let Some(f) = n.as_f64() { - Kind::DoubleValue(f) - } else { - Kind::StringValue(n.to_string()) - } - } - serde_json::Value::String(s) => Kind::StringValue(s), - serde_json::Value::Array(arr) => { - let values: Vec = - arr.into_iter().map(json_to_qdrant_value).collect(); - Kind::ListValue(qdrant_client::qdrant::ListValue { values }) - } - serde_json::Value::Object(obj) => { - let fields: HashMap = obj - .into_iter() - .map(|(k, v)| (k, json_to_qdrant_value(v))) - .collect(); - Kind::StructValue(qdrant_client::qdrant::Struct { fields }) - } - }; - - qdrant_client::qdrant::Value { kind: Some(kind) } -} - -fn qdrant_value_to_json(value: qdrant_client::qdrant::Value) -> serde_json::Value { - use qdrant_client::qdrant::value::Kind; - - match value.kind { - Some(Kind::NullValue(_)) => serde_json::Value::Null, - Some(Kind::BoolValue(b)) => serde_json::Value::Bool(b), - Some(Kind::IntegerValue(i)) => serde_json::json!(i), - Some(Kind::DoubleValue(f)) => serde_json::json!(f), - Some(Kind::StringValue(s)) => serde_json::Value::String(s), - Some(Kind::ListValue(list)) => { - let arr: Vec = - list.values.into_iter().map(qdrant_value_to_json).collect(); - serde_json::Value::Array(arr) - } - Some(Kind::StructValue(obj)) => { - let map: serde_json::Map = obj - .fields - .into_iter() - .map(|(k, v)| (k, qdrant_value_to_json(v))) - .collect(); - serde_json::Value::Object(map) - } - None => serde_json::Value::Null, - } -} - -fn parse_filter(filter: &serde_json::Value) -> Option> { - if let serde_json::Value::Object(obj) = filter { - let conditions: Vec = obj - .iter() - .filter_map(|(key, value)| match value { - serde_json::Value::String(s) => Some(Condition::matches(key.clone(), s.clone())), - serde_json::Value::Number(n) => { - n.as_i64().map(|i| Condition::matches(key.clone(), i)) - } - serde_json::Value::Bool(b) => Some(Condition::matches(key.clone(), *b)), - _ => None, - }) - .collect(); - - if conditions.is_empty() { - None - } else { - Some(conditions) - } - } else { - None - } -} diff --git a/crates/nvisy-dal/src/provider/qdrant/output.rs b/crates/nvisy-dal/src/provider/qdrant/output.rs deleted file mode 100644 index 6319c86..0000000 --- a/crates/nvisy-dal/src/provider/qdrant/output.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! Qdrant DataOutput implementation. 
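Note that the removed `parse_filter` above only turns flat string, integer, and boolean values into Qdrant match conditions; arrays, floats, and nested objects are silently dropped, and an empty result means no filter is applied. An illustrative input:

```rust
fn main() {
    let filter = serde_json::json!({
        "lang": "en",       // String  -> Condition::matches("lang", "en")
        "year": 2024,       // i64     -> Condition::matches("year", 2024)
        "draft": false,     // bool    -> Condition::matches("draft", false)
        "tags": ["a", "b"]  // Array   -> dropped, no condition generated
    });
    println!("{filter}");
}
```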
- -use std::collections::HashMap; - -use async_trait::async_trait; -use qdrant_client::qdrant::{PointStruct, UpsertPointsBuilder}; - -use super::{QdrantProvider, json_to_qdrant_value}; -use crate::core::DataOutput; -use crate::datatype::Embedding; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for QdrantProvider { - type Item = Embedding; - - async fn write(&self, items: Vec) -> Result<()> { - if items.is_empty() { - return Ok(()); - } - - let collection = self.collection(); - - let dimensions = items - .first() - .map(|v| v.vector.len()) - .ok_or_else(|| Error::invalid_input("No embeddings provided"))?; - - self.ensure_collection(collection, dimensions).await?; - - let points: Vec = items - .into_iter() - .map(|v| { - let payload: HashMap = v - .metadata - .into_iter() - .map(|(k, v)| (k, json_to_qdrant_value(v))) - .collect(); - - PointStruct::new(v.id, v.vector, payload) - }) - .collect(); - - self.client - .upsert_points(UpsertPointsBuilder::new(collection, points)) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/provider/s3.rs b/crates/nvisy-dal/src/provider/s3.rs new file mode 100644 index 0000000..f358f3a --- /dev/null +++ b/crates/nvisy-dal/src/provider/s3.rs @@ -0,0 +1,100 @@ +//! S3 provider. +//! +//! Provides object storage operations for AWS S3 and S3-compatible services. + +use serde::{Deserialize, Serialize}; + +use crate::Result; +use crate::core::{DataInput, DataOutput, InputStream, ObjectContext, Provider}; +use crate::datatype::Object; +use crate::python::{PyDataInput, PyDataOutput, PyProvider, PyProviderLoader}; + +/// Credentials for S3 connection. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct S3Credentials { + /// AWS access key ID. + pub access_key_id: String, + /// AWS secret access key. + pub secret_access_key: String, + /// AWS region. + pub region: String, + /// Custom endpoint URL (for MinIO, LocalStack, etc.). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub endpoint_url: Option, +} + +/// Parameters for S3 operations. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct S3Params { + /// Target bucket name. + pub bucket: String, + /// Key prefix for all operations. + pub prefix: String, +} + +/// S3 provider for object storage operations. +pub struct S3Provider { + inner: PyProvider, + input: PyDataInput, + output: PyDataOutput, +} + +impl S3Provider { + /// Disconnects from S3. 
+ pub async fn disconnect(self) -> Result<()> { + self.inner.disconnect().await + } +} + +#[async_trait::async_trait] +impl Provider for S3Provider { + type Credentials = S3Credentials; + type Params = S3Params; + + async fn connect( + params: Self::Params, + credentials: Self::Credentials, + ) -> nvisy_core::Result { + let loader = PyProviderLoader::new().map_err(crate::Error::from)?; + let creds_json = serde_json::to_value(&credentials).map_err(crate::Error::from)?; + let params_json = serde_json::to_value(¶ms).map_err(crate::Error::from)?; + + let inner = loader + .load("s3", creds_json, params_json) + .await + .map_err(crate::Error::from)?; + let input = PyDataInput::new(PyProvider::new(inner.clone_py_object())); + let output = PyDataOutput::new(PyProvider::new(inner.clone_py_object())); + + Ok(Self { + inner, + input, + output, + }) + } +} + +#[async_trait::async_trait] +impl DataInput for S3Provider { + type Item = Object; + type Context = ObjectContext; + + async fn read(&self, ctx: &Self::Context) -> Result> { + self.input.read(ctx).await + } +} + +#[async_trait::async_trait] +impl DataOutput for S3Provider { + type Item = Object; + + async fn write(&self, items: Vec) -> Result<()> { + self.output.write(items).await + } +} + +impl std::fmt::Debug for S3Provider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("S3Provider").finish_non_exhaustive() + } +} diff --git a/crates/nvisy-dal/src/provider/s3/config.rs b/crates/nvisy-dal/src/provider/s3/config.rs deleted file mode 100644 index 81a8e03..0000000 --- a/crates/nvisy-dal/src/provider/s3/config.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! Amazon S3 configuration types. - -use serde::{Deserialize, Serialize}; - -/// Amazon S3 credentials (sensitive). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct S3Credentials { - /// AWS region. - pub region: String, - /// Access key ID. - pub access_key_id: String, - /// Secret access key. - pub secret_access_key: String, - /// Custom endpoint URL (for S3-compatible storage like MinIO, R2). - #[serde(skip_serializing_if = "Option::is_none")] - pub endpoint: Option, -} - -/// Amazon S3 parameters (non-sensitive). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct S3Params { - /// Bucket name. - pub bucket: String, - /// Path prefix within the bucket. - #[serde(skip_serializing_if = "Option::is_none")] - pub prefix: Option, -} diff --git a/crates/nvisy-dal/src/provider/s3/input.rs b/crates/nvisy-dal/src/provider/s3/input.rs deleted file mode 100644 index dcfb8a1..0000000 --- a/crates/nvisy-dal/src/provider/s3/input.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! S3 DataInput implementation. 
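An object-listing sketch for the new Python-backed `S3Provider`. The module paths, the Tokio runtime, and the `Default` impl for `ObjectContext` (whose `prefix`/`limit` fields are taken from how the removed OpenDAL-based input used its context) are assumptions:

```rust
use futures::StreamExt;
use nvisy_dal::core::{DataInput, ObjectContext, Provider};
use nvisy_dal::provider::s3::{S3Credentials, S3Params, S3Provider};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let credentials = S3Credentials {
        access_key_id: std::env::var("AWS_ACCESS_KEY_ID")?,
        secret_access_key: std::env::var("AWS_SECRET_ACCESS_KEY")?,
        region: "us-east-1".to_string(),
        endpoint_url: None, // set for MinIO, LocalStack, etc.
    };
    let params = S3Params {
        bucket: "nvisy-data".to_string(),
        prefix: "uploads/".to_string(),
    };

    let provider = S3Provider::connect(params, credentials).await?;

    let ctx = ObjectContext::default(); // assumed Default impl
    let mut objects = provider.read(&ctx).await?;
    let mut count = 0usize;
    while let Some(object) = objects.next().await {
        let _object = object?;
        count += 1;
    }
    println!("listed {count} objects");

    provider.disconnect().await?;
    Ok(())
}
```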
- -use async_trait::async_trait; -use futures::StreamExt; - -use super::S3Provider; -use crate::core::{DataInput, InputStream, ObjectContext}; -use crate::datatype::Blob; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataInput for S3Provider { - type Context = ObjectContext; - type Item = Blob; - - async fn read(&self, ctx: &ObjectContext) -> Result> { - let prefix = ctx.prefix.as_deref().unwrap_or(""); - let limit = ctx.limit.unwrap_or(usize::MAX); - - let lister = self - .operator - .lister(prefix) - .await - .map_err(|e| Error::provider(e.to_string()))?; - - let operator = self.operator.clone(); - - let stream = lister.take(limit).filter_map(move |entry_result| { - let op = operator.clone(); - async move { - match entry_result { - Ok(entry) => { - let path = entry.path().to_string(); - if path.ends_with('/') { - return None; - } - - match op.read(&path).await { - Ok(data) => { - let mut blob = Blob::new(path.clone(), data.to_bytes()); - if let Ok(meta) = op.stat(&path).await - && let Some(ct) = meta.content_type() - { - blob = blob.with_content_type(ct); - } - Some(Ok(blob)) - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - Err(e) => Some(Err(Error::provider(e.to_string()))), - } - } - }); - - Ok(InputStream::new(Box::pin(stream))) - } -} diff --git a/crates/nvisy-dal/src/provider/s3/mod.rs b/crates/nvisy-dal/src/provider/s3/mod.rs deleted file mode 100644 index 0ac9eed..0000000 --- a/crates/nvisy-dal/src/provider/s3/mod.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Amazon S3 provider. - -mod config; -mod input; -mod output; - -pub use config::{S3Credentials, S3Params}; -use opendal::{Operator, services}; - -use crate::core::Provider; -use crate::error::Error; - -/// Amazon S3 provider for blob storage. -#[derive(Clone)] -pub struct S3Provider { - operator: Operator, -} - -#[async_trait::async_trait] -impl Provider for S3Provider { - type Credentials = S3Credentials; - type Params = S3Params; - - async fn connect( - params: Self::Params, - credentials: Self::Credentials, - ) -> nvisy_core::Result { - let mut builder = services::S3::default() - .bucket(¶ms.bucket) - .region(&credentials.region) - .access_key_id(&credentials.access_key_id) - .secret_access_key(&credentials.secret_access_key); - - if let Some(ref endpoint) = credentials.endpoint { - builder = builder.endpoint(endpoint); - } - - if let Some(ref prefix) = params.prefix { - builder = builder.root(prefix); - } - - let operator = Operator::new(builder) - .map(|op| op.finish()) - .map_err(|e| Error::connection(e.to_string()))?; - - Ok(Self { operator }) - } -} - -impl std::fmt::Debug for S3Provider { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("S3Provider").finish() - } -} diff --git a/crates/nvisy-dal/src/provider/s3/output.rs b/crates/nvisy-dal/src/provider/s3/output.rs deleted file mode 100644 index c89988d..0000000 --- a/crates/nvisy-dal/src/provider/s3/output.rs +++ /dev/null @@ -1,23 +0,0 @@ -//! S3 DataOutput implementation. 
- -use async_trait::async_trait; - -use super::S3Provider; -use crate::core::DataOutput; -use crate::datatype::Blob; -use crate::error::{Error, Result}; - -#[async_trait] -impl DataOutput for S3Provider { - type Item = Blob; - - async fn write(&self, items: Vec) -> Result<()> { - for blob in items { - self.operator - .write(&blob.path, blob.data) - .await - .map_err(|e| Error::provider(e.to_string()))?; - } - Ok(()) - } -} diff --git a/crates/nvisy-dal/src/python/error.rs b/crates/nvisy-dal/src/python/error.rs new file mode 100644 index 0000000..5730cd9 --- /dev/null +++ b/crates/nvisy-dal/src/python/error.rs @@ -0,0 +1,92 @@ +//! Error types for Python interop. + +use pyo3::PyErr; +use thiserror::Error; + +use crate::error::{Error, ErrorKind}; + +/// Result type for Python interop operations. +pub type PyResult = std::result::Result; + +/// Error type for Python interop operations. +#[derive(Debug, Error)] +#[error("{message}")] +pub struct PyError { + kind: PyErrorKind, + message: String, + #[source] + source: Option, +} + +#[derive(Debug, Clone, Copy)] +pub enum PyErrorKind { + /// Failed to initialize Python interpreter. + InitializationFailed, + /// Failed to import the nvisy_dal module. + ModuleNotFound, + /// Provider not found in the Python package. + ProviderNotFound, + /// Failed to call a Python method. + CallFailed, + /// Type conversion error between Rust and Python. + ConversionError, +} + +impl PyError { + pub fn new(kind: PyErrorKind, message: impl Into) -> Self { + Self { + kind, + message: message.into(), + source: None, + } + } + + pub fn with_source(mut self, source: PyErr) -> Self { + self.source = Some(source); + self + } + + pub fn initialization(message: impl Into) -> Self { + Self::new(PyErrorKind::InitializationFailed, message) + } + + pub fn module_not_found(message: impl Into) -> Self { + Self::new(PyErrorKind::ModuleNotFound, message) + } + + pub fn provider_not_found(name: &str) -> Self { + Self::new( + PyErrorKind::ProviderNotFound, + format!("Provider '{}' not found in nvisy_dal", name), + ) + } + + pub fn call_failed(message: impl Into) -> Self { + Self::new(PyErrorKind::CallFailed, message) + } + + pub fn conversion(message: impl Into) -> Self { + Self::new(PyErrorKind::ConversionError, message) + } +} + +impl From for PyError { + fn from(err: PyErr) -> Self { + Self::new(PyErrorKind::CallFailed, err.to_string()).with_source(err) + } +} + +impl From for Error { + fn from(err: PyError) -> Self { + let kind = match err.kind { + PyErrorKind::InitializationFailed | PyErrorKind::ModuleNotFound => { + ErrorKind::Connection + } + PyErrorKind::ProviderNotFound => ErrorKind::NotFound, + PyErrorKind::ConversionError => ErrorKind::InvalidInput, + PyErrorKind::CallFailed => ErrorKind::Provider, + }; + + Error::new(kind, err.message) + } +} diff --git a/crates/nvisy-dal/src/python/loader.rs b/crates/nvisy-dal/src/python/loader.rs new file mode 100644 index 0000000..fa7e62a --- /dev/null +++ b/crates/nvisy-dal/src/python/loader.rs @@ -0,0 +1,235 @@ +//! Python package loader for nvisy_dal providers. + +use std::sync::OnceLock; + +use pyo3::prelude::*; +use pyo3::types::{PyDict, PyList, PyModule}; + +use super::error::{PyError, PyResult}; +use super::provider::PyProvider; + +/// Global reference to the nvisy_dal Python module. +static NVISY_DAL_MODULE: OnceLock> = OnceLock::new(); + +/// Loader for Python-based data providers. +/// +/// Handles initialization of the Python interpreter and loading +/// of provider classes from the `nvisy_dal` package. 
+#[derive(Debug)] +pub struct PyProviderLoader { + _private: (), +} + +impl PyProviderLoader { + /// Creates a new provider loader. + /// + /// Initializes the Python interpreter if not already done. + pub fn new() -> PyResult { + // Ensure Python is initialized (pyo3 auto-initialize feature handles this) + Self::ensure_module_loaded()?; + Ok(Self { _private: () }) + } + + /// Ensures the nvisy_dal module is loaded and cached. + fn ensure_module_loaded() -> PyResult<()> { + if NVISY_DAL_MODULE.get().is_some() { + return Ok(()); + } + + Python::attach(|py| { + let module = py.import("nvisy_dal").map_err(|e| { + PyError::module_not_found("Failed to import nvisy_dal").with_source(e) + })?; + + // Store a reference to the module + let _ = NVISY_DAL_MODULE.set(module.unbind()); + Ok(()) + }) + } + + /// Loads a provider by name and connects with the given credentials. + /// + /// # Arguments + /// + /// * `name` - Provider name (e.g., "qdrant", "pinecone", "s3") + /// * `credentials` - JSON-serializable credentials + /// * `params` - JSON-serializable connection parameters + pub async fn load( + &self, + name: &str, + credentials: serde_json::Value, + params: serde_json::Value, + ) -> PyResult { + let name = name.to_owned(); + + // Get the provider class and prepare arguments + let (provider_class, creds_dict, params_dict) = Python::attach(|py| { + let module = self.get_module(py)?; + + // Import the specific provider module + let providers_mod = module + .getattr("providers") + .map_err(|e| PyError::module_not_found("providers").with_source(e))?; + let provider_mod = providers_mod + .getattr(name.as_str()) + .map_err(|e| PyError::provider_not_found(&name).with_source(e))?; + + // Get the Provider class + let provider_class = provider_mod + .getattr("Provider") + .map_err(|e| PyError::provider_not_found(&name).with_source(e))?; + + // Convert credentials and params to Python dicts + let creds_dict = json_to_pydict(py, &credentials)?; + let params_dict = json_to_pydict(py, ¶ms)?; + + Ok::<_, PyError>(( + provider_class.unbind(), + creds_dict.unbind(), + params_dict.unbind(), + )) + })?; + + // Call the async connect method + let coro = Python::attach(|py| { + let provider_class = provider_class.bind(py); + let creds = creds_dict.bind(py); + let params = params_dict.bind(py); + + let coro = provider_class.call_method1("connect", (creds, params))?; + pyo3_async_runtimes::tokio::into_future(coro) + })?; + + let instance = coro.await.map_err(PyError::from)?; + + Ok(PyProvider::new(instance)) + } + + fn get_module<'py>(&self, py: Python<'py>) -> PyResult> { + NVISY_DAL_MODULE + .get() + .map(|m| m.bind(py).clone()) + .ok_or_else(|| PyError::module_not_found("nvisy_dal module not loaded")) + } +} + +impl Default for PyProviderLoader { + fn default() -> Self { + Self::new().expect("Failed to initialize PyProviderLoader") + } +} + +/// Converts a serde_json::Value to a Python dict. +pub(super) fn json_to_pydict<'py>( + py: Python<'py>, + value: &serde_json::Value, +) -> PyResult> { + let dict = PyDict::new(py); + + if let serde_json::Value::Object(map) = value { + for (key, val) in map { + let py_val = json_to_pyobject(py, val)?; + dict.set_item(key, py_val) + .map_err(|e| PyError::conversion("Failed to set dict item").with_source(e))?; + } + } + + Ok(dict) +} + +/// Converts a serde_json::Value to a Python object. 
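The conversion helpers in this module are intended to be inverses for JSON-representable data. A round-trip sketch written as a module-internal test, since `json_to_pyobject` and `pyobject_to_json` are `pub(super)`:

```rust
#[cfg(test)]
mod tests {
    use pyo3::Python;

    use super::{json_to_pyobject, pyobject_to_json};

    #[test]
    fn json_round_trips_through_python() {
        let value = serde_json::json!({
            "name": "qdrant",
            "port": 6334,
            "tags": ["vector", "store"],
            "nested": { "enabled": true }
        });

        let restored = Python::attach(|py| {
            let obj = json_to_pyobject(py, &value)?;
            pyobject_to_json(&obj)
        })
        .expect("conversion failed");

        assert_eq!(restored, value);
    }
}
```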
+pub(super) fn json_to_pyobject<'py>( + py: Python<'py>, + value: &serde_json::Value, +) -> PyResult> { + let obj: Bound<'py, PyAny> = match value { + serde_json::Value::Null => py.None().into_bound(py), + serde_json::Value::Bool(b) => (*b) + .into_pyobject(py) + .expect("infallible") + .to_owned() + .into_any(), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + i.into_pyobject(py) + .expect("infallible") + .to_owned() + .into_any() + } else if let Some(f) = n.as_f64() { + f.into_pyobject(py) + .expect("infallible") + .to_owned() + .into_any() + } else { + return Err(PyError::conversion("Unsupported number type")); + } + } + serde_json::Value::String(s) => { + s.as_str().into_pyobject(py).expect("infallible").into_any() + } + serde_json::Value::Array(arr) => { + let list = PyList::empty(py); + for item in arr { + let py_item = json_to_pyobject(py, item)?; + list.append(py_item) + .map_err(|e| PyError::conversion("Failed to append to list").with_source(e))?; + } + list.into_any() + } + serde_json::Value::Object(_) => json_to_pydict(py, value)?.into_any(), + }; + + Ok(obj) +} + +/// Converts a Python object to a serde_json::Value. +pub(super) fn pyobject_to_json(obj: &Bound<'_, PyAny>) -> PyResult { + if obj.is_none() { + return Ok(serde_json::Value::Null); + } + + if let Ok(b) = obj.extract::() { + return Ok(serde_json::Value::Bool(b)); + } + + if let Ok(i) = obj.extract::() { + return Ok(serde_json::json!(i)); + } + + if let Ok(f) = obj.extract::() { + return Ok(serde_json::json!(f)); + } + + if let Ok(s) = obj.extract::() { + return Ok(serde_json::Value::String(s)); + } + + if let Ok(list) = obj.cast::() { + let mut arr = Vec::new(); + for item in list.iter() { + arr.push(pyobject_to_json(item.as_any())?); + } + return Ok(serde_json::Value::Array(arr)); + } + + if let Ok(dict) = obj.cast::() { + let mut map = serde_json::Map::new(); + for (key, value) in dict.iter() { + let key_str: String = key + .extract() + .map_err(|e| PyError::conversion("Dict key must be string").with_source(e))?; + map.insert(key_str, pyobject_to_json(&value)?); + } + return Ok(serde_json::Value::Object(map)); + } + + let type_name = obj + .get_type() + .name() + .map(|s| s.to_string()) + .unwrap_or_else(|_| "unknown".to_string()); + Err(PyError::conversion(format!( + "Unsupported Python type: {}", + type_name + ))) +} diff --git a/crates/nvisy-dal/src/python/mod.rs b/crates/nvisy-dal/src/python/mod.rs new file mode 100644 index 0000000..3a84a99 --- /dev/null +++ b/crates/nvisy-dal/src/python/mod.rs @@ -0,0 +1,11 @@ +//! Python interop for data providers. +//! +//! This module provides integration with the `nvisy_dal` Python package, +//! allowing Rust code to load and use Python-based providers. + +mod error; +mod loader; +mod provider; + +pub(crate) use loader::PyProviderLoader; +pub(crate) use provider::{PyDataInput, PyDataOutput, PyProvider}; diff --git a/crates/nvisy-dal/src/python/provider.rs b/crates/nvisy-dal/src/python/provider.rs new file mode 100644 index 0000000..4e2f06b --- /dev/null +++ b/crates/nvisy-dal/src/python/provider.rs @@ -0,0 +1,192 @@ +//! Python provider wrapper implementing Rust traits. + +use std::marker::PhantomData; + +use async_stream::try_stream; +use futures::Stream; +use pyo3::prelude::*; + +use super::error::PyError; +use super::loader::pyobject_to_json; +use crate::Result; +use crate::core::{DataInput, DataOutput, InputStream}; + +/// A wrapper around a Python provider instance. 
+/// +/// Implements the Rust `DataInput` and `DataOutput` traits by delegating +/// to the underlying Python provider's `read` and `write` methods. +pub struct PyProvider { + instance: Py, +} + +impl PyProvider { + /// Creates a new PyProvider from a connected Python provider instance. + pub fn new(instance: Py) -> Self { + Self { instance } + } + + /// Clones the underlying Python object reference. + pub fn clone_py_object(&self) -> Py { + Python::attach(|py| self.instance.clone_ref(py)) + } + + /// Disconnects the provider. + pub async fn disconnect(&self) -> Result<()> { + let coro = Python::attach(|py| { + let coro = self + .instance + .bind(py) + .call_method0("disconnect") + .map_err(|e| PyError::call_failed(format!("Failed to call disconnect: {}", e)))?; + pyo3_async_runtimes::tokio::into_future(coro) + .map_err(|e| PyError::call_failed(format!("Failed to convert to future: {}", e))) + })?; + + coro.await + .map_err(|e| PyError::call_failed(format!("Failed to disconnect: {}", e)))?; + + Ok(()) + } +} + +impl std::fmt::Debug for PyProvider { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PyProvider").finish_non_exhaustive() + } +} + +/// Typed wrapper for Python providers implementing DataInput. +pub struct PyDataInput { + provider: PyProvider, + _marker: PhantomData<(T, Ctx)>, +} + +impl PyDataInput { + /// Creates a new typed input wrapper. + pub fn new(provider: PyProvider) -> Self { + Self { + provider, + _marker: PhantomData, + } + } +} + +#[async_trait::async_trait] +impl DataInput for PyDataInput +where + T: for<'de> serde::Deserialize<'de> + Send + Sync + 'static, + Ctx: serde::Serialize + Send + Sync, +{ + type Item = T; + type Context = Ctx; + + async fn read(&self, ctx: &Self::Context) -> Result> { + let ctx_json = serde_json::to_value(ctx) + .map_err(|e| PyError::conversion(format!("Failed to serialize context: {}", e)))?; + + // Call Python read method which returns an async iterator + let coro = Python::attach(|py| { + let bound = self.provider.instance.bind(py); + let ctx_dict = super::loader::json_to_pydict(py, &ctx_json)?; + let coro = bound + .call_method1("read", (ctx_dict,)) + .map_err(|e| PyError::call_failed(format!("Failed to call read: {}", e)))?; + pyo3_async_runtimes::tokio::into_future(coro) + .map_err(|e| PyError::call_failed(format!("Failed to convert to future: {}", e))) + })?; + + let py_iterator = coro + .await + .map_err(|e| PyError::call_failed(format!("Failed to call read: {}", e)))?; + + // Create a stream that pulls from the Python async iterator + let stream = py_async_iterator_to_stream::(py_iterator); + Ok(InputStream::new(Box::pin(stream))) + } +} + +/// Typed wrapper for Python providers implementing DataOutput. +pub struct PyDataOutput { + provider: PyProvider, + _marker: PhantomData, +} + +impl PyDataOutput { + /// Creates a new typed output wrapper. 
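The wiring pattern the new `pinecone.rs`, `postgres.rs`, and `s3.rs` providers follow: one connected `PyProvider` is loaded, and each typed wrapper gets its own `clone_py_object` handle to the same Python instance. A sketch under those assumptions; the provider name string is illustrative:

```rust
use crate::Result;
use crate::core::RelationalContext;
use crate::datatype::Record;
use crate::python::{PyDataInput, PyDataOutput, PyProvider, PyProviderLoader};

/// Hypothetical wiring for another Python-backed relational provider.
async fn connect_python_provider(
    creds: serde_json::Value,
    params: serde_json::Value,
) -> Result<(PyProvider, PyDataInput<Record, RelationalContext>, PyDataOutput<Record>)> {
    let loader = PyProviderLoader::new()?;
    let inner = loader.load("my_provider", creds, params).await?; // illustrative name

    // Each wrapper holds its own reference to the same Python object.
    let input = PyDataInput::new(PyProvider::new(inner.clone_py_object()));
    let output = PyDataOutput::new(PyProvider::new(inner.clone_py_object()));

    Ok((inner, input, output))
}
```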
+ pub fn new(provider: PyProvider) -> Self { + Self { + provider, + _marker: PhantomData, + } + } +} + +#[async_trait::async_trait] +impl DataOutput for PyDataOutput +where + T: serde::Serialize + Send + Sync, +{ + type Item = T; + + async fn write(&self, items: Vec) -> Result<()> { + let items_json = serde_json::to_value(&items) + .map_err(|e| PyError::conversion(format!("Failed to serialize items: {}", e)))?; + + let coro = Python::attach(|py| { + let bound = self.provider.instance.bind(py); + let items_list = super::loader::json_to_pyobject(py, &items_json)?; + let coro = bound + .call_method1("write", (items_list,)) + .map_err(|e| PyError::call_failed(format!("Failed to call write: {}", e)))?; + pyo3_async_runtimes::tokio::into_future(coro) + .map_err(|e| PyError::call_failed(format!("Failed to convert to future: {}", e))) + })?; + + coro.await + .map_err(|e| PyError::call_failed(format!("Failed to call write: {}", e)))?; + + Ok(()) + } +} + +/// Converts a Python async iterator to a Rust Stream. +fn py_async_iterator_to_stream(iterator: Py) -> impl Stream> +where + T: for<'de> serde::Deserialize<'de> + Send + 'static, +{ + try_stream! { + loop { + // Get the next coroutine from __anext__ + let next_coro = Python::attach(|py| { + let bound = iterator.bind(py); + match bound.call_method0("__anext__") { + Ok(coro) => { + let future = pyo3_async_runtimes::tokio::into_future(coro)?; + Ok(Some(future)) + } + Err(e) => { + if e.is_instance_of::(py) { + Ok(None) + } else { + Err(PyError::from(e)) + } + } + } + })?; + + let Some(coro) = next_coro else { + break; + }; + + // Await the coroutine + let result = coro.await.map_err(PyError::from)?; + + // Convert result to Rust type + let json_value = Python::attach(|py| pyobject_to_json(result.bind(py)))?; + let item: T = serde_json::from_value(json_value) + .map_err(|e| PyError::conversion(format!("Failed to deserialize item: {}", e)))?; + + yield item; + } + } +} diff --git a/crates/nvisy-rig/src/provider/splitting/metadata.rs b/crates/nvisy-rig/src/provider/splitting/metadata.rs index b3c9411..e3d4c40 100644 --- a/crates/nvisy-rig/src/provider/splitting/metadata.rs +++ b/crates/nvisy-rig/src/provider/splitting/metadata.rs @@ -4,39 +4,89 @@ use std::num::NonZeroU32; use serde::{Deserialize, Serialize}; -/// Metadata about a split chunk's location in the source text. +/// Metadata about a chunk's location in the source document. +/// +/// This is the unified chunk metadata type used throughout the system: +/// - Created during text splitting with offset information +/// - Stored in the database with the chunk +/// - Retrieved during search operations #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct ChunkMetadata { /// Chunk index within the source (0-based). pub index: u32, + /// Start byte offset in the source text. pub start_offset: u32, + /// End byte offset in the source text. pub end_offset: u32, + /// Page number (1-indexed, if applicable). #[serde(default, skip_serializing_if = "Option::is_none")] pub page: Option, + + /// Section or heading the chunk belongs to. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub section: Option, } impl ChunkMetadata { - /// Creates metadata with offset information. + /// Creates metadata with index and offset information. pub fn new(index: u32, start_offset: u32, end_offset: u32) -> Self { Self { index, start_offset, end_offset, page: None, + section: None, } } + /// Creates metadata from JSON (used when loading from database). 
diff --git a/crates/nvisy-rig/src/provider/splitting/metadata.rs b/crates/nvisy-rig/src/provider/splitting/metadata.rs
index b3c9411..e3d4c40 100644
--- a/crates/nvisy-rig/src/provider/splitting/metadata.rs
+++ b/crates/nvisy-rig/src/provider/splitting/metadata.rs
@@ -4,39 +4,89 @@ use std::num::NonZeroU32;
 
 use serde::{Deserialize, Serialize};
 
-/// Metadata about a split chunk's location in the source text.
+/// Metadata about a chunk's location in the source document.
+///
+/// This is the unified chunk metadata type used throughout the system:
+/// - Created during text splitting with offset information
+/// - Stored in the database with the chunk
+/// - Retrieved during search operations
 #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
 pub struct ChunkMetadata {
     /// Chunk index within the source (0-based).
     pub index: u32,
+
     /// Start byte offset in the source text.
     pub start_offset: u32,
+
     /// End byte offset in the source text.
     pub end_offset: u32,
+
     /// Page number (1-indexed, if applicable).
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub page: Option<NonZeroU32>,
+
+    /// Section or heading the chunk belongs to.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub section: Option<String>,
 }
 
 impl ChunkMetadata {
-    /// Creates metadata with offset information.
+    /// Creates metadata with index and offset information.
     pub fn new(index: u32, start_offset: u32, end_offset: u32) -> Self {
         Self {
             index,
             start_offset,
             end_offset,
             page: None,
+            section: None,
         }
     }
 
+    /// Creates metadata from JSON (used when loading from database).
+    ///
+    /// The `index` parameter overrides any index value in the JSON.
+    pub fn from_json(json: &serde_json::Value, index: u32) -> Self {
+        let mut metadata: Self = serde_json::from_value(json.clone()).unwrap_or_default();
+        metadata.index = index;
+        metadata
+    }
+
     /// Sets the page number.
     pub fn with_page(mut self, page: NonZeroU32) -> Self {
         self.page = Some(page);
         self
     }
 
+    /// Sets the section name.
+    pub fn with_section(mut self, section: impl Into<String>) -> Self {
+        self.section = Some(section.into());
+        self
+    }
+
     /// Returns the byte length of the chunk.
     pub fn byte_len(&self) -> u32 {
-        self.end_offset - self.start_offset
+        self.end_offset.saturating_sub(self.start_offset)
+    }
+
+    /// Returns the byte range for content extraction.
+    pub fn byte_range(&self) -> std::ops::Range<usize> {
+        self.start_offset as usize..self.end_offset as usize
+    }
+
+    /// Returns a location string for display (e.g., "page 5, 'Introduction', chunk 3").
+    pub fn location_string(&self) -> String {
+        let mut parts = Vec::new();
+
+        if let Some(page) = self.page {
+            parts.push(format!("page {page}"));
+        }
+
+        if let Some(section) = &self.section {
+            parts.push(format!("'{section}'"));
+        }
+
+        parts.push(format!("chunk {}", self.index + 1));
+
+        parts.join(", ")
     }
 }
diff --git a/crates/nvisy-rig/src/rag/indexer/indexed.rs b/crates/nvisy-rig/src/rag/indexer/indexed.rs
index 2245ea7..244e823 100644
--- a/crates/nvisy-rig/src/rag/indexer/indexed.rs
+++ b/crates/nvisy-rig/src/rag/indexer/indexed.rs
@@ -8,21 +8,21 @@ use uuid::Uuid;
 pub struct IndexedChunk {
     /// Database ID of the created chunk.
     pub id: Uuid,
-    /// Index of the chunk within the file.
-    pub chunk_index: i32,
+    /// Index of the chunk within the file (0-based).
+    pub index: u32,
     /// Size of the chunk content in bytes.
-    pub content_size: i32,
+    pub content_size: u32,
     /// Number of tokens in the chunk.
-    pub token_count: i32,
+    pub token_count: u32,
 }
 
 impl From<FileChunk> for IndexedChunk {
     fn from(chunk: FileChunk) -> Self {
         Self {
             id: chunk.id,
-            chunk_index: chunk.chunk_index,
-            content_size: chunk.content_size,
-            token_count: chunk.token_count,
+            index: chunk.chunk_index as u32,
+            content_size: chunk.content_size as u32,
+            token_count: chunk.token_count as u32,
         }
     }
 }
diff --git a/crates/nvisy-rig/src/rag/searcher/mod.rs b/crates/nvisy-rig/src/rag/searcher/mod.rs
index 1f95b01..ce280dc 100644
--- a/crates/nvisy-rig/src/rag/searcher/mod.rs
+++ b/crates/nvisy-rig/src/rag/searcher/mod.rs
@@ -103,7 +103,7 @@ impl Searcher {
             .into_iter()
             .map(|scored| {
                 let chunk = scored.chunk;
-                let metadata = ChunkMetadata::from_json(&chunk.metadata, chunk.chunk_index);
+                let metadata = ChunkMetadata::from_json(&chunk.metadata, chunk.chunk_index as u32);
                 RetrievedChunk::new(chunk.id, chunk.file_id, scored.score, metadata)
             })
             .collect();
diff --git a/crates/nvisy-rig/src/rag/searcher/retrieved.rs b/crates/nvisy-rig/src/rag/searcher/retrieved.rs
index 423542c..bb40fd4 100644
--- a/crates/nvisy-rig/src/rag/searcher/retrieved.rs
+++ b/crates/nvisy-rig/src/rag/searcher/retrieved.rs
@@ -3,103 +3,8 @@
 use serde::{Deserialize, Serialize};
 use uuid::Uuid;
 
-/// Metadata about a chunk's location in the source document.
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-pub struct ChunkMetadata {
-    /// Page number (1-indexed, if applicable).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub page: Option<u32>,
-
-    /// Section or heading the chunk belongs to.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub section: Option<String>,
-
-    /// Start byte offset in the source file.
- pub start_offset: u32, - - /// End byte offset in the source file. - pub end_offset: u32, - - /// Chunk index within the file (0-based). - pub chunk_index: u32, -} - -impl ChunkMetadata { - /// Creates metadata with offset information. - pub fn new(chunk_index: u32, start_offset: u32, end_offset: u32) -> Self { - Self { - page: None, - section: None, - start_offset, - end_offset, - chunk_index, - } - } - - /// Creates metadata from JSON and chunk index. - pub fn from_json(json: &serde_json::Value, chunk_index: i32) -> Self { - let start_offset = json - .get("start_offset") - .and_then(|v| v.as_u64()) - .unwrap_or(0) as u32; - - let end_offset = json.get("end_offset").and_then(|v| v.as_u64()).unwrap_or(0) as u32; - - let page = json.get("page").and_then(|v| v.as_u64()).map(|p| p as u32); - - let section = json - .get("section") - .and_then(|v| v.as_str()) - .map(String::from); - - Self { - page, - section, - start_offset, - end_offset, - chunk_index: chunk_index as u32, - } - } - - /// Sets the page number. - pub fn with_page(mut self, page: u32) -> Self { - self.page = Some(page); - self - } - - /// Sets the section name. - pub fn with_section(mut self, section: impl Into) -> Self { - self.section = Some(section.into()); - self - } - - /// Returns the byte range for content extraction. - pub fn byte_range(&self) -> std::ops::Range { - self.start_offset as usize..self.end_offset as usize - } - - /// Returns the content length in bytes. - pub fn content_len(&self) -> u32 { - self.end_offset.saturating_sub(self.start_offset) - } - - /// Returns a location string for display. - pub fn location_string(&self) -> String { - let mut parts = Vec::new(); - - if let Some(page) = self.page { - parts.push(format!("page {page}")); - } - - if let Some(section) = &self.section { - parts.push(format!("'{section}'")); - } - - parts.push(format!("chunk {}", self.chunk_index + 1)); - - parts.join(", ") - } -} +// Re-export ChunkMetadata from the canonical location +pub use crate::provider::splitting::ChunkMetadata; /// A retrieved chunk with content and similarity score. #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/crates/nvisy-rig/src/rag/vector_store.rs b/crates/nvisy-rig/src/rag/vector_store.rs index 6ee2d6a..6ab37f1 100644 --- a/crates/nvisy-rig/src/rag/vector_store.rs +++ b/crates/nvisy-rig/src/rag/vector_store.rs @@ -67,13 +67,13 @@ pub struct ChunkDocument { pub text: String, /// The file ID this chunk belongs to. pub file_id: Uuid, - /// The chunk index within the file. - pub chunk_index: u32, + /// The chunk index within the file (0-based). + pub index: u32, /// Start byte offset in the source file. pub start_offset: u32, /// End byte offset in the source file. pub end_offset: u32, - /// Optional page number. + /// Optional page number (1-indexed). #[serde(skip_serializing_if = "Option::is_none")] pub page: Option, } @@ -83,14 +83,14 @@ impl ChunkDocument { pub fn new( text: impl Into, file_id: Uuid, - chunk_index: u32, + index: u32, start_offset: u32, end_offset: u32, ) -> Self { Self { text: text.into(), file_id, - chunk_index, + index, start_offset, end_offset, page: None, @@ -191,7 +191,7 @@ impl InsertDocuments for PgVectorStore { .get("file_id") .and_then(|v| v.as_str()) .and_then(|s| Uuid::parse_str(s).ok())?; - let chunk_index = json.get("chunk_index").and_then(|v| v.as_u64())? as i32; + let index = json.get("index").and_then(|v| v.as_u64())? as u32; let start_offset = json.get("start_offset").and_then(|v| v.as_u64())? 
as u32; let end_offset = json.get("end_offset").and_then(|v| v.as_u64())? as u32; let page = json.get("page").and_then(|v| v.as_u64()).map(|p| p as u32); @@ -205,7 +205,7 @@ impl InsertDocuments for PgVectorStore { let content_size = content_bytes.len() as i32; let metadata = serde_json::json!({ - "index": chunk_index, + "index": index, "start_offset": start_offset, "end_offset": end_offset, "page": page, @@ -213,7 +213,7 @@ impl InsertDocuments for PgVectorStore { Some(NewFileChunk { file_id, - chunk_index: Some(chunk_index), + chunk_index: Some(index as i32), content_sha256, content_size: Some(content_size), token_count: None, diff --git a/crates/nvisy-runtime/src/definition/input.rs b/crates/nvisy-runtime/src/definition/input.rs index 692b850..b8126fa 100644 --- a/crates/nvisy-runtime/src/definition/input.rs +++ b/crates/nvisy-runtime/src/definition/input.rs @@ -3,32 +3,19 @@ use serde::{Deserialize, Serialize}; use super::route::CacheSlot; -use crate::provider::InputProviderConfig; - -/// Input provider definition for workflow nodes. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct InputProvider { - /// Provider configuration (credentials_id + params). - #[serde(flatten)] - pub provider: InputProviderConfig, -} /// Input node definition - source of data for the workflow. +/// +/// Storage provider inputs (S3, Postgres, etc.) are handled externally via Python. +/// This enum only supports cache slots for internal workflow data flow. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "source", rename_all = "snake_case")] pub enum Input { - /// Read from external provider (S3, Postgres, etc.). - Provider(InputProvider), /// Read from named cache slot (resolved at compile time). CacheSlot(CacheSlot), } impl Input { - /// Creates a new input from a provider configuration. - pub fn from_provider(provider: InputProviderConfig) -> Self { - Self::Provider(InputProvider { provider }) - } - /// Creates a new input from a cache slot. pub fn from_cache(slot: impl Into) -> Self { Self::CacheSlot(CacheSlot { diff --git a/crates/nvisy-runtime/src/definition/mod.rs b/crates/nvisy-runtime/src/definition/mod.rs index 2a148a1..cbc9206 100644 --- a/crates/nvisy-runtime/src/definition/mod.rs +++ b/crates/nvisy-runtime/src/definition/mod.rs @@ -23,10 +23,10 @@ mod transform; mod util; pub use edge::Edge; -pub use input::{Input, InputProvider}; +pub use input::Input; pub use metadata::WorkflowMetadata; pub use node::{Node, NodeId, NodeKind}; -pub use output::{Output, OutputProvider}; +pub use output::Output; pub use route::{ CacheSlot, FileCategory, FileCategoryCondition, LanguageCondition, SwitchCondition, SwitchDef, }; diff --git a/crates/nvisy-runtime/src/definition/output.rs b/crates/nvisy-runtime/src/definition/output.rs index bd33b8c..0e8e346 100644 --- a/crates/nvisy-runtime/src/definition/output.rs +++ b/crates/nvisy-runtime/src/definition/output.rs @@ -3,32 +3,19 @@ use serde::{Deserialize, Serialize}; use super::route::CacheSlot; -use crate::provider::OutputProviderConfig; - -/// Output provider definition for workflow nodes. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OutputProvider { - /// Provider configuration (credentials_id + params). - #[serde(flatten)] - pub provider: OutputProviderConfig, -} /// Output node definition - destination for workflow data. +/// +/// Storage provider outputs (S3, Qdrant, etc.) are handled externally via Python. +/// This enum only supports cache slots for internal workflow data flow. 
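// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): with provider-backed sources
// and sinks removed, node inputs and outputs are wired purely through named
// cache slots. The slot name "partitioned" and the string argument to
// `from_cache` are assumptions for this example.
// ---------------------------------------------------------------------------
fn wire_cache_only_nodes() -> (Input, Output) {
    // An upstream node parks its results in a named slot...
    let output = Output::from_cache("partitioned");
    // ...and a downstream node reads them back from the same slot; the
    // compiler resolves both ends to the same buffer at compile time.
    let input = Input::from_cache("partitioned");
    (input, output)
}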
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "target", rename_all = "snake_case")] pub enum Output { - /// Write to external provider (S3, Qdrant, etc.). - Provider(OutputProvider), /// Write to named cache slot (resolved at compile time). Cache(CacheSlot), } impl Output { - /// Creates a new output from a provider configuration. - pub fn from_provider(provider: OutputProviderConfig) -> Self { - Self::Provider(OutputProvider { provider }) - } - /// Creates a new output from a cache slot. pub fn from_cache(slot: impl Into) -> Self { Self::Cache(CacheSlot { diff --git a/crates/nvisy-runtime/src/definition/transform/derive.rs b/crates/nvisy-runtime/src/definition/transform/derive.rs index 558acd1..6febbe1 100644 --- a/crates/nvisy-runtime/src/definition/transform/derive.rs +++ b/crates/nvisy-runtime/src/definition/transform/derive.rs @@ -1,15 +1,21 @@ //! Derive transform definition. +use nvisy_core::Provider; +use nvisy_rig::provider::{CompletionCredentials, CompletionModel, CompletionProvider}; use serde::{Deserialize, Serialize}; +use uuid::Uuid; -use crate::provider::CompletionProviderParams; +use crate::error::{Error, Result}; /// Derive transform for generating new content from input. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Derive { - /// Completion provider parameters (includes credentials_id and model). + /// Reference to stored credentials. + pub credentials_id: Uuid, + + /// Completion model to use. #[serde(flatten)] - pub provider: CompletionProviderParams, + pub model: CompletionModel, /// The derivation task to perform. pub task: DeriveTask, @@ -19,6 +25,18 @@ pub struct Derive { pub override_prompt: Option, } +impl Derive { + /// Creates a completion provider from these parameters and credentials. + pub async fn into_provider( + self, + credentials: CompletionCredentials, + ) -> Result { + CompletionProvider::connect(self.model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) + } +} + /// Tasks for generating new content from input. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/definition/transform/embedding.rs b/crates/nvisy-runtime/src/definition/transform/embedding.rs index 296a93e..eff4acd 100644 --- a/crates/nvisy-runtime/src/definition/transform/embedding.rs +++ b/crates/nvisy-runtime/src/definition/transform/embedding.rs @@ -1,17 +1,35 @@ //! Embedding transform definition. +use nvisy_core::Provider; +use nvisy_rig::provider::{EmbeddingCredentials, EmbeddingModel, EmbeddingProvider}; use serde::{Deserialize, Serialize}; +use uuid::Uuid; -use crate::provider::EmbeddingProviderParams; +use crate::error::{Error, Result}; /// Embedding transform for generating vector embeddings. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Embedding { - /// Embedding provider parameters (includes credentials_id and model). + /// Reference to stored credentials. + pub credentials_id: Uuid, + + /// Embedding model to use. #[serde(flatten)] - pub provider: EmbeddingProviderParams, + pub model: EmbeddingModel, /// Whether to L2-normalize the output embeddings. #[serde(default)] pub normalize: bool, } + +impl Embedding { + /// Creates an embedding provider from these parameters and credentials. 
+ pub async fn into_provider( + self, + credentials: EmbeddingCredentials, + ) -> Result { + EmbeddingProvider::connect(self.model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) + } +} diff --git a/crates/nvisy-runtime/src/definition/transform/enrich.rs b/crates/nvisy-runtime/src/definition/transform/enrich.rs index d7552a1..b326d9f 100644 --- a/crates/nvisy-runtime/src/definition/transform/enrich.rs +++ b/crates/nvisy-runtime/src/definition/transform/enrich.rs @@ -1,15 +1,21 @@ //! Enrich transform definition. +use nvisy_core::Provider; +use nvisy_rig::provider::{CompletionCredentials, CompletionModel, CompletionProvider}; use serde::{Deserialize, Serialize}; +use uuid::Uuid; -use crate::provider::CompletionProviderParams; +use crate::error::{Error, Result}; /// Enrich transform for adding metadata/descriptions to elements. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Enrich { - /// Completion provider parameters (includes credentials_id and model). + /// Reference to stored credentials. + pub credentials_id: Uuid, + + /// Completion model to use. #[serde(flatten)] - pub provider: CompletionProviderParams, + pub model: CompletionModel, /// The enrichment task to perform. #[serde(flatten)] @@ -20,6 +26,18 @@ pub struct Enrich { pub override_prompt: Option, } +impl Enrich { + /// Creates a completion provider from these parameters and credentials. + pub async fn into_provider( + self, + credentials: CompletionCredentials, + ) -> Result { + CompletionProvider::connect(self.model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) + } +} + /// Tasks for adding metadata/descriptions to elements. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "input_type", content = "task", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/definition/transform/extract.rs b/crates/nvisy-runtime/src/definition/transform/extract.rs index 2afb5ed..fbda857 100644 --- a/crates/nvisy-runtime/src/definition/transform/extract.rs +++ b/crates/nvisy-runtime/src/definition/transform/extract.rs @@ -1,15 +1,21 @@ //! Extract transform definition. +use nvisy_core::Provider; +use nvisy_rig::provider::{CompletionCredentials, CompletionModel, CompletionProvider}; use serde::{Deserialize, Serialize}; +use uuid::Uuid; -use crate::provider::CompletionProviderParams; +use crate::error::{Error, Result}; /// Extract transform for extracting structured data or converting formats. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Extract { - /// Completion provider parameters (includes credentials_id and model). + /// Reference to stored credentials. + pub credentials_id: Uuid, + + /// Completion model to use. #[serde(flatten)] - pub provider: CompletionProviderParams, + pub model: CompletionModel, /// The extraction task to perform. #[serde(flatten)] @@ -20,6 +26,18 @@ pub struct Extract { pub override_prompt: Option, } +impl Extract { + /// Creates a completion provider from these parameters and credentials. + pub async fn into_provider( + self, + credentials: CompletionCredentials, + ) -> Result { + CompletionProvider::connect(self.model, credentials) + .await + .map_err(|e| Error::Internal(e.to_string())) + } +} + /// Tasks for extracting structured data or converting formats. 
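// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): the credential-resolution
// pattern the compiler now applies to every AI-backed transform (Derive,
// Enrich, Extract, Embedding). The helper name and standalone-function form
// are assumptions; `CredentialsRegistry` is introduced later in this patch
// under the engine module.
// ---------------------------------------------------------------------------
async fn connect_extract_provider(
    registry: &CredentialsRegistry,
    extract: Extract,
) -> Result<CompletionProvider> {
    // Look up the workspace credentials referenced by the definition...
    let creds = registry.get(extract.credentials_id)?.clone();
    // ...narrow them to the expected credential type, then connect.
    extract
        .into_provider(creds.into_completion_credentials()?)
        .await
}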
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "task_type", content = "task", rename_all = "snake_case")] diff --git a/crates/nvisy-runtime/src/engine/compiler.rs b/crates/nvisy-runtime/src/engine/compiler.rs index 643f6c3..0cb4a4a 100644 --- a/crates/nvisy-runtime/src/engine/compiler.rs +++ b/crates/nvisy-runtime/src/engine/compiler.rs @@ -15,10 +15,10 @@ use std::collections::HashMap; use nvisy_rig::agent::{ StructuredOutputAgent, TableAgent, TextAnalysisAgent, TextGenerationAgent, VisionAgent, }; -use nvisy_rig::provider::CompletionProvider; use petgraph::graph::{DiGraph, NodeIndex}; use super::context::Context; +use super::credentials::CredentialsRegistry; use crate::definition::{Input, NodeId, NodeKind, Output, Workflow}; use crate::error::{Error, Result}; use crate::graph::{ @@ -26,16 +26,13 @@ use crate::graph::{ CompiledTransform, DeriveProcessor, EdgeData, EmbeddingProcessor, EnrichProcessor, ExtractProcessor, InputStream, OutputStream, PartitionProcessor, }; -use crate::provider::{ - CompletionProviderParams, CredentialsRegistry, EmbeddingProviderParams, InputProvider, - InputProviderConfig, OutputProviderConfig, -}; /// Workflow compiler that transforms definitions into executable graphs. pub struct WorkflowCompiler<'a> { /// Credentials registry for resolving provider credentials. registry: &'a CredentialsRegistry, /// Execution context for provider initialization. + #[allow(dead_code)] ctx: Context, } @@ -212,11 +209,11 @@ impl<'a> WorkflowCompiler<'a> { async fn compile_node(&self, def: &NodeKind) -> Result { match def { NodeKind::Input(input) => { - let stream = self.create_input_stream(input).await?; + let stream = self.create_input_stream(input)?; Ok(CompiledNode::Input(CompiledInput::new(stream))) } NodeKind::Output(output) => { - let stream = self.create_output_stream(output).await?; + let stream = self.create_output_stream(output)?; Ok(CompiledNode::Output(CompiledOutput::new(stream))) } NodeKind::Transform(transformer) => { @@ -230,14 +227,8 @@ impl<'a> WorkflowCompiler<'a> { } /// Creates an input stream from an input definition. - async fn create_input_stream(&self, input: &Input) -> Result { + fn create_input_stream(&self, input: &Input) -> Result { match input { - Input::Provider(provider_def) => { - let stream = self - .create_provider_input_stream(&provider_def.provider) - .await?; - Ok(stream) - } Input::CacheSlot(_) => { // Cache inputs are resolved during cache slot resolution // This shouldn't be called for cache inputs @@ -248,50 +239,9 @@ impl<'a> WorkflowCompiler<'a> { } } - /// Creates an input stream from provider configuration. - async fn create_provider_input_stream( - &self, - config: &InputProviderConfig, - ) -> Result { - let creds = self.registry.get(config.credentials_id)?; - let provider = config.params.clone().into_provider(creds.clone()).await?; - - let stream = self.read_from_provider(&provider).await?; - - // Map the stream to our Result type - use futures::StreamExt; - let mapped = stream.map(|r| r.map_err(|e| Error::Internal(e.to_string()))); - - Ok(InputStream::new(Box::pin(mapped))) - } - - /// Reads from an input provider using the appropriate context type. 
- async fn read_from_provider( - &self, - provider: &InputProvider, - ) -> Result>> - { - match provider { - InputProvider::S3(_) | InputProvider::Gcs(_) | InputProvider::Azblob(_) => { - let ctx = self.ctx.to_object_context(); - provider.read_object_stream(&ctx).await - } - InputProvider::Postgres(_) | InputProvider::Mysql(_) => { - let ctx = self.ctx.to_relational_context(); - provider.read_relational_stream(&ctx).await - } - } - } - /// Creates an output stream from an output definition. - async fn create_output_stream(&self, output: &Output) -> Result { + fn create_output_stream(&self, output: &Output) -> Result { match output { - Output::Provider(provider_def) => { - let stream = self - .create_provider_output_stream(&provider_def.provider) - .await?; - Ok(stream) - } Output::Cache(_) => { // Cache outputs are resolved during cache slot resolution Err(Error::Internal( @@ -301,18 +251,6 @@ impl<'a> WorkflowCompiler<'a> { } } - /// Creates an output stream from provider configuration. - async fn create_provider_output_stream( - &self, - config: &OutputProviderConfig, - ) -> Result { - let creds = self.registry.get(config.credentials_id)?; - let provider = config.params.clone().into_provider(creds.clone()).await?; - let sink = provider.write_sink(); - - Ok(OutputStream::new(sink)) - } - /// Creates a processor from a transformer definition. async fn create_processor( &self, @@ -326,29 +264,26 @@ impl<'a> WorkflowCompiler<'a> { p.include_page_breaks, p.discard_unsupported, ))), - Transformer::Chunk(c) => { - if c.contextual_chunking { - // Need completion provider for contextual chunking - // For now, we don't have provider params in chunk definition - // So contextual chunking won't have agents - Ok(CompiledTransform::Chunk(ChunkProcessor::new( - c.chunk_strategy.clone(), - ))) - } else { - Ok(CompiledTransform::Chunk(ChunkProcessor::new( - c.chunk_strategy.clone(), - ))) - } - } + Transformer::Chunk(c) => Ok(CompiledTransform::Chunk(ChunkProcessor::new( + c.chunk_strategy.clone(), + ))), Transformer::Embedding(e) => { - let provider = self.create_embedding_provider(&e.provider).await?; + let creds = self.registry.get(e.credentials_id)?.clone(); + let provider = e + .clone() + .into_provider(creds.into_embedding_credentials()?) + .await?; Ok(CompiledTransform::Embedding(EmbeddingProcessor::new( provider, e.normalize, ))) } Transformer::Enrich(e) => { - let provider = self.create_completion_provider(&e.provider).await?; + let creds = self.registry.get(e.credentials_id)?.clone(); + let provider = e + .clone() + .into_provider(creds.into_completion_credentials()?) + .await?; let vision_agent = VisionAgent::new(provider.clone(), false); let table_agent = TableAgent::new(provider, false); Ok(CompiledTransform::Enrich(Box::new(EnrichProcessor::new( @@ -359,7 +294,11 @@ impl<'a> WorkflowCompiler<'a> { )))) } Transformer::Extract(e) => { - let provider = self.create_completion_provider(&e.provider).await?; + let creds = self.registry.get(e.credentials_id)?.clone(); + let provider = e + .clone() + .into_provider(creds.into_completion_credentials()?) 
+ .await?; let text_analysis_agent = TextAnalysisAgent::new(provider.clone(), false); let table_agent = TableAgent::new(provider.clone(), false); let structured_output_agent = StructuredOutputAgent::new(provider, false); @@ -372,7 +311,11 @@ impl<'a> WorkflowCompiler<'a> { )))) } Transformer::Derive(d) => { - let provider = self.create_completion_provider(&d.provider).await?; + let creds = self.registry.get(d.credentials_id)?.clone(); + let provider = d + .clone() + .into_provider(creds.into_completion_credentials()?) + .await?; let agent = TextGenerationAgent::new(provider, false); Ok(CompiledTransform::Derive(DeriveProcessor::new( agent, @@ -383,30 +326,6 @@ impl<'a> WorkflowCompiler<'a> { } } - /// Creates an embedding provider from parameters. - async fn create_embedding_provider( - &self, - params: &EmbeddingProviderParams, - ) -> Result { - let creds = self.registry.get(params.credentials_id())?.clone(); - params - .clone() - .into_provider(creds.into_embedding_credentials()?) - .await - } - - /// Creates a completion provider from parameters. - async fn create_completion_provider( - &self, - params: &CompletionProviderParams, - ) -> Result { - let creds = self.registry.get(params.credentials_id())?.clone(); - params - .clone() - .into_provider(creds.into_completion_credentials()?) - .await - } - /// Builds the petgraph from compiled nodes and resolved edges. fn build_graph( &self, diff --git a/crates/nvisy-runtime/src/engine/context.rs b/crates/nvisy-runtime/src/engine/context.rs index 4743590..39510ce 100644 --- a/crates/nvisy-runtime/src/engine/context.rs +++ b/crates/nvisy-runtime/src/engine/context.rs @@ -3,7 +3,7 @@ use derive_builder::Builder; use nvisy_dal::AnyDataValue; -use crate::provider::CredentialsRegistry; +use super::CredentialsRegistry; /// Context for provider operations during compilation and execution. /// @@ -68,35 +68,20 @@ impl Context { /// Converts to an ObjectContext for object storage providers. pub fn to_object_context(&self) -> nvisy_dal::ObjectContext { - let mut ctx = nvisy_dal::ObjectContext::new(); - if let Some(ref prefix) = self.target { - ctx = ctx.with_prefix(prefix.clone()); + nvisy_dal::ObjectContext { + prefix: self.target.clone(), + token: self.cursor.clone(), + limit: self.limit, } - if let Some(ref token) = self.cursor { - ctx = ctx.with_token(token.clone()); - } - if let Some(limit) = self.limit { - ctx = ctx.with_limit(limit); - } - ctx } /// Converts to a RelationalContext for relational database providers. pub fn to_relational_context(&self) -> nvisy_dal::RelationalContext { - let mut ctx = nvisy_dal::RelationalContext::new(); - if let Some(ref table) = self.target { - ctx = ctx.with_table(table.clone()); - } - if let Some(ref cursor) = self.cursor { - ctx = ctx.with_cursor(cursor.clone()); - } - if let Some(ref tiebreaker) = self.tiebreaker { - ctx = ctx.with_tiebreaker(tiebreaker.clone()); - } - if let Some(limit) = self.limit { - ctx = ctx.with_limit(limit); + nvisy_dal::RelationalContext { + cursor: self.cursor.clone(), + tiebreaker: self.tiebreaker.clone(), + limit: self.limit, } - ctx } } diff --git a/crates/nvisy-runtime/src/engine/credentials.rs b/crates/nvisy-runtime/src/engine/credentials.rs new file mode 100644 index 0000000..e10fafb --- /dev/null +++ b/crates/nvisy-runtime/src/engine/credentials.rs @@ -0,0 +1,101 @@ +//! Credentials management for AI providers. +//! +//! This module provides a registry for storing and retrieving credentials +//! used by AI providers (completion, embedding) during workflow execution. 
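// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the new file below): how a caller is
// expected to seed the registry before compiling a workflow. Building the
// concrete `CompletionCredentials` value is provider-specific and elided;
// only `new`, `register`, `get`, and `ProviderCredentials::Completion` come
// from this patch.
// ---------------------------------------------------------------------------
fn seed_registry(creds: CompletionCredentials) -> (CredentialsRegistry, Uuid) {
    let mut registry = CredentialsRegistry::new();
    let id = Uuid::new_v4();
    registry.register(id, ProviderCredentials::Completion(creds));

    // Transform definitions reference this id; unknown ids surface as
    // `Error::CredentialsNotFound` during compilation.
    assert_eq!(registry.get(id).unwrap().kind(), "completion");
    (registry, id)
}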
+ +use std::collections::HashMap; + +use derive_more::From; +use nvisy_rig::provider::{CompletionCredentials, EmbeddingCredentials}; +use serde::{Deserialize, Serialize}; +use strum::IntoStaticStr; +use uuid::Uuid; + +use crate::error::{Error, Result}; + +/// AI provider credentials. +#[derive(Debug, Clone, From, Serialize, Deserialize, IntoStaticStr)] +#[serde(tag = "provider", rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum ProviderCredentials { + /// Completion provider credentials. + Completion(CompletionCredentials), + /// Embedding provider credentials. + Embedding(EmbeddingCredentials), +} + +impl ProviderCredentials { + /// Returns the provider kind as a string. + pub fn kind(&self) -> &'static str { + self.into() + } + + /// Converts to completion credentials if applicable. + pub fn into_completion_credentials(self) -> Result { + match self { + Self::Completion(c) => Ok(c), + other => Err(Error::Internal(format!( + "expected completion credentials, got '{}'", + other.kind() + ))), + } + } + + /// Converts to embedding credentials if applicable. + pub fn into_embedding_credentials(self) -> Result { + match self { + Self::Embedding(c) => Ok(c), + other => Err(Error::Internal(format!( + "expected embedding credentials, got '{}'", + other.kind() + ))), + } + } +} + +/// In-memory registry for AI provider credentials. +/// +/// Credentials are stored by UUID and can be retrieved during workflow compilation. +#[derive(Debug, Clone, Default)] +pub struct CredentialsRegistry { + credentials: HashMap, +} + +impl CredentialsRegistry { + /// Creates a new empty registry. + pub fn new() -> Self { + Self::default() + } + + /// Registers credentials with a UUID. + pub fn register(&mut self, id: Uuid, creds: ProviderCredentials) { + self.credentials.insert(id, creds); + } + + /// Retrieves credentials by UUID. + pub fn get(&self, id: Uuid) -> Result<&ProviderCredentials> { + self.credentials + .get(&id) + .ok_or_else(|| Error::CredentialsNotFound(id)) + } + + /// Removes credentials by UUID. + pub fn remove(&mut self, id: Uuid) -> Option { + self.credentials.remove(&id) + } + + /// Returns the number of registered credentials. + pub fn len(&self) -> usize { + self.credentials.len() + } + + /// Returns true if no credentials are registered. + pub fn is_empty(&self) -> bool { + self.credentials.is_empty() + } + + /// Clears all credentials. + pub fn clear(&mut self) { + self.credentials.clear(); + } +} diff --git a/crates/nvisy-runtime/src/engine/executor.rs b/crates/nvisy-runtime/src/engine/executor.rs index a172143..f192649 100644 --- a/crates/nvisy-runtime/src/engine/executor.rs +++ b/crates/nvisy-runtime/src/engine/executor.rs @@ -8,10 +8,10 @@ use tokio::sync::Semaphore; use super::EngineConfig; use super::compiler::WorkflowCompiler; use super::context::{Context, ExecutionContext}; +use super::credentials::CredentialsRegistry; use crate::definition::{NodeId, Workflow}; use crate::error::{Error, Result}; use crate::graph::{CompiledGraph, CompiledNode, InputStream, OutputStream, Process}; -use crate::provider::CredentialsRegistry; /// Tracing target for engine operations. const TRACING_TARGET: &str = "nvisy_workflow::engine"; diff --git a/crates/nvisy-runtime/src/engine/mod.rs b/crates/nvisy-runtime/src/engine/mod.rs index c74efae..031ef29 100644 --- a/crates/nvisy-runtime/src/engine/mod.rs +++ b/crates/nvisy-runtime/src/engine/mod.rs @@ -4,12 +4,15 @@ //! - [`Engine`]: The main execution engine //! - [`EngineConfig`]: Configuration options //! 
- [`ExecutionContext`]: Runtime context for workflow execution +//! - [`CredentialsRegistry`]: Registry for AI provider credentials mod compiler; mod config; mod context; +mod credentials; mod executor; pub use config::EngineConfig; pub use context::{Context, ExecutionContext}; +pub use credentials::{CredentialsRegistry, ProviderCredentials}; pub use executor::Engine; diff --git a/crates/nvisy-runtime/src/graph/input/stream.rs b/crates/nvisy-runtime/src/graph/input/stream.rs index 5a0d667..783a821 100644 --- a/crates/nvisy-runtime/src/graph/input/stream.rs +++ b/crates/nvisy-runtime/src/graph/input/stream.rs @@ -5,7 +5,7 @@ use std::task::{Context, Poll}; use futures::stream::BoxStream; use futures::{Stream, StreamExt}; -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use crate::error::Result; diff --git a/crates/nvisy-runtime/src/graph/output/stream.rs b/crates/nvisy-runtime/src/graph/output/stream.rs index 50873da..d7f5f8e 100644 --- a/crates/nvisy-runtime/src/graph/output/stream.rs +++ b/crates/nvisy-runtime/src/graph/output/stream.rs @@ -4,7 +4,7 @@ use std::pin::Pin; use std::task::{Context, Poll}; use futures::{Sink, SinkExt}; -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use crate::error::Error; diff --git a/crates/nvisy-runtime/src/graph/route/file_category.rs b/crates/nvisy-runtime/src/graph/route/file_category.rs index 53e9eb6..bb072d2 100644 --- a/crates/nvisy-runtime/src/graph/route/file_category.rs +++ b/crates/nvisy-runtime/src/graph/route/file_category.rs @@ -1,6 +1,6 @@ //! File category evaluator for routing by file extension. -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use crate::definition::FileCategory; @@ -19,12 +19,23 @@ impl FileCategoryEvaluator { /// Evaluates whether the data matches the file category. 
pub fn evaluate(&self, data: &AnyDataValue) -> bool { - let ext = match data { - AnyDataValue::Blob(blob) => blob.path.rsplit('.').next(), - _ => return false, + // Extract path from the value based on data type + let path: Option<&str> = match data { + AnyDataValue::Object(obj) => Some(obj.path.as_str()), + AnyDataValue::Document(doc) => doc.metadata.get("path").and_then(|v| v.as_str()), + AnyDataValue::Record(rec) => rec + .columns + .get("path") + .or_else(|| rec.columns.get("key")) + .and_then(|v| v.as_str()), + _ => None, }; - let Some(ext) = ext else { + let Some(path) = path else { + return self.category == FileCategory::Other; + }; + + let Some(ext) = path.rsplit('.').next() else { return self.category == FileCategory::Other; }; @@ -118,34 +129,3 @@ impl From for FileCategoryEvaluator { Self::new(category) } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_evaluate_image() { - let evaluator = FileCategoryEvaluator::new(FileCategory::Image); - - let jpg = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("photo.jpg", vec![])); - let png = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("image.PNG", vec![])); - let pdf = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("doc.pdf", vec![])); - - assert!(evaluator.evaluate(&jpg)); - assert!(evaluator.evaluate(&png)); - assert!(!evaluator.evaluate(&pdf)); - } - - #[test] - fn test_evaluate_document() { - let evaluator = FileCategoryEvaluator::new(FileCategory::Document); - - let pdf = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("report.pdf", vec![])); - let docx = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("letter.docx", vec![])); - let txt = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("notes.txt", vec![])); - - assert!(evaluator.evaluate(&pdf)); - assert!(evaluator.evaluate(&docx)); - assert!(!evaluator.evaluate(&txt)); - } -} diff --git a/crates/nvisy-runtime/src/graph/route/language.rs b/crates/nvisy-runtime/src/graph/route/language.rs index 12a224a..41a7408 100644 --- a/crates/nvisy-runtime/src/graph/route/language.rs +++ b/crates/nvisy-runtime/src/graph/route/language.rs @@ -1,6 +1,6 @@ //! Language evaluator for routing by detected content language. -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; /// Evaluates language based on metadata. #[derive(Debug, Clone)] @@ -43,8 +43,9 @@ impl LanguageEvaluator { /// Gets a string metadata value. fn get_metadata_string(&self, data: &AnyDataValue, key: &str) -> Option { match data { - AnyDataValue::Blob(blob) => blob.metadata.get(key).and_then(json_to_string), + AnyDataValue::Object(obj) => obj.metadata.get(key).and_then(json_to_string), AnyDataValue::Record(record) => record.columns.get(key).and_then(json_to_string), + AnyDataValue::Document(doc) => doc.metadata.get(key).and_then(json_to_string), _ => None, } } @@ -52,7 +53,7 @@ impl LanguageEvaluator { /// Gets an f32 metadata value. 
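// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the diff): the path-to-category decision
// that the reworked FileCategoryEvaluator::evaluate above implements, written
// as a standalone helper so the fallback behaviour is explicit. The extension
// lists are abbreviated examples; the real mapping lives in the evaluator.
// ---------------------------------------------------------------------------
fn category_for_path(path: Option<&str>) -> FileCategory {
    // Values without a recoverable path (e.g. embeddings) only match `Other`.
    let Some(ext) = path.and_then(|p| p.rsplit('.').next()) else {
        return FileCategory::Other;
    };

    match ext.to_ascii_lowercase().as_str() {
        "jpg" | "jpeg" | "png" | "webp" => FileCategory::Image,
        "pdf" | "doc" | "docx" | "odt" => FileCategory::Document,
        _ => FileCategory::Other,
    }
}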
fn get_metadata_f32(&self, data: &AnyDataValue, key: &str) -> Option { match data { - AnyDataValue::Blob(blob) => blob + AnyDataValue::Object(obj) => obj .metadata .get(key) .and_then(|v| v.as_f64()) @@ -62,6 +63,11 @@ impl LanguageEvaluator { .get(key) .and_then(|v| v.as_f64()) .map(|v| v as f32), + AnyDataValue::Document(doc) => doc + .metadata + .get(key) + .and_then(|v| v.as_f64()) + .map(|v| v as f32), _ => None, } } @@ -76,44 +82,3 @@ fn json_to_string(value: &serde_json::Value) -> Option { _ => None, } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_evaluate_with_confidence() { - let evaluator = LanguageEvaluator::new(vec!["en".into(), "es".into()], 0.8); - - let mut blob = nvisy_dal::datatype::Blob::new("doc.txt", vec![]); - blob.metadata - .insert("language".into(), serde_json::json!("en")); - blob.metadata - .insert("language_confidence".into(), serde_json::json!(0.95)); - let english = AnyDataValue::Blob(blob); - - assert!(evaluator.evaluate(&english)); - - let mut blob = nvisy_dal::datatype::Blob::new("doc.txt", vec![]); - blob.metadata - .insert("language".into(), serde_json::json!("en")); - blob.metadata - .insert("language_confidence".into(), serde_json::json!(0.5)); - let low_conf = AnyDataValue::Blob(blob); - - assert!(!evaluator.evaluate(&low_conf)); - } - - #[test] - fn test_evaluate_without_confidence() { - let evaluator = LanguageEvaluator::new(vec!["fr".into()], 0.8); - - let mut blob = nvisy_dal::datatype::Blob::new("doc.txt", vec![]); - blob.metadata - .insert("language".into(), serde_json::json!("fr")); - let french = AnyDataValue::Blob(blob); - - // Without confidence metadata, still matches by language - assert!(evaluator.evaluate(&french)); - } -} diff --git a/crates/nvisy-runtime/src/graph/route/mod.rs b/crates/nvisy-runtime/src/graph/route/mod.rs index 02a4390..aeeedd0 100644 --- a/crates/nvisy-runtime/src/graph/route/mod.rs +++ b/crates/nvisy-runtime/src/graph/route/mod.rs @@ -5,7 +5,7 @@ mod language; pub use file_category::FileCategoryEvaluator; pub use language::LanguageEvaluator; -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use crate::definition::SwitchDef; @@ -65,24 +65,3 @@ impl From for CompiledSwitch { Self::new(evaluator) } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::definition::{FileCategory, FileCategoryCondition, SwitchCondition}; - - #[test] - fn test_compiled_switch_from_def() { - let def = SwitchDef::new(SwitchCondition::FileCategory(FileCategoryCondition { - category: FileCategory::Image, - })); - - let switch = CompiledSwitch::from(def); - - let jpg = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("photo.jpg", vec![])); - let pdf = AnyDataValue::Blob(nvisy_dal::datatype::Blob::new("doc.pdf", vec![])); - - assert!(switch.evaluate(&jpg)); - assert!(!switch.evaluate(&pdf)); - } -} diff --git a/crates/nvisy-runtime/src/graph/transform/chunk.rs b/crates/nvisy-runtime/src/graph/transform/chunk.rs index 0c632b7..b23302f 100644 --- a/crates/nvisy-runtime/src/graph/transform/chunk.rs +++ b/crates/nvisy-runtime/src/graph/transform/chunk.rs @@ -1,6 +1,6 @@ //! Chunk processor. -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use nvisy_rig::agent::TextGenerationAgent; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/derive.rs b/crates/nvisy-runtime/src/graph/transform/derive.rs index 4de85fa..bb4f815 100644 --- a/crates/nvisy-runtime/src/graph/transform/derive.rs +++ b/crates/nvisy-runtime/src/graph/transform/derive.rs @@ -1,6 +1,6 @@ //! 
Derive processor. -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use nvisy_rig::agent::TextGenerationAgent; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs index 078e7e4..dbcfe38 100644 --- a/crates/nvisy-runtime/src/graph/transform/embedding.rs +++ b/crates/nvisy-runtime/src/graph/transform/embedding.rs @@ -1,6 +1,6 @@ //! Embedding processor. -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use nvisy_rig::provider::EmbeddingProvider; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs index 4fe6fa9..656901e 100644 --- a/crates/nvisy-runtime/src/graph/transform/enrich.rs +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -1,6 +1,6 @@ //! Enrich processor. -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use nvisy_rig::agent::{TableAgent, VisionAgent}; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/extract.rs b/crates/nvisy-runtime/src/graph/transform/extract.rs index ee2864a..71172a3 100644 --- a/crates/nvisy-runtime/src/graph/transform/extract.rs +++ b/crates/nvisy-runtime/src/graph/transform/extract.rs @@ -1,6 +1,6 @@ //! Extract processor. -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use nvisy_rig::agent::{StructuredOutputAgent, TableAgent, TextAnalysisAgent}; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs index 93818d6..5f6a986 100644 --- a/crates/nvisy-runtime/src/graph/transform/mod.rs +++ b/crates/nvisy-runtime/src/graph/transform/mod.rs @@ -17,7 +17,7 @@ pub use derive::DeriveProcessor; pub use embedding::EmbeddingProcessor; pub use enrich::EnrichProcessor; pub use extract::ExtractProcessor; -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; pub use partition::PartitionProcessor; use crate::error::Result; diff --git a/crates/nvisy-runtime/src/graph/transform/partition.rs b/crates/nvisy-runtime/src/graph/transform/partition.rs index 042858a..e54fc42 100644 --- a/crates/nvisy-runtime/src/graph/transform/partition.rs +++ b/crates/nvisy-runtime/src/graph/transform/partition.rs @@ -1,6 +1,6 @@ //! Partition processor. -use nvisy_dal::AnyDataValue; +use nvisy_dal::datatype::AnyDataValue; use super::Process; use crate::definition::PartitionStrategy; diff --git a/crates/nvisy-runtime/src/lib.rs b/crates/nvisy-runtime/src/lib.rs index 81bc6fa..4045275 100644 --- a/crates/nvisy-runtime/src/lib.rs +++ b/crates/nvisy-runtime/src/lib.rs @@ -6,8 +6,8 @@ pub mod definition; pub mod engine; mod error; pub mod graph; -pub mod provider; +pub use engine::{CredentialsRegistry, ProviderCredentials}; pub use error::{Error, Result}; /// Tracing target for runtime operations. diff --git a/crates/nvisy-runtime/src/provider/ai.rs b/crates/nvisy-runtime/src/provider/ai.rs deleted file mode 100644 index 7b492c5..0000000 --- a/crates/nvisy-runtime/src/provider/ai.rs +++ /dev/null @@ -1,156 +0,0 @@ -//! AI provider types and implementations. -//! -//! Re-exports types from nvisy_rig and provides wrapper enums for provider params. 
- -use derive_more::From; -use nvisy_core::Provider; -use nvisy_rig::provider::{ - AnthropicModel, CohereCompletionModel, CohereEmbeddingModel, CompletionCredentials, - CompletionModel, CompletionProvider, EmbeddingCredentials, EmbeddingModel, EmbeddingProvider, - GeminiCompletionModel, GeminiEmbeddingModel, OpenAiCompletionModel, OpenAiEmbeddingModel, - PerplexityModel, -}; -use serde::{Deserialize, Serialize}; -use strum::IntoStaticStr; -use uuid::Uuid; - -use crate::error::{Error, Result}; - -// ============================================================================= -// Completion Provider Params -// ============================================================================= - -/// Completion provider parameters with credentials reference. -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize, IntoStaticStr)] -#[serde(tag = "provider", rename_all = "snake_case")] -#[strum(serialize_all = "snake_case")] -pub enum CompletionProviderParams { - /// OpenAI completion. - OpenAi { - credentials_id: Uuid, - model: OpenAiCompletionModel, - }, - /// Anthropic completion. - Anthropic { - credentials_id: Uuid, - model: AnthropicModel, - }, - /// Cohere completion. - Cohere { - credentials_id: Uuid, - model: CohereCompletionModel, - }, - /// Google Gemini completion. - Gemini { - credentials_id: Uuid, - model: GeminiCompletionModel, - }, - /// Perplexity completion. - Perplexity { - credentials_id: Uuid, - model: PerplexityModel, - }, -} - -impl CompletionProviderParams { - /// Returns the credentials ID. - pub fn credentials_id(&self) -> Uuid { - match self { - Self::OpenAi { credentials_id, .. } - | Self::Anthropic { credentials_id, .. } - | Self::Cohere { credentials_id, .. } - | Self::Gemini { credentials_id, .. } - | Self::Perplexity { credentials_id, .. } => *credentials_id, - } - } - - /// Returns the provider kind as a string. - pub fn kind(&self) -> &'static str { - self.into() - } - - /// Creates a completion provider from params and credentials. - pub async fn into_provider( - self, - credentials: CompletionCredentials, - ) -> Result { - let model = match self { - Self::OpenAi { model, .. } => CompletionModel::OpenAi(model), - Self::Anthropic { model, .. } => CompletionModel::Anthropic(model), - Self::Cohere { model, .. } => CompletionModel::Cohere(model), - Self::Gemini { model, .. } => CompletionModel::Gemini(model), - Self::Perplexity { model, .. } => CompletionModel::Perplexity(model), - }; - - CompletionProvider::connect(model, credentials) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} - -// ============================================================================= -// Embedding Provider Params -// ============================================================================= - -/// Embedding provider parameters with credentials reference. -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize, IntoStaticStr)] -#[serde(tag = "provider", rename_all = "snake_case")] -#[strum(serialize_all = "snake_case")] -pub enum EmbeddingProviderParams { - /// OpenAI embedding. - OpenAi { - credentials_id: Uuid, - model: OpenAiEmbeddingModel, - }, - /// Cohere embedding. - Cohere { - credentials_id: Uuid, - model: CohereEmbeddingModel, - }, - /// Google Gemini embedding. - Gemini { - credentials_id: Uuid, - model: GeminiEmbeddingModel, - }, -} - -impl EmbeddingProviderParams { - /// Returns the credentials ID. - pub fn credentials_id(&self) -> Uuid { - match self { - Self::OpenAi { credentials_id, .. } - | Self::Cohere { credentials_id, .. 
} - | Self::Gemini { credentials_id, .. } => *credentials_id, - } - } - - /// Returns the provider kind as a string. - pub fn kind(&self) -> &'static str { - self.into() - } - - /// Returns the embedding dimensions for this model. - pub fn dimensions(&self) -> usize { - match self { - Self::OpenAi { model, .. } => model.dimensions(), - Self::Cohere { model, .. } => model.dimensions(), - Self::Gemini { model, .. } => model.dimensions(), - } - } - - /// Creates an embedding provider from params and credentials. - pub async fn into_provider( - self, - credentials: EmbeddingCredentials, - ) -> Result { - let model = match self { - Self::OpenAi { model, .. } => EmbeddingModel::OpenAi(model), - Self::Cohere { model, .. } => EmbeddingModel::Cohere(model), - Self::Gemini { model, .. } => EmbeddingModel::Gemini(model), - }; - - EmbeddingProvider::connect(model, credentials) - .await - .map_err(|e| Error::Internal(e.to_string())) - } -} diff --git a/crates/nvisy-runtime/src/provider/inputs.rs b/crates/nvisy-runtime/src/provider/inputs.rs deleted file mode 100644 index 1d61291..0000000 --- a/crates/nvisy-runtime/src/provider/inputs.rs +++ /dev/null @@ -1,233 +0,0 @@ -//! Input provider types and implementations. - -use derive_more::From; -use nvisy_core::Provider; -use nvisy_dal::provider::{ - AzblobParams, AzblobProvider, GcsParams, GcsProvider, MysqlParams, MysqlProvider, - PostgresParams, PostgresProvider, S3Params, S3Provider, -}; -use nvisy_dal::{AnyDataValue, DataTypeId, ObjectContext, RelationalContext}; -use serde::{Deserialize, Serialize}; -use strum::IntoStaticStr; -use uuid::Uuid; - -use super::ProviderCredentials; -use crate::error::{Error, Result}; - -/// Input provider configuration (credentials reference + params). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct InputProviderConfig { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Provider-specific parameters. - #[serde(flatten)] - pub params: InputProviderParams, -} - -impl InputProviderConfig { - /// Creates a new input provider configuration. - pub fn new(credentials_id: Uuid, params: InputProviderParams) -> Self { - Self { - credentials_id, - params, - } - } - - /// Returns the provider kind as a string. - pub fn kind(&self) -> &'static str { - self.params.kind() - } - - /// Returns the output data type for this provider. - pub const fn output_type(&self) -> DataTypeId { - self.params.output_type() - } - - /// Creates an input provider from this configuration and credentials. - pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { - self.params.into_provider(credentials).await - } -} - -/// Input provider parameters (storage backends only, no vector DBs). -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize, IntoStaticStr)] -#[serde(tag = "kind", rename_all = "snake_case")] -#[strum(serialize_all = "snake_case")] -pub enum InputProviderParams { - /// Amazon S3 storage. - S3(S3Params), - /// Google Cloud Storage. - Gcs(GcsParams), - /// Azure Blob Storage. - Azblob(AzblobParams), - /// PostgreSQL database. - Postgres(PostgresParams), - /// MySQL database. - Mysql(MysqlParams), -} - -impl InputProviderParams { - /// Returns the provider kind as a string. - pub fn kind(&self) -> &'static str { - self.into() - } - - /// Returns the output data type for this provider. 
- pub const fn output_type(&self) -> DataTypeId { - match self { - Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, - Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, - } - } - - /// Creates an input provider from these params and credentials. - pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { - match (self, credentials) { - (Self::S3(p), ProviderCredentials::S3(c)) => Ok(InputProvider::S3( - S3Provider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Gcs(p), ProviderCredentials::Gcs(c)) => Ok(InputProvider::Gcs( - GcsProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Azblob(p), ProviderCredentials::Azblob(c)) => Ok(InputProvider::Azblob( - AzblobProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Postgres(p), ProviderCredentials::Postgres(c)) => Ok(InputProvider::Postgres( - PostgresProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Mysql(p), ProviderCredentials::Mysql(c)) => Ok(InputProvider::Mysql( - MysqlProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (params, creds) => Err(Error::Internal(format!( - "credentials type mismatch: expected '{}', got '{}'", - params.kind(), - creds.kind() - ))), - } - } -} - -/// Input provider instance (created from config). -#[derive(Debug, Clone)] -pub enum InputProvider { - S3(S3Provider), - Gcs(GcsProvider), - Azblob(AzblobProvider), - Postgres(PostgresProvider), - Mysql(MysqlProvider), -} - -impl InputProvider { - /// Returns the output data type for this provider. - pub const fn output_type(&self) -> DataTypeId { - match self { - Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, - Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, - } - } - - /// Reads data from the provider as a stream using object context. - pub async fn read_object_stream( - &self, - ctx: &ObjectContext, - ) -> Result>> { - match self { - Self::S3(p) => read_stream!(p, ctx, Blob), - Self::Gcs(p) => read_stream!(p, ctx, Blob), - Self::Azblob(p) => read_stream!(p, ctx, Blob), - _ => Err(Error::Internal( - "Provider does not support ObjectContext".into(), - )), - } - } - - /// Reads data from the provider as a stream using relational context. - pub async fn read_relational_stream( - &self, - ctx: &RelationalContext, - ) -> Result>> { - match self { - Self::Postgres(p) => read_stream!(p, ctx, Record), - Self::Mysql(p) => read_stream!(p, ctx, Record), - _ => Err(Error::Internal( - "Provider does not support RelationalContext".into(), - )), - } - } - - /// Reads data from the provider using object context. - pub async fn read_object(&self, ctx: &ObjectContext) -> Result> { - match self { - Self::S3(p) => read_data!(p, ctx, Blob), - Self::Gcs(p) => read_data!(p, ctx, Blob), - Self::Azblob(p) => read_data!(p, ctx, Blob), - _ => Err(Error::Internal( - "Provider does not support ObjectContext".into(), - )), - } - } - - /// Reads data from the provider using relational context. - pub async fn read_relational(&self, ctx: &RelationalContext) -> Result> { - match self { - Self::Postgres(p) => read_data!(p, ctx, Record), - Self::Mysql(p) => read_data!(p, ctx, Record), - _ => Err(Error::Internal( - "Provider does not support RelationalContext".into(), - )), - } - } -} - -/// Helper macro to read data from a provider as a boxed stream of AnyDataValue. -macro_rules! 
read_stream { - ($provider:expr, $ctx:expr, $variant:ident) => {{ - use futures::StreamExt; - use nvisy_dal::core::DataInput; - - let stream = $provider - .read($ctx) - .await - .map_err(|e| Error::Internal(e.to_string()))?; - - let mapped = stream.map(|result| result.map(AnyDataValue::$variant)); - Ok(Box::pin(mapped) as futures::stream::BoxStream<'static, _>) - }}; -} - -use read_stream; - -/// Helper macro to read data from a provider and convert to AnyDataValue. -macro_rules! read_data { - ($provider:expr, $ctx:expr, $variant:ident) => {{ - use futures::StreamExt; - use nvisy_dal::core::DataInput; - use nvisy_dal::datatype::$variant; - - let stream = $provider - .read($ctx) - .await - .map_err(|e| Error::Internal(e.to_string()))?; - - let items: Vec<$variant> = stream - .collect::>() - .await - .into_iter() - .collect::, _>>() - .map_err(|e| Error::Internal(e.to_string()))?; - - Ok(items.into_iter().map(AnyDataValue::$variant).collect()) - }}; -} - -use read_data; diff --git a/crates/nvisy-runtime/src/provider/mod.rs b/crates/nvisy-runtime/src/provider/mod.rs deleted file mode 100644 index eb09fb2..0000000 --- a/crates/nvisy-runtime/src/provider/mod.rs +++ /dev/null @@ -1,96 +0,0 @@ -//! Provider params, credentials, and registry. -//! -//! This module separates provider configuration into: -//! - [`ProviderCredentials`]: All credentials (storage + AI, stored per workspace) -//! - [`InputProviderConfig`] / [`OutputProviderConfig`]: Config with credentials reference + params -//! - [`InputProviderParams`] / [`OutputProviderParams`]: Non-sensitive parameters (part of node definition) -//! - [`CompletionProviderParams`] / [`EmbeddingProviderParams`]: AI provider parameters -//! - [`CredentialsRegistry`]: In-memory registry for credentials lookup - -mod ai; -mod inputs; -mod outputs; -mod registry; -pub mod runtime; - -pub use ai::{CompletionProviderParams, EmbeddingProviderParams}; -use derive_more::From; -pub use inputs::{InputProvider, InputProviderConfig, InputProviderParams}; -// Re-export dal credentials -pub use nvisy_dal::provider::{ - AzblobCredentials, GcsCredentials, MysqlCredentials, PgVectorCredentials, PineconeCredentials, - PostgresCredentials, QdrantCredentials, S3Credentials, -}; -// Re-export rig types -pub use nvisy_rig::provider::{ - AnthropicModel, CohereCompletionModel, CohereEmbeddingModel, CompletionCredentials, - EmbeddingCredentials, GeminiCompletionModel, GeminiEmbeddingModel, OpenAiCompletionModel, - OpenAiEmbeddingModel, PerplexityModel, -}; -pub use outputs::{OutputProvider, OutputProviderConfig, OutputProviderParams}; -pub use registry::CredentialsRegistry; -use serde::{Deserialize, Serialize}; -use strum::IntoStaticStr; - -use crate::error::{Error, Result}; - -/// Provider credentials (sensitive). -#[derive(Debug, Clone, From, Serialize, Deserialize, IntoStaticStr)] -#[serde(tag = "provider", rename_all = "snake_case")] -#[strum(serialize_all = "snake_case")] -pub enum ProviderCredentials { - // Storage backends - /// Amazon S3 credentials. - S3(S3Credentials), - /// Google Cloud Storage credentials. - Gcs(GcsCredentials), - /// Azure Blob Storage credentials. - Azblob(AzblobCredentials), - /// PostgreSQL credentials. - Postgres(PostgresCredentials), - /// MySQL credentials. - Mysql(MysqlCredentials), - - // Vector databases - /// Qdrant credentials. - Qdrant(QdrantCredentials), - /// Pinecone credentials. - Pinecone(PineconeCredentials), - /// pgvector credentials. 
- PgVector(PgVectorCredentials), - - // AI providers (completion) - /// Completion provider credentials. - Completion(CompletionCredentials), - /// Embedding provider credentials. - Embedding(EmbeddingCredentials), -} - -impl ProviderCredentials { - /// Returns the provider kind as a string. - pub fn kind(&self) -> &'static str { - self.into() - } - - /// Converts to completion credentials if applicable. - pub fn into_completion_credentials(self) -> Result { - match self { - Self::Completion(c) => Ok(c), - other => Err(Error::Internal(format!( - "expected completion credentials, got '{}'", - other.kind() - ))), - } - } - - /// Converts to embedding credentials if applicable. - pub fn into_embedding_credentials(self) -> Result { - match self { - Self::Embedding(c) => Ok(c), - other => Err(Error::Internal(format!( - "expected embedding credentials, got '{}'", - other.kind() - ))), - } - } -} diff --git a/crates/nvisy-runtime/src/provider/outputs.rs b/crates/nvisy-runtime/src/provider/outputs.rs deleted file mode 100644 index dc654d3..0000000 --- a/crates/nvisy-runtime/src/provider/outputs.rs +++ /dev/null @@ -1,300 +0,0 @@ -//! Output provider types and implementations. - -use std::pin::Pin; -use std::sync::Arc; -use std::task::{Context as TaskContext, Poll}; - -use derive_more::From; -use futures::Sink; -use nvisy_core::Provider; -use nvisy_dal::provider::{ - AzblobParams, AzblobProvider, GcsParams, GcsProvider, MysqlParams, MysqlProvider, - PgVectorParams, PgVectorProvider, PineconeParams, PineconeProvider, PostgresParams, - PostgresProvider, QdrantParams, QdrantProvider, S3Params, S3Provider, -}; -use nvisy_dal::{AnyDataValue, DataTypeId}; -use serde::{Deserialize, Serialize}; -use strum::IntoStaticStr; -use tokio::sync::Mutex; -use uuid::Uuid; - -use super::ProviderCredentials; -use crate::error::{Error, Result}; -use crate::graph::DataSink; - -/// Output provider configuration (credentials reference + params). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct OutputProviderConfig { - /// Reference to stored credentials. - pub credentials_id: Uuid, - /// Provider-specific parameters. - #[serde(flatten)] - pub params: OutputProviderParams, -} - -impl OutputProviderConfig { - /// Creates a new output provider configuration. - pub fn new(credentials_id: Uuid, params: OutputProviderParams) -> Self { - Self { - credentials_id, - params, - } - } - - /// Returns the provider kind as a string. - pub fn kind(&self) -> &'static str { - self.params.kind() - } - - /// Returns the output data type for this provider. - pub const fn output_type(&self) -> DataTypeId { - self.params.output_type() - } - - /// Creates an output provider from this configuration and credentials. - pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { - self.params.into_provider(credentials).await - } -} - -/// Output provider parameters (storage backends + vector DBs). -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize, IntoStaticStr)] -#[serde(tag = "kind", rename_all = "snake_case")] -#[strum(serialize_all = "snake_case")] -pub enum OutputProviderParams { - /// Amazon S3 storage. - S3(S3Params), - /// Google Cloud Storage. - Gcs(GcsParams), - /// Azure Blob Storage. - Azblob(AzblobParams), - /// PostgreSQL database. - Postgres(PostgresParams), - /// MySQL database. - Mysql(MysqlParams), - /// Qdrant vector database. - Qdrant(QdrantParams), - /// Pinecone vector database. - Pinecone(PineconeParams), - /// pgvector (PostgreSQL extension). 
- PgVector(PgVectorParams), -} - -impl OutputProviderParams { - /// Returns the provider kind as a string. - pub fn kind(&self) -> &'static str { - self.into() - } - - /// Returns the output data type for this provider. - pub const fn output_type(&self) -> DataTypeId { - match self { - Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, - Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, - Self::Qdrant(_) | Self::Pinecone(_) | Self::PgVector(_) => DataTypeId::Embedding, - } - } - - /// Creates an output provider from these params and credentials. - pub async fn into_provider(self, credentials: ProviderCredentials) -> Result { - match (self, credentials) { - (Self::S3(p), ProviderCredentials::S3(c)) => Ok(OutputProvider::S3( - S3Provider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Gcs(p), ProviderCredentials::Gcs(c)) => Ok(OutputProvider::Gcs( - GcsProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Azblob(p), ProviderCredentials::Azblob(c)) => Ok(OutputProvider::Azblob( - AzblobProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Postgres(p), ProviderCredentials::Postgres(c)) => Ok(OutputProvider::Postgres( - PostgresProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Mysql(p), ProviderCredentials::Mysql(c)) => Ok(OutputProvider::Mysql( - MysqlProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (Self::Qdrant(p), ProviderCredentials::Qdrant(c)) => { - Ok(OutputProvider::Qdrant(Box::new( - QdrantProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - ))) - } - (Self::Pinecone(p), ProviderCredentials::Pinecone(c)) => { - Ok(OutputProvider::Pinecone(Box::new( - PineconeProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - ))) - } - - (Self::PgVector(p), ProviderCredentials::PgVector(c)) => Ok(OutputProvider::PgVector( - PgVectorProvider::connect(p, c) - .await - .map_err(|e| Error::Internal(e.to_string()))?, - )), - (params, creds) => Err(Error::Internal(format!( - "credentials type mismatch: expected '{}', got '{}'", - params.kind(), - creds.kind() - ))), - } - } -} - -/// Output provider instance (created from config). -#[derive(Debug)] -pub enum OutputProvider { - S3(S3Provider), - Gcs(GcsProvider), - Azblob(AzblobProvider), - Postgres(PostgresProvider), - Mysql(MysqlProvider), - Qdrant(Box), - Pinecone(Box), - - PgVector(PgVectorProvider), -} - -impl OutputProvider { - /// Returns the input data type expected by this provider. - pub const fn input_type(&self) -> DataTypeId { - match self { - Self::S3(_) | Self::Gcs(_) | Self::Azblob(_) => DataTypeId::Blob, - Self::Postgres(_) | Self::Mysql(_) => DataTypeId::Record, - Self::Qdrant(_) | Self::Pinecone(_) | Self::PgVector(_) => DataTypeId::Embedding, - } - } - - /// Creates a sink for streaming writes to the provider. - /// - /// The sink buffers items and writes them on flush/close. - pub fn write_sink(self) -> DataSink { - let sink = ProviderSink::new(self); - Box::pin(sink) - } - - /// Writes data to the provider, accepting type-erased values. 
- pub async fn write(&self, data: Vec) -> Result<()> { - match self { - Self::S3(p) => write_data!(p, data, Blob, into_blob), - Self::Gcs(p) => write_data!(p, data, Blob, into_blob), - Self::Azblob(p) => write_data!(p, data, Blob, into_blob), - Self::Postgres(p) => write_data!(p, data, Record, into_record), - Self::Mysql(p) => write_data!(p, data, Record, into_record), - Self::Qdrant(p) => write_data!(**p, data, Embedding, into_embedding), - Self::Pinecone(p) => write_data!(**p, data, Embedding, into_embedding), - - Self::PgVector(p) => write_data!(p, data, Embedding, into_embedding), - } - } -} - -/// A sink that buffers items and writes them to an output provider. -struct ProviderSink { - provider: Arc, - buffer: Arc>>, - flush_future: Option> + Send>>>, -} - -impl ProviderSink { - fn new(provider: OutputProvider) -> Self { - Self { - provider: Arc::new(provider), - buffer: Arc::new(Mutex::new(Vec::new())), - flush_future: None, - } - } -} - -impl Sink for ProviderSink { - type Error = Error; - - fn poll_ready( - self: Pin<&mut Self>, - _cx: &mut TaskContext<'_>, - ) -> Poll> { - Poll::Ready(Ok(())) - } - - fn start_send( - self: Pin<&mut Self>, - item: AnyDataValue, - ) -> std::result::Result<(), Self::Error> { - let buffer = self.buffer.clone(); - if let Ok(mut guard) = buffer.try_lock() { - guard.push(item); - Ok(()) - } else { - Err(Error::Internal("buffer lock contention".into())) - } - } - - fn poll_flush( - mut self: Pin<&mut Self>, - cx: &mut TaskContext<'_>, - ) -> Poll> { - if let Some(ref mut future) = self.flush_future { - return match future.as_mut().poll(cx) { - Poll::Ready(result) => { - self.flush_future = None; - Poll::Ready(result) - } - Poll::Pending => Poll::Pending, - }; - } - - let buffer = self.buffer.clone(); - let provider = self.provider.clone(); - - let future = Box::pin(async move { - let items = { - let mut guard = buffer.lock().await; - std::mem::take(&mut *guard) - }; - - if items.is_empty() { - return Ok(()); - } - - provider.write(items).await - }); - - self.flush_future = Some(future); - self.poll_flush(cx) - } - - fn poll_close( - self: Pin<&mut Self>, - cx: &mut TaskContext<'_>, - ) -> Poll> { - self.poll_flush(cx) - } -} - -/// Helper macro to write data to a provider from AnyDataValue. -macro_rules! write_data { - ($provider:expr, $data:expr, $type:ident, $converter:ident) => {{ - use nvisy_dal::core::DataOutput; - use nvisy_dal::datatype::$type; - - let items: Vec<$type> = $data.into_iter().filter_map(|v| v.$converter()).collect(); - - $provider - .write(items) - .await - .map_err(|e| Error::Internal(e.to_string())) - }}; -} - -use write_data; diff --git a/crates/nvisy-runtime/src/provider/registry.rs b/crates/nvisy-runtime/src/provider/registry.rs deleted file mode 100644 index 6eb3dbd..0000000 --- a/crates/nvisy-runtime/src/provider/registry.rs +++ /dev/null @@ -1,58 +0,0 @@ -//! Credentials registry for workflow execution. - -use std::collections::HashMap; - -use uuid::Uuid; - -use super::ProviderCredentials; -use crate::error::{Error, Result}; - -/// In-memory credentials registry. -/// -/// Stores credentials by UUID for lookup during workflow execution. -#[derive(Debug, Clone, Default)] -pub struct CredentialsRegistry { - credentials: HashMap, -} - -impl CredentialsRegistry { - /// Creates a new registry from a JSON value. - /// - /// Expects a JSON object with UUID keys and credential objects as values. 
- pub fn new(value: serde_json::Value) -> Result { - let map: HashMap = - serde_json::from_value(value).map_err(Error::CredentialsRegistry)?; - Ok(Self { credentials: map }) - } - - /// Retrieves credentials by ID. - pub fn get(&self, credentials_id: Uuid) -> Result<&ProviderCredentials> { - self.credentials - .get(&credentials_id) - .ok_or(Error::CredentialsNotFound(credentials_id)) - } - - /// Inserts credentials with a new UUID v4. - /// - /// Generates a unique UUID that doesn't conflict with existing entries. - pub fn insert(&mut self, credentials: ProviderCredentials) -> Uuid { - use std::collections::hash_map::Entry; - loop { - let id = Uuid::new_v4(); - if let Entry::Vacant(entry) = self.credentials.entry(id) { - entry.insert(credentials); - return id; - } - } - } - - /// Removes credentials by ID. - pub fn remove(&mut self, credentials_id: Uuid) -> Option { - self.credentials.remove(&credentials_id) - } - - /// Lists all credential IDs. - pub fn list(&self) -> Vec { - self.credentials.keys().copied().collect() - } -} diff --git a/crates/nvisy-runtime/src/provider/runtime/config.rs b/crates/nvisy-runtime/src/provider/runtime/config.rs deleted file mode 100644 index 0b215a6..0000000 --- a/crates/nvisy-runtime/src/provider/runtime/config.rs +++ /dev/null @@ -1,79 +0,0 @@ -//! Runtime configuration. - -use serde::{Deserialize, Serialize}; - -/// Default maximum file size: 12 MB. -const DEFAULT_MAX_FILE_SIZE: u64 = 12 * 1024 * 1024; - -/// Configuration for the runtime service with sensible defaults. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RuntimeConfig { - /// Maximum file size in bytes (optional). - pub max_file_size: Option, -} - -impl RuntimeConfig { - /// Creates a new runtime configuration with defaults. - #[must_use] - pub fn new() -> Self { - Self { - max_file_size: None, - } - } - - /// Returns the maximum file size, using the default if not set. - #[inline] - #[must_use] - pub fn max_file_size(&self) -> u64 { - self.max_file_size.unwrap_or(DEFAULT_MAX_FILE_SIZE) - } - - /// Set the maximum file size in bytes. - #[must_use] - pub fn with_max_file_size(mut self, size: u64) -> Self { - self.max_file_size = Some(size); - self - } - - /// Validate the configuration and return any issues. - pub fn validate(&self) -> Result<(), String> { - if self.max_file_size == Some(0) { - return Err("Maximum file size cannot be zero".to_string()); - } - Ok(()) - } -} - -impl Default for RuntimeConfig { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_new_config() { - let config = RuntimeConfig::new(); - assert_eq!(config.max_file_size(), DEFAULT_MAX_FILE_SIZE); - assert!(config.validate().is_ok()); - } - - #[test] - fn test_config_builder() { - let config = RuntimeConfig::new().with_max_file_size(50 * 1024 * 1024); - assert_eq!(config.max_file_size(), 50 * 1024 * 1024); - assert!(config.validate().is_ok()); - } - - #[test] - fn test_config_validation() { - let valid_config = RuntimeConfig::new(); - assert!(valid_config.validate().is_ok()); - - let zero_size = RuntimeConfig::new().with_max_file_size(0); - assert!(zero_size.validate().is_err()); - } -} diff --git a/crates/nvisy-runtime/src/provider/runtime/mod.rs b/crates/nvisy-runtime/src/provider/runtime/mod.rs deleted file mode 100644 index d4d9e72..0000000 --- a/crates/nvisy-runtime/src/provider/runtime/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -//! Runtime services for document processing. 
- -mod config; -mod service; - -pub use config::RuntimeConfig; -// Re-export commonly used types from the runtime crates -pub use nvisy_rt_core as rt_core; -pub use nvisy_rt_engine as rt_engine; -pub use nvisy_rt_engine::{ - BoundingBox, Capabilities, Document, DocumentFormat, Engine, EngineConfig, FormatRegistry, - LoadedDocument, Point, Region, RegionId, RegionKind, doc, -}; -pub use service::RuntimeService; diff --git a/crates/nvisy-runtime/src/provider/runtime/service.rs b/crates/nvisy-runtime/src/provider/runtime/service.rs deleted file mode 100644 index a208924..0000000 --- a/crates/nvisy-runtime/src/provider/runtime/service.rs +++ /dev/null @@ -1,79 +0,0 @@ -//! Runtime service for document processing. - -use derive_more::{Deref, DerefMut}; -use nvisy_rt_engine::{Engine, EngineConfig}; - -use super::RuntimeConfig; - -/// Runtime service for document processing. -/// -/// Wraps the nvisy runtime engine and provides document loading -/// and processing capabilities for workflows. -/// -/// This service derefs to the underlying [`Engine`], allowing direct -/// access to all engine methods. -#[derive(Debug, Clone, Deref, DerefMut)] -pub struct RuntimeService { - #[deref] - #[deref_mut] - engine: Engine, -} - -impl RuntimeService { - /// Creates a new runtime service with default configuration. - #[must_use] - pub fn new() -> Self { - Self { - engine: Engine::new(), - } - } - - /// Creates a new runtime service with custom configuration. - #[must_use] - pub fn with_config(config: &RuntimeConfig) -> Self { - let engine_config = EngineConfig { - max_file_size: Some(config.max_file_size()), - ..Default::default() - }; - Self { - engine: Engine::with_config(engine_config), - } - } - - /// Returns a reference to the underlying engine. - #[must_use] - pub fn engine(&self) -> &Engine { - &self.engine - } - - /// Returns a mutable reference to the underlying engine. - #[must_use] - pub fn engine_mut(&mut self) -> &mut Engine { - &mut self.engine - } -} - -impl Default for RuntimeService { - fn default() -> Self { - Self::new() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_service_deref() { - let service = RuntimeService::new(); - // Test that we can call Engine methods directly via Deref - let _extensions = service.supported_extensions(); - let _mimes = service.supported_mime_types(); - } - - #[test] - fn test_service_with_config() { - let config = RuntimeConfig::new().with_max_file_size(10 * 1024 * 1024); - let _service = RuntimeService::with_config(&config); - } -} diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md new file mode 100644 index 0000000..9846425 --- /dev/null +++ b/docs/PROVIDERS.md @@ -0,0 +1,321 @@ +# Provider Architecture + +Data providers enable reading from and writing to external systems (storage, databases, vector stores). This document defines the architecture for implementing providers in Python while maintaining type safety with the Rust core. + +## Design Principles + +1. **Rust owns the API boundary** - All HTTP schemas defined in Rust, Python conforms to them +2. **Python owns integrations** - Provider implementations leverage Python's ecosystem +3. **Type safety across the boundary** - Schemas generated from Rust, validated in Python +4. **Async-first** - No synchronous APIs, no blocking calls +5. 
**Minimal coupling** - Providers are independent, share only core protocols + +## Architecture + +``` +┌────────────────────────────────────────────────────┐ +│ Rust Core │ +│ │ +│ OpenAPI Schema ◄── schemars ◄── Rust Types │ +│ │ │ │ +│ ▼ ▼ │ +│ JSON Schema files nvisy-dal traits │ +│ │ │ │ +└────────┼────────────────────────────┼──────────────┘ + │ │ + ▼ ▼ +┌────────────────────────────────────────────────────┐ +│ Python Providers │ +│ │ +│ datamodel-codegen ──► Pydantic Models │ +│ │ │ +│ ▼ │ +│ Provider Protocols │ +│ │ │ +│ ▼ │ +│ Provider Implementations │ +│ │ +└────────────────────────────────────────────────────┘ +``` + +## Schema Flow + +### 1. Define in Rust + +Schemas are defined once in Rust using `schemars`: + +```rust +#[derive(JsonSchema, Serialize, Deserialize)] +pub struct ObjectContext { + pub prefix: Option, + pub continuation_token: Option, + pub limit: Option, +} +``` + +### 2. Export to JSON Schema + +Build script exports schemas to `schemas/`: + +``` +schemas/ +├── contexts/ +│ ├── object.json +│ ├── relational.json +│ └── vector.json +├── credentials/ +│ ├── s3.json +│ ├── gcs.json +│ └── ... +└── datatypes/ + ├── blob.json + ├── document.json + └── ... +``` + +### 3. Generate Python Models + +Python models generated from JSON Schema at build time: + +```bash +uv run datamodel-codegen \ + --input schemas/ \ + --output packages/nvisy-dal-core/nvisy_dal_core/generated/ +``` + +### 4. Validate at Runtime + +Generated models used in provider implementations with Pydantic validation. + +## Provider Interface + +Providers implement async protocols for reading and writing data. + +### Input Protocol + +```python +@runtime_checkable +class DataInput(Protocol[T_co, Ctx_contra]): + """Protocol for reading data from external sources.""" + + async def read(self, ctx: Ctx_contra) -> AsyncIterator[T_co]: + """Yield items from the source based on context.""" + ... +``` + +### Output Protocol + +```python +@runtime_checkable +class DataOutput(Protocol[T_contra, Ctx_contra]): + """Protocol for writing data to external sinks.""" + + async def write(self, ctx: Ctx_contra, items: Sequence[T_contra]) -> None: + """Write a batch of items to the sink.""" + ... +``` + +### Provider Protocol + +```python +@runtime_checkable +class Provider(Protocol[Cred, Params]): + """Protocol for provider lifecycle management.""" + + @classmethod + async def connect(cls, credentials: Cred, params: Params) -> Self: + """Establish connection to the external service.""" + ... + + async def disconnect(self) -> None: + """Release resources and close connections.""" + ... 
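+
+# --- Illustrative sketch (not part of nvisy-dal) -------------------------
+# A minimal in-memory provider that structurally satisfies Provider,
+# DataInput, and DataOutput. EchoProvider, EchoCredentials, EchoParams,
+# and EchoContext are hypothetical names used only for this example;
+# imports (AsyncIterator, Sequence) are omitted as in the snippets above.
+class EchoProvider:
+    def __init__(self, params: "EchoParams") -> None:
+        self._params = params
+        self._items: list[str] = []
+
+    @classmethod
+    async def connect(cls, credentials: "EchoCredentials", params: "EchoParams") -> "EchoProvider":
+        # Real providers would open connections or clients here.
+        return cls(params)
+
+    async def disconnect(self) -> None:
+        # Nothing external to release in this sketch.
+        self._items.clear()
+
+    async def read(self, ctx: "EchoContext") -> AsyncIterator[str]:
+        # DataInput: yield previously buffered items.
+        for item in self._items:
+            yield item
+
+    async def write(self, ctx: "EchoContext", items: Sequence[str]) -> None:
+        # DataOutput: buffer a batch of items.
+        self._items.extend(items)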
+``` + +## Package Structure + +Single package with optional dependencies per provider: + +``` +packages/nvisy-dal/ +├── pyproject.toml +├── py.typed # PEP 561 marker +└── src/ + └── nvisy_dal/ + ├── __init__.py + ├── protocols.py # DataInput, DataOutput, Provider + ├── errors.py # DalError, error kinds + ├── _generated/ # From JSON Schema (committed) + │ ├── __init__.py + │ ├── contexts.py + │ └── datatypes.py + └── providers/ + ├── __init__.py + ├── s3.py + ├── gcs.py + ├── azure.py + ├── postgres.py + ├── mysql.py + ├── qdrant.py + └── pinecone.py +``` + +### Layout Rationale + +- **Single package** - Internal code, not publishing separately to PyPI +- **`src/` layout** - Prevents accidental imports from project root during development +- **Flat providers** - One module per provider, no nested input/output structure +- **`_generated/` committed** - Reproducible builds, `_` prefix indicates internal +- **Optional deps** - `pip install nvisy-dal[s3,postgres]` for selective installation + +### Dependencies + +```toml +# pyproject.toml +[project] +name = "nvisy-dal" +dependencies = [ + "pydantic>=2.0", +] + +[project.optional-dependencies] +s3 = ["boto3>=1.35", "types-boto3"] +gcs = ["google-cloud-storage>=2.18"] +azure = ["azure-storage-blob>=12.23"] +postgres = ["asyncpg>=0.30"] +mysql = ["aiomysql>=0.2"] +qdrant = ["qdrant-client>=1.12"] +pinecone = ["pinecone-client>=5.0"] +all = ["nvisy-dal[s3,gcs,azure,postgres,mysql,qdrant,pinecone]"] +dev = ["nvisy-dal[all]", "pytest>=8.0", "pytest-asyncio>=0.24", "moto>=5.0"] +``` + +## Python Standards + +### Tooling + +| Tool | Purpose | +|------|---------| +| `uv` | Package management, virtualenv, lockfile | +| `ruff` | Linting + formatting (replaces black, isort, flake8) | +| `pyright` | Type checking in strict mode | +| `pytest` | Testing with `pytest-asyncio` | + +### Configuration + +All config in `pyproject.toml`: + +```toml +[project] +requires-python = ">=3.12" + +[tool.ruff] +target-version = "py312" +line-length = 100 + +[tool.ruff.lint] +select = ["ALL"] +ignore = ["D", "ANN101", "ANN102", "COM812", "ISC001"] + +[tool.ruff.lint.isort] +known-first-party = ["nvisy_dal"] + +[tool.pyright] +pythonVersion = "3.12" +typeCheckingMode = "strict" + +[tool.pytest.ini_options] +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" +``` + +### Code Style + +- Type hints on all public APIs +- Protocols over ABCs (structural typing) +- `Final` for constants, `ClassVar` for class attributes +- `Sequence` over `list` in parameters (covariance) +- `Mapping` over `dict` in parameters +- `async def` always, no sync wrappers +- Context managers for resource cleanup +- `structlog` for structured logging + +### Error Handling + +```python +from enum import StrEnum +from typing import final + +class ErrorKind(StrEnum): + """Classification of provider errors.""" + + CONNECTION = "connection" + NOT_FOUND = "not_found" + INVALID_INPUT = "invalid_input" + TIMEOUT = "timeout" + PROVIDER = "provider" + +@final +class DalError(Exception): + """Base error for all provider operations.""" + + __slots__ = ("message", "kind", "source") + + def __init__( + self, + message: str, + kind: ErrorKind = ErrorKind.PROVIDER, + source: BaseException | None = None, + ) -> None: + super().__init__(message) + self.message = message + self.kind = kind + self.source = source +``` + +## PyO3 Bridge + +The bridge module in `nvisy-dal` handles: + +1. **Runtime management** - Python interpreter lifecycle +2. **Async bridging** - Rust futures ↔ Python coroutines +3. 
**Type conversion** - Via `pythonize` using shared JSON Schema +4. **Error propagation** - Python exceptions → Rust errors +5. **GIL coordination** - Release during I/O for concurrency + +### Guarantees + +- Provider methods are called with validated inputs (Pydantic) +- Outputs conform to expected schema (Pydantic serialization) +- Errors include Python traceback for debugging +- GIL released during all I/O operations + +## Testing Strategy + +### Unit Tests (Python) + +- Mock external services (`moto` for AWS, `responses` for HTTP) +- Test protocol conformance +- Test error handling paths + +### Integration Tests (Rust) + +- Test PyO3 bridge with real Python runtime +- Verify type conversion round-trips +- Test async behavior across boundary + +### Contract Tests + +- Validate generated Python models against Rust schemas +- Run on CI after schema changes + +## Adding a Provider + +1. Define credentials/params schema in Rust (`crates/nvisy-dal/src/schemas/`) +2. Export JSON Schema (`make schemas`) +3. Regenerate Python models (`make codegen`) +4. Add optional dependency to `pyproject.toml` +5. Create provider module in `src/nvisy_dal/providers/` +6. Implement `DataInput` and/or `DataOutput` protocols +7. Add unit tests with mocked external service +8. Register in PyO3 bridge diff --git a/docs/README.md b/docs/README.md index c1e5e3f..ba28667 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,12 +2,35 @@ ## Overview -Nvisy transforms uploaded files into structured, normalized representations that enable cross-file intelligence. The system focuses on four core capabilities: reading, search, comparison, and extraction. +Nvisy transforms uploaded files into structured, normalized representations that enable cross-file intelligence. The knowledge graph—not the files—is the primary asset. + +## Problem + +Document intelligence tools typically treat files as the unit of work. This prevents cross-file reasoning, entity resolution across documents, and institutional memory accumulation. 
+ +## Design Principles + +| Principle | Description | +|-----------|-------------| +| Structure over blobs | Every file converts to machine-readable structure with content and metadata | +| Canonical representation | Single internal schema normalizes all source formats | +| Grounded reasoning | Every conclusion links to source: file, section, exact text, confidence | +| Isolation & trust | Tenant-aware embeddings, permission-filtered retrieval, audit logs | +| Time awareness | Versioned representations, semantic diffing, temporal queries | + +## Core Capabilities + +| Capability | Description | +|------------|-------------| +| Reading | Parse and normalize any supported file format | +| Search | Hybrid search combining vector, symbolic, and graph queries | +| Comparison | Identify differences, conflicts, and drift across documents | +| Extraction | Pull entities, tables, claims, and structured data | ## Documentation | Document | Description | |----------|-------------| -| [Vision](./VISION.md) | Core philosophy and design principles | | [Architecture](./ARCHITECTURE.md) | System design, data model, and technology stack | | [Intelligence](./INTELLIGENCE.md) | Cross-file reasoning, search, and extraction | +| [Providers](./PROVIDERS.md) | Data provider architecture with PyO3 | diff --git a/docs/VISION.md b/docs/VISION.md deleted file mode 100644 index d79b0f5..0000000 --- a/docs/VISION.md +++ /dev/null @@ -1,50 +0,0 @@ -# Vision & Design Principles - -## Problem Statement - -Document intelligence tools typically treat files as the unit of work. This approach prevents cross-file reasoning, entity resolution across documents, and institutional memory accumulation. - -Nvisy addresses this by transforming uploaded files into structured, normalized representations. The knowledge graph—not the files—is the primary asset. - -## Design Principles - -### 1. Structure Over Blobs - -Every file type is converted into machine-readable structure containing both content and structure (headings, tables, sections, entities). Raw files are archived; structured representations are the working data. - -### 2. Canonical Representation - -A single internal schema normalizes all source formats. This enables comparisons across documents, unified search, and cross-file reasoning regardless of original file type. - -### 3. Grounded Reasoning - -Every conclusion links back to source material: file, section, exact text, and confidence score. Without provenance, enterprise users cannot validate or trust outputs. - -### 4. Isolation & Trust - -Cross-file intelligence requires strict isolation: -- Tenant-aware embeddings (tenant data never mixed) -- Permission-filtered retrieval (filter before search, not after) -- Comprehensive audit logs -- Provenance tracking - -### 5. Time Awareness - -Documents evolve. The system maintains versioned representations and supports semantic diffing (changes in meaning, not just text) and temporal queries across document history. - -## Core Capabilities - -| Capability | Description | -|------------|-------------| -| Reading | Parse and normalize any supported file format | -| Search | Hybrid search combining vector, symbolic, and graph queries | -| Comparison | Identify differences, conflicts, and drift across documents | -| Extraction | Pull entities, tables, claims, and structured data | - -## Differentiation - -The knowledge graph compounds over time. 
Tools that process files in isolation cannot replicate: -- Evolving cross-file graphs -- Entity resolution across time and authors -- Institutional memory accumulation -- Continuous learning from document corpus diff --git a/packages/nvisy-dal/README.md b/packages/nvisy-dal/README.md new file mode 100644 index 0000000..604f363 --- /dev/null +++ b/packages/nvisy-dal/README.md @@ -0,0 +1,151 @@ +# nvisy-dal + +Data abstraction layer for external integrations. Provides unified async interfaces for storage, databases, and vector stores. + +## Installation + +```bash +# Core package +uv add nvisy-dal + +# With specific providers +uv add "nvisy-dal[postgres,s3,pinecone]" + +# All providers +uv add "nvisy-dal[all]" +``` + +## Available Providers + +| Provider | Extra | Description | +|----------|-------|-------------| +| PostgreSQL | `postgres` | Relational database via asyncpg | +| MySQL | `mysql` | Relational database via aiomysql | +| S3 | `s3` | Object storage (AWS S3, MinIO) | +| GCS | `gcs` | Google Cloud Storage | +| Azure Blob | `azure` | Azure Blob Storage | +| Qdrant | `qdrant` | Vector database | +| Pinecone | `pinecone` | Vector database | + +## Usage + +```python +from nvisy_dal import Provider, DataInput, DataOutput +from nvisy_dal.providers.postgres import PostgresProvider, PostgresCredentials, PostgresParams + +# Connect to provider +provider = await PostgresProvider.connect( + credentials=PostgresCredentials( + host="localhost", + port=5432, + user="postgres", + password="password", + database="mydb", + ), + params=PostgresParams(table="users"), +) + +# Read data +async for record in provider.read(ctx): + print(record) + +# Write data +await provider.write(ctx, records) + +# Disconnect +await provider.disconnect() +``` + +## Architecture + +This package is the Python half of the nvisy DAL system: + +- **Rust (nvisy-dal crate)**: Streaming, observability, unified interface, server integration +- **Python (nvisy-dal package)**: Provider implementations, client libraries, external integrations + +The Rust layer loads this package via PyO3 to delegate actual provider calls to Python. + +## Protocols + +All providers implement these core protocols: + +```python +class Provider(Protocol[Cred, Params]): + @classmethod + async def connect(cls, credentials: Cred, params: Params) -> Self: ... + async def disconnect(self) -> None: ... + +class DataInput(Protocol[T, Ctx]): + async def read(self, ctx: Ctx) -> AsyncIterator[T]: ... + +class DataOutput(Protocol[T, Ctx]): + async def write(self, ctx: Ctx, items: Sequence[T]) -> None: ... +``` + +## Development + +```bash +# Install dev dependencies +uv sync --extra dev + +# Run tests +uv run pytest + +# Type check +uv run pyright + +# Lint +uv run ruff check . 
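+
+# Format (optional; ruff provides formatting as well as linting)
+uv run ruff format .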
+``` + +## TODO + +- [x] Core protocols and error types + +### Relational Databases +- [ ] PostgreSQL provider +- [ ] MySQL provider +- [ ] SQLite provider +- [ ] SQL Server provider +- [ ] Oracle provider + +### Object Storage +- [ ] S3 provider +- [ ] GCS provider +- [ ] Azure Blob provider +- [ ] MinIO provider +- [ ] Cloudflare R2 provider + +### Vector Databases +- [ ] Pinecone provider +- [ ] Qdrant provider +- [ ] Weaviate provider +- [ ] Milvus provider +- [ ] Chroma provider +- [ ] pgvector provider + +### Document Databases +- [ ] MongoDB provider +- [ ] DynamoDB provider +- [ ] Firestore provider +- [ ] CouchDB provider + +### Key-Value Stores +- [ ] Redis provider +- [ ] Memcached provider +- [ ] etcd provider + +### Message Queues +- [ ] Kafka provider +- [ ] RabbitMQ provider +- [ ] NATS provider +- [ ] SQS provider + +### Graph Databases +- [ ] Neo4j provider +- [ ] Neptune provider + +### Search Engines +- [ ] Elasticsearch provider +- [ ] OpenSearch provider +- [ ] Algolia provider diff --git a/packages/nvisy-dal/pyproject.toml b/packages/nvisy-dal/pyproject.toml index c0ce344..9440352 100644 --- a/packages/nvisy-dal/pyproject.toml +++ b/packages/nvisy-dal/pyproject.toml @@ -6,13 +6,13 @@ requires-python = ">=3.12" dependencies = ["pydantic>=2.10"] [project.optional-dependencies] -s3 = ["boto3>=1.35", "types-boto3"] +s3 = ["boto3>=1.35", "boto3-stubs[s3]"] gcs = ["google-cloud-storage>=2.18"] azure = ["azure-storage-blob>=12.23"] -postgres = ["asyncpg>=0.30"] +postgres = ["asyncpg>=0.30", "asyncpg-stubs>=0.30"] mysql = ["aiomysql>=0.2"] qdrant = ["qdrant-client>=1.12"] -pinecone = ["pinecone-client>=5.0"] +pinecone = ["pinecone>=5.0"] all = ["nvisy-dal[s3,gcs,azure,postgres,mysql,qdrant,pinecone]"] dev = ["nvisy-dal[all]", "pytest>=8.0", "pytest-asyncio>=0.24", "moto>=5.0"] @@ -37,6 +37,8 @@ known-first-party = ["nvisy_dal"] [tool.basedpyright] pythonVersion = "3.12" typeCheckingMode = "strict" +# Third-party libraries (boto3, pinecone) have incomplete type stubs with **kwargs: Unknown +reportUnknownMemberType = "warning" [tool.pytest.ini_options] asyncio_mode = "auto" diff --git a/packages/nvisy-dal/src/nvisy_dal/_generated/__init__.py b/packages/nvisy-dal/src/nvisy_dal/_generated/__init__.py deleted file mode 100644 index b405340..0000000 --- a/packages/nvisy-dal/src/nvisy_dal/_generated/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Generated types from Rust JSON schemas. - -This module contains Pydantic models generated from the Rust schema definitions. -Do not edit manually - regenerate with `make codegen`. 
-""" - -from nvisy_dal._generated.contexts import ObjectContext, RelationalContext, VectorContext - -__all__ = [ - "ObjectContext", - "RelationalContext", - "VectorContext", -] diff --git a/packages/nvisy-dal/src/nvisy_dal/errors.py b/packages/nvisy-dal/src/nvisy_dal/errors.py index a05f6ac..4a40404 100644 --- a/packages/nvisy-dal/src/nvisy_dal/errors.py +++ b/packages/nvisy-dal/src/nvisy_dal/errors.py @@ -1,7 +1,7 @@ """Error types for provider operations.""" from enum import StrEnum -from typing import final +from typing import final, override class ErrorKind(StrEnum): @@ -23,6 +23,7 @@ class DalError(Exception): def __init__( self, message: str, + *, kind: ErrorKind = ErrorKind.PROVIDER, source: BaseException | None = None, ) -> None: @@ -31,5 +32,6 @@ def __init__( self.kind = kind self.source = source + @override def __repr__(self) -> str: return f"DalError({self.message!r}, kind={self.kind!r})" diff --git a/packages/nvisy-dal/src/nvisy_dal/generated/__init__.py b/packages/nvisy-dal/src/nvisy_dal/generated/__init__.py new file mode 100644 index 0000000..92878c8 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/generated/__init__.py @@ -0,0 +1,48 @@ +"""Generated types from Rust JSON schemas. + +This module contains Pydantic models generated from the Rust schema definitions. +Do not edit manually - regenerate with `make codegen`. +""" + +from nvisy_dal.generated.contexts import ObjectContext, RelationalContext, VectorContext +from nvisy_dal.generated.datatypes import ( + Document, + Edge, + Embedding, + Graph, + JsonValue, + Message, + Metadata, + Node, + Object, + Record, +) +from nvisy_dal.generated.params import ( + DistanceMetric, + ObjectParams, + RelationalParams, + VectorParams, +) + +__all__ = [ + # Contexts (runtime state) + "ObjectContext", + "RelationalContext", + "VectorContext", + # Params (configuration) + "DistanceMetric", + "ObjectParams", + "RelationalParams", + "VectorParams", + # Data types + "Document", + "Edge", + "Embedding", + "Graph", + "JsonValue", + "Message", + "Metadata", + "Node", + "Object", + "Record", +] diff --git a/packages/nvisy-dal/src/nvisy_dal/_generated/contexts.py b/packages/nvisy-dal/src/nvisy_dal/generated/contexts.py similarity index 62% rename from packages/nvisy-dal/src/nvisy_dal/_generated/contexts.py rename to packages/nvisy-dal/src/nvisy_dal/generated/contexts.py index cd325f6..34af2b6 100644 --- a/packages/nvisy-dal/src/nvisy_dal/_generated/contexts.py +++ b/packages/nvisy-dal/src/nvisy_dal/generated/contexts.py @@ -7,25 +7,23 @@ class ObjectContext(BaseModel, frozen=True): - """Context for object storage operations.""" + """Context for object storage operations (S3, GCS, Azure Blob).""" prefix: str | None = None - continuation_token: str | None = None + token: str | None = None limit: int | None = None class RelationalContext(BaseModel, frozen=True): - """Context for relational database operations.""" + """Context for relational database operations (Postgres, MySQL).""" - table: str cursor: str | None = None tiebreaker: str | None = None limit: int | None = None class VectorContext(BaseModel, frozen=True): - """Context for vector store operations.""" + """Context for vector database operations (Qdrant, Pinecone, pgvector).""" - collection: str - cursor: str | None = None + token: str | None = None limit: int | None = None diff --git a/packages/nvisy-dal/src/nvisy_dal/generated/datatypes.py b/packages/nvisy-dal/src/nvisy_dal/generated/datatypes.py new file mode 100644 index 0000000..e8b843b --- /dev/null +++ 
b/packages/nvisy-dal/src/nvisy_dal/generated/datatypes.py @@ -0,0 +1,77 @@ +"""Data types for provider input/output. + +Generated from Rust schemas. Do not edit manually. +""" + +from pydantic import BaseModel, Field + +# JSON-compatible value type (matches serde_json::Value) +type JsonValue = str | int | float | bool | None | list["JsonValue"] | dict[str, "JsonValue"] + +# Metadata associated with data items +type Metadata = dict[str, JsonValue] + + +class Object(BaseModel): + """An object representing a file or binary data (S3, GCS, Azure Blob).""" + + path: str + data: bytes + content_type: str | None = None + metadata: Metadata = Field(default_factory=dict) + + +class Document(BaseModel): + """A document with flexible JSON content.""" + + id: str + content: JsonValue + metadata: Metadata = Field(default_factory=dict) + + +class Embedding(BaseModel): + """A vector embedding with metadata.""" + + id: str + vector: list[float] + metadata: Metadata = Field(default_factory=dict) + + +class Record(BaseModel): + """A record representing a row in a relational table.""" + + columns: dict[str, JsonValue] = Field(default_factory=dict) + + +class Message(BaseModel): + """A message from a queue or stream.""" + + id: str + payload: bytes + headers: dict[str, str] = Field(default_factory=dict) + timestamp: str | None = None + + +class Node(BaseModel): + """A node in a graph.""" + + id: str + labels: list[str] = Field(default_factory=list) + properties: dict[str, JsonValue] = Field(default_factory=dict) + + +class Edge(BaseModel): + """An edge in a graph.""" + + id: str + from_: str = Field(alias="from") + to: str + label: str + properties: dict[str, JsonValue] = Field(default_factory=dict) + + +class Graph(BaseModel): + """A graph containing nodes and edges.""" + + nodes: list[Node] = Field(default_factory=list) + edges: list[Edge] = Field(default_factory=list) diff --git a/packages/nvisy-dal/src/nvisy_dal/generated/params.py b/packages/nvisy-dal/src/nvisy_dal/generated/params.py new file mode 100644 index 0000000..db7b784 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/generated/params.py @@ -0,0 +1,42 @@ +"""Parameter types for provider configuration. + +Generated from Rust schemas. Do not edit manually. 
+""" + +from enum import Enum + +from pydantic import BaseModel, Field + + +class RelationalParams(BaseModel, frozen=True): + """Common parameters for relational database operations.""" + + table: str | None = None + cursor_column: str | None = None + tiebreaker_column: str | None = None + batch_size: int = Field(default=1000) + + +class ObjectParams(BaseModel, frozen=True): + """Common parameters for object storage operations.""" + + bucket: str | None = None + prefix: str | None = None + batch_size: int = Field(default=1000) + + +class DistanceMetric(str, Enum): + """Distance metric for vector similarity search.""" + + COSINE = "cosine" + EUCLIDEAN = "euclidean" + DOT_PRODUCT = "dot_product" + + +class VectorParams(BaseModel, frozen=True): + """Common parameters for vector database operations.""" + + collection: str | None = None + dimension: int | None = None + metric: DistanceMetric = DistanceMetric.COSINE + batch_size: int = Field(default=1000) diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py b/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py index a4e3347..7510fd5 100644 --- a/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py +++ b/packages/nvisy-dal/src/nvisy_dal/providers/__init__.py @@ -1 +1,18 @@ -"""Provider implementations for external services.""" +"""Provider implementations for external services. + +Each provider module exports a `Provider` class alias for the main provider class, +along with its credentials, params, and context types. + +Available providers (require optional dependencies): +- postgres: PostgreSQL via asyncpg +- s3: AWS S3 / MinIO via boto3 +- pinecone: Pinecone vector database +""" + +from nvisy_dal.providers import pinecone, postgres, s3 + +__all__ = [ + "pinecone", + "postgres", + "s3", +] diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/pinecone.py b/packages/nvisy-dal/src/nvisy_dal/providers/pinecone.py new file mode 100644 index 0000000..223359e --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/providers/pinecone.py @@ -0,0 +1,104 @@ +"""Pinecone vector database provider.""" + +from collections.abc import Mapping, Sequence +from typing import TYPE_CHECKING, ClassVar, Self, cast + +from pydantic import BaseModel + +from nvisy_dal.errors import DalError, ErrorKind + +if TYPE_CHECKING: + from pinecone import Pinecone + from pinecone.db_data.index import Index + +try: + from pinecone import Pinecone, UpsertResponse, Vector +except ImportError as e: + _msg = "pinecone is required for Pinecone support. 
Install with: uv add 'nvisy-dal[pinecone]'" + raise ImportError(_msg) from e + +# Pinecone metadata value types +type MetadataValue = str | int | float | list[str] | list[int] | list[float] +type Metadata = Mapping[str, MetadataValue] + + +class PineconeCredentials(BaseModel): + """Credentials for Pinecone connection.""" + + api_key: str + + +class PineconeParams(BaseModel): + """Parameters for Pinecone operations.""" + + index_name: str + namespace: str = "" + + +class PineconeVector(BaseModel): + """Representation of a Pinecone vector.""" + + id: str + values: list[float] + metadata: dict[str, MetadataValue] | None = None + + +class PineconeProvider: + """Pinecone provider for vector upsert operations.""" + + __slots__: ClassVar[tuple[str, str, str]] = ("_client", "_index", "_params") + + _client: "Pinecone" + _index: "Index" + _params: PineconeParams + + def __init__(self, client: "Pinecone", index: "Index", params: PineconeParams) -> None: + self._client = client + self._index = index + self._params = params + + @classmethod + async def connect(cls, credentials: PineconeCredentials, params: PineconeParams) -> Self: + """Create Pinecone client and connect to index.""" + try: + client = Pinecone(api_key=credentials.api_key) + index = client.Index(params.index_name) # pyright: ignore[reportUnknownMemberType] + # Verify connection + _ = index.describe_index_stats() # pyright: ignore[reportUnknownMemberType] + except Exception as e: + msg = f"Failed to connect to Pinecone: {e}" + raise DalError(msg, kind=ErrorKind.CONNECTION, source=e) from e + + return cls(client, index, params) + + async def disconnect(self) -> None: + """Close the Pinecone client (no-op).""" + + async def upsert(self, vectors: Sequence[PineconeVector]) -> int: + """Upsert vectors to Pinecone. Returns count of upserted vectors.""" + if not vectors: + return 0 + + try: + records = [Vector(id=v.id, values=v.values, metadata=v.metadata) for v in vectors] + + upserted = 0 + batch_size = 100 + for i in range(0, len(records), batch_size): + batch = list(records[i : i + batch_size]) + response = cast( + UpsertResponse, + self._index.upsert( # pyright: ignore[reportUnknownMemberType] + vectors=batch, + namespace=self._params.namespace, + ), + ) + upserted += response.upserted_count or len(batch) + except Exception as e: + msg = f"Failed to upsert to Pinecone: {e}" + raise DalError(msg, source=e) from e + else: + return upserted + + +Provider = PineconeProvider diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py b/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py new file mode 100644 index 0000000..6a301f8 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py @@ -0,0 +1,178 @@ +"""PostgreSQL provider using asyncpg.""" + +from collections.abc import AsyncIterator, Sequence +from typing import TYPE_CHECKING, ClassVar, Self + +from pydantic import BaseModel + +from nvisy_dal.errors import DalError, ErrorKind + +if TYPE_CHECKING: + from asyncpg import Pool + +try: + import asyncpg +except ImportError as e: + _msg = "asyncpg is required for PostgreSQL support. 
Install with: uv add 'nvisy-dal[postgres]'" + raise ImportError(_msg) from e + + +class PostgresCredentials(BaseModel): + """Credentials for PostgreSQL connection.""" + + host: str = "localhost" + port: int = 5432 + user: str = "postgres" + password: str + database: str + + +class PostgresParams(BaseModel): + """Parameters for PostgreSQL operations.""" + + table: str + schema_name: str = "public" + batch_size: int = 1000 + + +class PostgresContext(BaseModel): + """Context for read/write operations.""" + + columns: list[str] | None = None + where: dict[str, object] | None = None + order_by: str | None = None + limit: int | None = None + offset: int | None = None + + +class PostgresProvider: + """PostgreSQL provider for relational data operations.""" + + __slots__: ClassVar[tuple[str, str]] = ("_params", "_pool") + + _params: PostgresParams + _pool: "Pool" + + def __init__(self, pool: "Pool", params: PostgresParams) -> None: + self._pool = pool + self._params = params + + @classmethod + async def connect( + cls, + credentials: PostgresCredentials, + params: PostgresParams, + ) -> Self: + """Establish connection pool to PostgreSQL.""" + try: + pool = await asyncpg.create_pool( + host=credentials.host, + port=credentials.port, + user=credentials.user, + password=credentials.password, + database=credentials.database, + min_size=1, + max_size=10, + ) + except Exception as e: + msg = f"Failed to connect to PostgreSQL: {e}" + raise DalError(msg, kind=ErrorKind.CONNECTION, source=e) from e + + return cls(pool, params) + + async def disconnect(self) -> None: + """Close the connection pool.""" + await self._pool.close() + + async def read(self, ctx: PostgresContext) -> AsyncIterator[dict[str, object]]: + """Read records from the database using parameterized queries.""" + try: + async with self._pool.acquire() as conn: + # Build query with proper parameter binding + columns = ", ".join(f'"{c}"' for c in ctx.columns) if ctx.columns else "*" + table = f'"{self._params.schema_name}"."{self._params.table}"' + + query_parts: list[str] = [f"SELECT {columns} FROM {table}"] # noqa: S608 + params: list[object] = [] + + if ctx.where: + conditions: list[str] = [] + for key, value in ctx.where.items(): + if value is None: + conditions.append(f'"{key}" IS NULL') + else: + params.append(value) + conditions.append(f'"{key}" = ${len(params)}') + if conditions: + query_parts.append("WHERE " + " AND ".join(conditions)) + + if ctx.order_by: + # Order by should be validated/sanitized by caller + query_parts.append(f"ORDER BY {ctx.order_by}") + + if ctx.limit is not None: + params.append(ctx.limit) + query_parts.append(f"LIMIT ${len(params)}") + + if ctx.offset is not None: + params.append(ctx.offset) + query_parts.append(f"OFFSET ${len(params)}") + + query = " ".join(query_parts) + async for record in conn.cursor(query, *params): + yield dict(record) + except Exception as e: + msg = f"Failed to read from PostgreSQL: {e}" + raise DalError(msg, source=e) from e + + async def write(self, items: Sequence[dict[str, object]]) -> None: + """Write records to the database.""" + if not items: + return + + columns = list(items[0].keys()) + placeholders = ", ".join(f"${i + 1}" for i in range(len(columns))) + column_names = ", ".join(f'"{c}"' for c in columns) + table = f'"{self._params.schema_name}"."{self._params.table}"' + query = f"INSERT INTO {table} ({column_names}) VALUES ({placeholders})" # noqa: S608 + + try: + async with self._pool.acquire() as conn: + for i in range(0, len(items), self._params.batch_size): + batch = 
items[i : i + self._params.batch_size] + await conn.executemany(query, [tuple(item.values()) for item in batch]) + except Exception as e: + msg = f"Failed to write to PostgreSQL: {e}" + raise DalError(msg, source=e) from e + + async def execute(self, query: str, *args: object) -> str: + """Execute a raw SQL query.""" + try: + async with self._pool.acquire() as conn: + return await conn.execute(query, *args) + except Exception as e: + msg = f"Failed to execute query: {e}" + raise DalError(msg, source=e) from e + + async def fetch_one(self, query: str, *args: object) -> dict[str, object] | None: + """Fetch a single record.""" + try: + async with self._pool.acquire() as conn: + record = await conn.fetchrow(query, *args) + return dict(record) if record else None + except Exception as e: + msg = f"Failed to fetch record: {e}" + raise DalError(msg, source=e) from e + + async def fetch_all(self, query: str, *args: object) -> list[dict[str, object]]: + """Fetch all records.""" + try: + async with self._pool.acquire() as conn: + records = await conn.fetch(query, *args) + return [dict(record) for record in records] + except Exception as e: + msg = f"Failed to fetch records: {e}" + raise DalError(msg, source=e) from e + + +Provider = PostgresProvider diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/s3.py b/packages/nvisy-dal/src/nvisy_dal/providers/s3.py new file mode 100644 index 0000000..ed1bd62 --- /dev/null +++ b/packages/nvisy-dal/src/nvisy_dal/providers/s3.py @@ -0,0 +1,242 @@ +"""S3 provider using boto3.""" + +from collections.abc import AsyncIterator, Sequence +from typing import TYPE_CHECKING, ClassVar, Self + +from pydantic import BaseModel + +from nvisy_dal.errors import DalError, ErrorKind + +if TYPE_CHECKING: + from mypy_boto3_s3 import S3Client + +try: + import boto3 + from botocore.exceptions import ClientError +except ImportError as e: + _msg = "boto3 is required for S3 support. 
Install with: uv add 'nvisy-dal[s3]'" + raise ImportError(_msg) from e + + +class S3Credentials(BaseModel): + """Credentials for S3 connection.""" + + access_key_id: str + secret_access_key: str + region: str = "us-east-1" + endpoint_url: str | None = None + + +class S3Params(BaseModel): + """Parameters for S3 operations.""" + + bucket: str + prefix: str = "" + + +class S3Context(BaseModel): + """Context for read/write operations.""" + + key: str | None = None + prefix: str | None = None + max_keys: int = 1000 + content_type: str = "application/octet-stream" + + +class S3Object(BaseModel): + """Representation of an S3 object.""" + + key: str + size: int + last_modified: str + etag: str + content: bytes | None = None + + +class S3Provider: + """S3 provider for object storage operations.""" + + __slots__: ClassVar[tuple[str, str]] = ("_client", "_params") + + _client: "S3Client" + _params: S3Params + + def __init__(self, client: "S3Client", params: S3Params) -> None: + self._client = client + self._params = params + + @classmethod + async def connect(cls, credentials: S3Credentials, params: S3Params) -> Self: + """Create S3 client.""" + try: + client: S3Client = boto3.client( # pyright: ignore[reportUnknownMemberType] + "s3", + aws_access_key_id=credentials.access_key_id, + aws_secret_access_key=credentials.secret_access_key, + region_name=credentials.region, + endpoint_url=credentials.endpoint_url, + ) + # Verify connection by checking bucket exists + _ = client.head_bucket(Bucket=params.bucket) + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code", "Unknown") + if error_code == "404": + msg = f"Bucket '{params.bucket}' not found" + raise DalError(msg, kind=ErrorKind.NOT_FOUND, source=e) from e + msg = f"Failed to connect to S3: {e}" + raise DalError(msg, kind=ErrorKind.CONNECTION, source=e) from e + except Exception as e: + msg = f"Failed to connect to S3: {e}" + raise DalError(msg, kind=ErrorKind.CONNECTION, source=e) from e + + return cls(client, params) + + async def disconnect(self) -> None: + """Close the S3 client (no-op for boto3).""" + + async def read(self, ctx: S3Context) -> AsyncIterator[S3Object]: + """List and optionally read objects from S3.""" + prefix = ctx.prefix or self._params.prefix + continuation_token: str | None = None + + try: + while True: + if continuation_token: + response = self._client.list_objects_v2( + Bucket=self._params.bucket, + Prefix=prefix, + MaxKeys=ctx.max_keys, + ContinuationToken=continuation_token, + ) + else: + response = self._client.list_objects_v2( + Bucket=self._params.bucket, + Prefix=prefix, + MaxKeys=ctx.max_keys, + ) + + for obj in response.get("Contents", []): + obj_key = obj.get("Key") + obj_size = obj.get("Size") + obj_modified = obj.get("LastModified") + obj_etag = obj.get("ETag") + + if not obj_key or obj_size is None or not obj_modified or not obj_etag: + continue + + content = None + if ctx.key and obj_key == ctx.key: + get_response = self._client.get_object( + Bucket=self._params.bucket, + Key=obj_key, + ) + content = get_response["Body"].read() + + yield S3Object( + key=obj_key, + size=obj_size, + last_modified=obj_modified.isoformat(), + etag=obj_etag.strip('"'), + content=content, + ) + + if not response.get("IsTruncated"): + break + + continuation_token = response.get("NextContinuationToken") + + except ClientError as e: + msg = f"Failed to read from S3: {e}" + raise DalError(msg, source=e) from e + + async def write(self, ctx: S3Context, items: Sequence[S3Object]) -> None: + """Write objects to 
S3.""" + try: + for item in items: + if item.content is None: + continue + + key = self._resolve_key(item.key) + _ = self._client.put_object( + Bucket=self._params.bucket, + Key=key, + Body=item.content, + ContentType=ctx.content_type, + ) + except ClientError as e: + msg = f"Failed to write to S3: {e}" + raise DalError(msg, source=e) from e + + async def get(self, key: str) -> bytes: + """Get object content by key.""" + try: + full_key = self._resolve_key(key) + response = self._client.get_object( + Bucket=self._params.bucket, + Key=full_key, + ) + return response["Body"].read() + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code", "Unknown") + if error_code == "NoSuchKey": + msg = f"Object '{key}' not found" + raise DalError(msg, kind=ErrorKind.NOT_FOUND, source=e) from e + msg = f"Failed to get object: {e}" + raise DalError(msg, source=e) from e + + async def put( + self, + key: str, + content: bytes, + content_type: str = "application/octet-stream", + ) -> None: + """Put object content by key.""" + try: + full_key = self._resolve_key(key) + _ = self._client.put_object( + Bucket=self._params.bucket, + Key=full_key, + Body=content, + ContentType=content_type, + ) + except ClientError as e: + msg = f"Failed to put object: {e}" + raise DalError(msg, source=e) from e + + async def delete(self, key: str) -> None: + """Delete object by key.""" + try: + full_key = self._resolve_key(key) + _ = self._client.delete_object( + Bucket=self._params.bucket, + Key=full_key, + ) + except ClientError as e: + msg = f"Failed to remove object: {e}" + raise DalError(msg, source=e) from e + + async def exists(self, key: str) -> bool: + """Check if object exists.""" + try: + full_key = self._resolve_key(key) + _ = self._client.head_object( + Bucket=self._params.bucket, + Key=full_key, + ) + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code", "Unknown") + if error_code == "404": + return False + msg = f"Failed to check object existence: {e}" + raise DalError(msg, source=e) from e + else: + return True + + def _resolve_key(self, key: str) -> str: + """Resolve key with prefix if needed.""" + if self._params.prefix and not key.startswith(self._params.prefix): + return f"{self._params.prefix}{key}" + return key + + +Provider = S3Provider diff --git a/packages/nvisy-dal/src/nvisy_dal/py.typed b/packages/nvisy-dal/src/nvisy_dal/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/packages/nvisy-dal/uv.lock b/packages/nvisy-dal/uv.lock index b2f9501..6c5d179 100644 --- a/packages/nvisy-dal/uv.lock +++ b/packages/nvisy-dal/uv.lock @@ -81,6 +81,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062 }, ] +[[package]] +name = "asyncpg-stubs" +version = "0.31.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asyncpg" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e0/e1/a51adefd76533eeff03d442bb4acbc96c2e27e04c85ce4be410b2ea92f33/asyncpg_stubs-0.31.1.tar.gz", hash = "sha256:6d7342417f867365c98b67d5ae40cb57ce6b2a9eb921fff39d9296961fca18be", size = 20591 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/ab/2301aace8c32be52832f3af75aadfd3c8516b8e7764ba8fa82c6008a99aa/asyncpg_stubs-0.31.1-py3-none-any.whl", hash = 
"sha256:96c0cf3786948f313207b990d26bf3430daf385ca2913ba65d9dd0ede6bf8bf4", size = 27651 }, +] + [[package]] name = "azure-core" version = "1.38.0" @@ -123,6 +136,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/55/25c543864abc270f5fdd7814fa7b69fd23de1c40fb3d7993f4b6391f8d3b/boto3-1.42.34-py3-none-any.whl", hash = "sha256:db3fb539e3f806b911ec4ca991f2f8bff333c5f0b87132a82e28b521fc5ec164", size = 140574 }, ] +[[package]] +name = "boto3-stubs" +version = "1.42.34" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore-stubs" }, + { name = "types-s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/e4/959e63b009194cae2fad6ddff8ef1c0e7e2f9113bca4c7ec20fa579e4d7a/boto3_stubs-1.42.34.tar.gz", hash = "sha256:fafcc3713c331bac11bf55fe913e5a3a01820f0cde640cfc4694df5a94aa9557", size = 100898 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/c4/1aba1653afc3cf5ef985235cea05d3e9e6736033f10ebbf102a23fc0152d/boto3_stubs-1.42.34-py3-none-any.whl", hash = "sha256:eb98cf3cc0a74ed75ea4945152cf10da57c8c9628104a13db16cde10176219ab", size = 69782 }, +] + +[package.optional-dependencies] +s3 = [ + { name = "mypy-boto3-s3" }, +] + [[package]] name = "botocore" version = "1.42.34" @@ -688,6 +719,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/2f/f50892fdb28097917b87d358a5fcefd30976289884ff142893edcb0243ba/moto-5.1.20-py3-none-any.whl", hash = "sha256:58c82c8e6b2ef659ef3a562fa415dce14da84bc7a797943245d9a338496ea0ea", size = 6392751 }, ] +[[package]] +name = "mypy-boto3-s3" +version = "1.42.21" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a0/32/aa7208348dc8db8bd4ea357e5e6e1e8bcba44419033d03456c3b767a6c98/mypy_boto3_s3-1.42.21.tar.gz", hash = "sha256:cab71c918aac7d98c4d742544c722e37d8e7178acb8bc88a0aead7b1035026d2", size = 76024 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/c0/01babfa8cef5f992a2a0f3d52fc1123fbbc336ab6decfdfc8f702e88a8af/mypy_boto3_s3-1.42.21-py3-none-any.whl", hash = "sha256:f5b7d1ed718ba5b00f67e95a9a38c6a021159d3071ea235e6cf496e584115ded", size = 83169 }, +] + [[package]] name = "nodeenv" version = "1.10.0" @@ -770,12 +810,13 @@ dependencies = [ all = [ { name = "aiomysql" }, { name = "asyncpg" }, + { name = "asyncpg-stubs" }, { name = "azure-storage-blob" }, { name = "boto3" }, + { name = "boto3-stubs", extra = ["s3"] }, { name = "google-cloud-storage" }, - { name = "pinecone-client" }, + { name = "pinecone" }, { name = "qdrant-client" }, - { name = "types-boto3" }, ] azure = [ { name = "azure-storage-blob" }, @@ -783,15 +824,16 @@ azure = [ dev = [ { name = "aiomysql" }, { name = "asyncpg" }, + { name = "asyncpg-stubs" }, { name = "azure-storage-blob" }, { name = "boto3" }, + { name = "boto3-stubs", extra = ["s3"] }, { name = "google-cloud-storage" }, { name = "moto" }, - { name = "pinecone-client" }, + { name = "pinecone" }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "qdrant-client" }, - { name = "types-boto3" }, ] gcs = [ { name = "google-cloud-storage" }, @@ -800,17 +842,18 @@ mysql = [ { name = "aiomysql" }, ] pinecone = [ - { name = "pinecone-client" }, + { name = "pinecone" }, ] postgres = [ { name = "asyncpg" }, + { name = "asyncpg-stubs" }, ] qdrant = [ { name = "qdrant-client" }, ] s3 = [ { name = "boto3" }, - { name = "types-boto3" }, + { name = "boto3-stubs", extra = ["s3"] }, ] [package.dev-dependencies] @@ -823,18 +866,19 @@ dev = [ requires-dist = [ { name = 
"aiomysql", marker = "extra == 'mysql'", specifier = ">=0.2" }, { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.30" }, + { name = "asyncpg-stubs", marker = "extra == 'postgres'", specifier = ">=0.30" }, { name = "azure-storage-blob", marker = "extra == 'azure'", specifier = ">=12.23" }, { name = "boto3", marker = "extra == 's3'", specifier = ">=1.35" }, + { name = "boto3-stubs", extras = ["s3"], marker = "extra == 's3'" }, { name = "google-cloud-storage", marker = "extra == 'gcs'", specifier = ">=2.18" }, { name = "moto", marker = "extra == 'dev'", specifier = ">=5.0" }, { name = "nvisy-dal", extras = ["all"], marker = "extra == 'dev'" }, { name = "nvisy-dal", extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone"], marker = "extra == 'all'" }, - { name = "pinecone-client", marker = "extra == 'pinecone'", specifier = ">=5.0" }, + { name = "pinecone", marker = "extra == 'pinecone'", specifier = ">=5.0" }, { name = "pydantic", specifier = ">=2.10" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24" }, { name = "qdrant-client", marker = "extra == 'qdrant'", specifier = ">=1.12" }, - { name = "types-boto3", marker = "extra == 's3'" }, ] provides-extras = ["s3", "gcs", "azure", "postgres", "mysql", "qdrant", "pinecone", "all", "dev"] @@ -844,29 +888,97 @@ dev = [ { name = "ruff", specifier = ">=0.14.14" }, ] +[[package]] +name = "orjson" +version = "3.11.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/b8/333fdb27840f3bf04022d21b654a35f58e15407183aeb16f3b41aa053446/orjson-3.11.5.tar.gz", hash = "sha256:82393ab47b4fe44ffd0a7659fa9cfaacc717eb617c93cde83795f14af5c2e9d5", size = 5972347 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a4/8052a029029b096a78955eadd68ab594ce2197e24ec50e6b6d2ab3f4e33b/orjson-3.11.5-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:334e5b4bff9ad101237c2d799d9fd45737752929753bf4faf4b207335a416b7d", size = 245347 }, + { url = "https://files.pythonhosted.org/packages/64/67/574a7732bd9d9d79ac620c8790b4cfe0717a3d5a6eb2b539e6e8995e24a0/orjson-3.11.5-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:ff770589960a86eae279f5d8aa536196ebda8273a2a07db2a54e82b93bc86626", size = 129435 }, + { url = "https://files.pythonhosted.org/packages/52/8d/544e77d7a29d90cf4d9eecd0ae801c688e7f3d1adfa2ebae5e1e94d38ab9/orjson-3.11.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed24250e55efbcb0b35bed7caaec8cedf858ab2f9f2201f17b8938c618c8ca6f", size = 132074 }, + { url = "https://files.pythonhosted.org/packages/6e/57/b9f5b5b6fbff9c26f77e785baf56ae8460ef74acdb3eae4931c25b8f5ba9/orjson-3.11.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a66d7769e98a08a12a139049aac2f0ca3adae989817f8c43337455fbc7669b85", size = 130520 }, + { url = "https://files.pythonhosted.org/packages/f6/6d/d34970bf9eb33f9ec7c979a262cad86076814859e54eb9a059a52f6dc13d/orjson-3.11.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:86cfc555bfd5794d24c6a1903e558b50644e5e68e6471d66502ce5cb5fdef3f9", size = 136209 }, + { url = "https://files.pythonhosted.org/packages/e7/39/bc373b63cc0e117a105ea12e57280f83ae52fdee426890d57412432d63b3/orjson-3.11.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a230065027bc2a025e944f9d4714976a81e7ecfa940923283bca7bbc1f10f626", size 
= 139837 }, + { url = "https://files.pythonhosted.org/packages/cb/aa/7c4818c8d7d324da220f4f1af55c343956003aa4d1ce1857bdc1d396ba69/orjson-3.11.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b29d36b60e606df01959c4b982729c8845c69d1963f88686608be9ced96dbfaa", size = 137307 }, + { url = "https://files.pythonhosted.org/packages/46/bf/0993b5a056759ba65145effe3a79dd5a939d4a070eaa5da2ee3180fbb13f/orjson-3.11.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c74099c6b230d4261fdc3169d50efc09abf38ace1a42ea2f9994b1d79153d477", size = 139020 }, + { url = "https://files.pythonhosted.org/packages/65/e8/83a6c95db3039e504eda60fc388f9faedbb4f6472f5aba7084e06552d9aa/orjson-3.11.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e697d06ad57dd0c7a737771d470eedc18e68dfdefcdd3b7de7f33dfda5b6212e", size = 141099 }, + { url = "https://files.pythonhosted.org/packages/b9/b4/24fdc024abfce31c2f6812973b0a693688037ece5dc64b7a60c1ce69e2f2/orjson-3.11.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e08ca8a6c851e95aaecc32bc44a5aa75d0ad26af8cdac7c77e4ed93acf3d5b69", size = 413540 }, + { url = "https://files.pythonhosted.org/packages/d9/37/01c0ec95d55ed0c11e4cae3e10427e479bba40c77312b63e1f9665e0737d/orjson-3.11.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e8b5f96c05fce7d0218df3fdfeb962d6b8cfff7e3e20264306b46dd8b217c0f3", size = 151530 }, + { url = "https://files.pythonhosted.org/packages/f9/d4/f9ebc57182705bb4bbe63f5bbe14af43722a2533135e1d2fb7affa0c355d/orjson-3.11.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ddbfdb5099b3e6ba6d6ea818f61997bb66de14b411357d24c4612cf1ebad08ca", size = 141863 }, + { url = "https://files.pythonhosted.org/packages/0d/04/02102b8d19fdcb009d72d622bb5781e8f3fae1646bf3e18c53d1bc8115b5/orjson-3.11.5-cp312-cp312-win32.whl", hash = "sha256:9172578c4eb09dbfcf1657d43198de59b6cef4054de385365060ed50c458ac98", size = 135255 }, + { url = "https://files.pythonhosted.org/packages/d4/fb/f05646c43d5450492cb387de5549f6de90a71001682c17882d9f66476af5/orjson-3.11.5-cp312-cp312-win_amd64.whl", hash = "sha256:2b91126e7b470ff2e75746f6f6ee32b9ab67b7a93c8ba1d15d3a0caaf16ec875", size = 133252 }, + { url = "https://files.pythonhosted.org/packages/dc/a6/7b8c0b26ba18c793533ac1cd145e131e46fcf43952aa94c109b5b913c1f0/orjson-3.11.5-cp312-cp312-win_arm64.whl", hash = "sha256:acbc5fac7e06777555b0722b8ad5f574739e99ffe99467ed63da98f97f9ca0fe", size = 126777 }, + { url = "https://files.pythonhosted.org/packages/10/43/61a77040ce59f1569edf38f0b9faadc90c8cf7e9bec2e0df51d0132c6bb7/orjson-3.11.5-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:3b01799262081a4c47c035dd77c1301d40f568f77cc7ec1bb7db5d63b0a01629", size = 245271 }, + { url = "https://files.pythonhosted.org/packages/55/f9/0f79be617388227866d50edd2fd320cb8fb94dc1501184bb1620981a0aba/orjson-3.11.5-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:61de247948108484779f57a9f406e4c84d636fa5a59e411e6352484985e8a7c3", size = 129422 }, + { url = "https://files.pythonhosted.org/packages/77/42/f1bf1549b432d4a78bfa95735b79b5dac75b65b5bb815bba86ad406ead0a/orjson-3.11.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:894aea2e63d4f24a7f04a1908307c738d0dce992e9249e744b8f4e8dd9197f39", size = 132060 }, + { url = "https://files.pythonhosted.org/packages/25/49/825aa6b929f1a6ed244c78acd7b22c1481fd7e5fda047dc8bf4c1a807eb6/orjson-3.11.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:ddc21521598dbe369d83d4d40338e23d4101dad21dae0e79fa20465dbace019f", size = 130391 }, + { url = "https://files.pythonhosted.org/packages/42/ec/de55391858b49e16e1aa8f0bbbb7e5997b7345d8e984a2dec3746d13065b/orjson-3.11.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cce16ae2f5fb2c53c3eafdd1706cb7b6530a67cc1c17abe8ec747f5cd7c0c51", size = 135964 }, + { url = "https://files.pythonhosted.org/packages/1c/40/820bc63121d2d28818556a2d0a09384a9f0262407cf9fa305e091a8048df/orjson-3.11.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e46c762d9f0e1cfb4ccc8515de7f349abbc95b59cb5a2bd68df5973fdef913f8", size = 139817 }, + { url = "https://files.pythonhosted.org/packages/09/c7/3a445ca9a84a0d59d26365fd8898ff52bdfcdcb825bcc6519830371d2364/orjson-3.11.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d7345c759276b798ccd6d77a87136029e71e66a8bbf2d2755cbdde1d82e78706", size = 137336 }, + { url = "https://files.pythonhosted.org/packages/9a/b3/dc0d3771f2e5d1f13368f56b339c6782f955c6a20b50465a91acb79fe961/orjson-3.11.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75bc2e59e6a2ac1dd28901d07115abdebc4563b5b07dd612bf64260a201b1c7f", size = 138993 }, + { url = "https://files.pythonhosted.org/packages/d1/a2/65267e959de6abe23444659b6e19c888f242bf7725ff927e2292776f6b89/orjson-3.11.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:54aae9b654554c3b4edd61896b978568c6daa16af96fa4681c9b5babd469f863", size = 141070 }, + { url = "https://files.pythonhosted.org/packages/63/c9/da44a321b288727a322c6ab17e1754195708786a04f4f9d2220a5076a649/orjson-3.11.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:4bdd8d164a871c4ec773f9de0f6fe8769c2d6727879c37a9666ba4183b7f8228", size = 413505 }, + { url = "https://files.pythonhosted.org/packages/7f/17/68dc14fa7000eefb3d4d6d7326a190c99bb65e319f02747ef3ebf2452f12/orjson-3.11.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:a261fef929bcf98a60713bf5e95ad067cea16ae345d9a35034e73c3990e927d2", size = 151342 }, + { url = "https://files.pythonhosted.org/packages/c4/c5/ccee774b67225bed630a57478529fc026eda33d94fe4c0eac8fe58d4aa52/orjson-3.11.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c028a394c766693c5c9909dec76b24f37e6a1b91999e8d0c0d5feecbe93c3e05", size = 141823 }, + { url = "https://files.pythonhosted.org/packages/67/80/5d00e4155d0cd7390ae2087130637671da713959bb558db9bac5e6f6b042/orjson-3.11.5-cp313-cp313-win32.whl", hash = "sha256:2cc79aaad1dfabe1bd2d50ee09814a1253164b3da4c00a78c458d82d04b3bdef", size = 135236 }, + { url = "https://files.pythonhosted.org/packages/95/fe/792cc06a84808dbdc20ac6eab6811c53091b42f8e51ecebf14b540e9cfe4/orjson-3.11.5-cp313-cp313-win_amd64.whl", hash = "sha256:ff7877d376add4e16b274e35a3f58b7f37b362abf4aa31863dadacdd20e3a583", size = 133167 }, + { url = "https://files.pythonhosted.org/packages/46/2c/d158bd8b50e3b1cfdcf406a7e463f6ffe3f0d167b99634717acdaf5e299f/orjson-3.11.5-cp313-cp313-win_arm64.whl", hash = "sha256:59ac72ea775c88b163ba8d21b0177628bd015c5dd060647bbab6e22da3aad287", size = 126712 }, + { url = "https://files.pythonhosted.org/packages/c2/60/77d7b839e317ead7bb225d55bb50f7ea75f47afc489c81199befc5435b50/orjson-3.11.5-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:e446a8ea0a4c366ceafc7d97067bfd55292969143b57e3c846d87fc701e797a0", size = 245252 }, + { url = 
"https://files.pythonhosted.org/packages/f1/aa/d4639163b400f8044cef0fb9aa51b0337be0da3a27187a20d1166e742370/orjson-3.11.5-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:53deb5addae9c22bbe3739298f5f2196afa881ea75944e7720681c7080909a81", size = 129419 }, + { url = "https://files.pythonhosted.org/packages/30/94/9eabf94f2e11c671111139edf5ec410d2f21e6feee717804f7e8872d883f/orjson-3.11.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82cd00d49d6063d2b8791da5d4f9d20539c5951f965e45ccf4e96d33505ce68f", size = 132050 }, + { url = "https://files.pythonhosted.org/packages/3d/c8/ca10f5c5322f341ea9a9f1097e140be17a88f88d1cfdd29df522970d9744/orjson-3.11.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3fd15f9fc8c203aeceff4fda211157fad114dde66e92e24097b3647a08f4ee9e", size = 130370 }, + { url = "https://files.pythonhosted.org/packages/25/d4/e96824476d361ee2edd5c6290ceb8d7edf88d81148a6ce172fc00278ca7f/orjson-3.11.5-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9df95000fbe6777bf9820ae82ab7578e8662051bb5f83d71a28992f539d2cda7", size = 136012 }, + { url = "https://files.pythonhosted.org/packages/85/8e/9bc3423308c425c588903f2d103cfcfe2539e07a25d6522900645a6f257f/orjson-3.11.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92a8d676748fca47ade5bc3da7430ed7767afe51b2f8100e3cd65e151c0eaceb", size = 139809 }, + { url = "https://files.pythonhosted.org/packages/e9/3c/b404e94e0b02a232b957c54643ce68d0268dacb67ac33ffdee24008c8b27/orjson-3.11.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aa0f513be38b40234c77975e68805506cad5d57b3dfd8fe3baa7f4f4051e15b4", size = 137332 }, + { url = "https://files.pythonhosted.org/packages/51/30/cc2d69d5ce0ad9b84811cdf4a0cd5362ac27205a921da524ff42f26d65e0/orjson-3.11.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa1863e75b92891f553b7922ce4ee10ed06db061e104f2b7815de80cdcb135ad", size = 138983 }, + { url = "https://files.pythonhosted.org/packages/0e/87/de3223944a3e297d4707d2fe3b1ffb71437550e165eaf0ca8bbe43ccbcb1/orjson-3.11.5-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d4be86b58e9ea262617b8ca6251a2f0d63cc132a6da4b5fcc8e0a4128782c829", size = 141069 }, + { url = "https://files.pythonhosted.org/packages/65/30/81d5087ae74be33bcae3ff2d80f5ccaa4a8fedc6d39bf65a427a95b8977f/orjson-3.11.5-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:b923c1c13fa02084eb38c9c065afd860a5cff58026813319a06949c3af5732ac", size = 413491 }, + { url = "https://files.pythonhosted.org/packages/d0/6f/f6058c21e2fc1efaf918986dbc2da5cd38044f1a2d4b7b91ad17c4acf786/orjson-3.11.5-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:1b6bd351202b2cd987f35a13b5e16471cf4d952b42a73c391cc537974c43ef6d", size = 151375 }, + { url = "https://files.pythonhosted.org/packages/54/92/c6921f17d45e110892899a7a563a925b2273d929959ce2ad89e2525b885b/orjson-3.11.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bb150d529637d541e6af06bbe3d02f5498d628b7f98267ff87647584293ab439", size = 141850 }, + { url = "https://files.pythonhosted.org/packages/88/86/cdecb0140a05e1a477b81f24739da93b25070ee01ce7f7242f44a6437594/orjson-3.11.5-cp314-cp314-win32.whl", hash = "sha256:9cc1e55c884921434a84a0c3dd2699eb9f92e7b441d7f53f3941079ec6ce7499", size = 135278 }, + { url = "https://files.pythonhosted.org/packages/e4/97/b638d69b1e947d24f6109216997e38922d54dcdcdb1b11c18d7efd2d3c59/orjson-3.11.5-cp314-cp314-win_amd64.whl", hash = 
"sha256:a4f3cb2d874e03bc7767c8f88adaa1a9a05cecea3712649c3b58589ec7317310", size = 133170 }, + { url = "https://files.pythonhosted.org/packages/8f/dd/f4fff4a6fe601b4f8f3ba3aa6da8ac33d17d124491a3b804c662a70e1636/orjson-3.11.5-cp314-cp314-win_arm64.whl", hash = "sha256:38b22f476c351f9a1c43e5b07d8b5a02eb24a6ab8e75f700f7d479d4568346a5", size = 126713 }, +] + [[package]] name = "packaging" -version = "26.0" +version = "24.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416 } +sdist = { url = "https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366 }, + { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, ] [[package]] -name = "pinecone-client" -version = "6.0.0" +name = "pinecone" +version = "8.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, + { name = "orjson" }, + { name = "pinecone-plugin-assistant" }, { name = "pinecone-plugin-interface" }, { name = "python-dateutil" }, { name = "typing-extensions" }, - { name = "urllib3", marker = "python_full_version < '4.0'" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/33/13/f4c481a6a93dab92132d6d863b70a0e6c903f62940389435b31cf0c7d7d2/pinecone-8.0.0.tar.gz", hash = "sha256:feca7ff607706c09ffbd127ec93fa3b7110896b30c0d7a57672da73c69698d53", size = 1092653 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/6c/1d870a9211eb8f0bf60214182de001b480f94590eca9d6164a5d6d7de031/pinecone-8.0.0-py3-none-any.whl", hash = "sha256:95f714a496a91d80f3405165aedfea76ca8ac16e51e618df0434241838e353f8", size = 745902 }, +] + +[[package]] +name = "pinecone-plugin-assistant" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/ab/3ab3b81e8ad82fbfcaa4f446c7f962b18968d61543c8c9e2c38bd777c056/pinecone_client-6.0.0.tar.gz", hash = "sha256:f224fc999205e4858c4737c40922bdf42d178b361c8859bc486ec00d45b359a9", size = 7004 } +sdist = { url = "https://files.pythonhosted.org/packages/08/1a/33249870c9e8c774dafc038419b48aa63b380b461e9a1c1cb042db31be49/pinecone_plugin_assistant-3.0.1.tar.gz", hash = "sha256:6b00e94ef1bf55ed601d2316ee6f71f96f93bf2155277a826638395e1090dde3", size = 152060 } wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/e4/7780cd631dc6dad0172a245e958b41b28a70779594c0790fa08b952aa97f/pinecone_client-6.0.0-py3-none-any.whl", hash = "sha256:d81a9e73cae441e4ab6dfc9c1d8b51c9895dae2488cda64f3e21b9dfc10c8d94", size = 6654 }, + { url = 
"https://files.pythonhosted.org/packages/06/88/4b801675b4d58c5f8acd96bfd4847e6d7bc1a93ee4ff916e913dd6bda2de/pinecone_plugin_assistant-3.0.1-py3-none-any.whl", hash = "sha256:cd86ca5c98137221170e90fe81e03bbe71999992096da68c77f4af3503017622", size = 280865 }, ] [[package]] @@ -1287,19 +1399,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5e/fd/ddca80617f230bd833f99b4fb959abebffd8651f520493cae2e96276b1bd/types_awscrt-0.31.1-py3-none-any.whl", hash = "sha256:7e4364ac635f72bd57f52b093883640b1448a6eded0ecbac6e900bf4b1e4777b", size = 42516 }, ] -[[package]] -name = "types-boto3" -version = "1.42.34" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "botocore-stubs" }, - { name = "types-s3transfer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/59/d7/3e2722311c9405cfcb0105d87b3e17a1c6ad6a5caab76d58b73d2983597e/types_boto3-1.42.34.tar.gz", hash = "sha256:86caec7ba201047ec78b170f87442cfe8ce288ce61199ea53bad255b33e8e00b", size = 101284 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/af/93/3edb443030240e0a452bb16420516ccca451fca38f8dc22ac95cf96fea29/types_boto3-1.42.34-py3-none-any.whl", hash = "sha256:a1ec9aad643b0f8455257ba12141f4f4f38875c586f00ff0fc96dfe75aa9d1b5", size = 69674 }, -] - [[package]] name = "types-s3transfer" version = "0.16.0" diff --git a/packages/nvisy-rig/README.md b/packages/nvisy-rig/README.md new file mode 100644 index 0000000..8e2fb72 --- /dev/null +++ b/packages/nvisy-rig/README.md @@ -0,0 +1,72 @@ +# nvisy-rig + +AI/LLM orchestration layer. Provides unified interfaces for LLM providers and agent workflows. + +## Installation + +```bash +# Core package +uv add nvisy-rig + +# With specific providers +uv add "nvisy-rig[openai,anthropic]" + +# All providers +uv add "nvisy-rig[all]" +``` + +## Available Providers + +| Provider | Extra | Description | +|----------|-------|-------------| +| OpenAI | `openai` | GPT models, embeddings | +| Anthropic | `anthropic` | Claude models | +| Cohere | `cohere` | Command models, embeddings | + +## Usage + +```python +from nvisy_rig.agents import Agent + +# Create an agent +agent = Agent( + model="gpt-4", + system_prompt="You are a helpful assistant.", +) + +# Run completion +response = await agent.complete("Hello, world!") +print(response) +``` + +## Architecture + +This package provides the Python AI/LLM layer for the nvisy system: + +- **nvisy-dal**: Data access layer (storage, databases, vector stores) +- **nvisy-rig**: AI orchestration layer (LLM providers, agents, RAG) + +## Development + +```bash +# Install dev dependencies +uv sync --extra dev + +# Run tests +uv run pytest + +# Type check +uv run pyright + +# Lint +uv run ruff check . 
+``` + +## TODO + +- [ ] OpenAI provider +- [ ] Anthropic provider +- [ ] Cohere provider +- [ ] Agent framework +- [ ] RAG pipelines +- [ ] Tool integration diff --git a/packages/nvisy-rig/src/nvisy_rig/_generated/__init__.py b/packages/nvisy-rig/src/nvisy_rig/generated/__init__.py similarity index 100% rename from packages/nvisy-rig/src/nvisy_rig/_generated/__init__.py rename to packages/nvisy-rig/src/nvisy_rig/generated/__init__.py From 2d0180fa71ccf00a6d193ef34e205bc1bb36552f Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Mon, 26 Jan 2026 22:50:20 +0100 Subject: [PATCH 28/28] refactor(dal): consolidate datatypes, simplify provider API - Move datatype module to core/datatypes.rs - Add AnyCredentials, AnyParams, AnyProvider enums in provider/mod.rs - Simplify PostgresCredentials to connection string only (dsn field) - Add as_data_input/as_data_output helpers to PyProvider - Add python::connect free function for cleaner provider creation - Add disconnect method to Provider trait - Remove separator comments from datatypes - Fix nvisy-runtime imports for AnyDataValue - Fix nvisy-webhook error conversion to use Error::new pattern - Move TODO section from README to docs/ROADMAP.md --- crates/nvisy-core/src/error.rs | 75 -------- crates/nvisy-core/src/provider.rs | 8 + crates/nvisy-dal/src/core/datatypes.rs | 176 ++++++++++++++++++ crates/nvisy-dal/src/core/mod.rs | 5 + crates/nvisy-dal/src/datatype/document.rs | 20 -- crates/nvisy-dal/src/datatype/embedding.rs | 19 -- crates/nvisy-dal/src/datatype/graph.rs | 49 ----- crates/nvisy-dal/src/datatype/message.rs | 47 ----- crates/nvisy-dal/src/datatype/mod.rs | 44 ----- crates/nvisy-dal/src/datatype/object.rs | 44 ----- crates/nvisy-dal/src/datatype/record.rs | 17 -- crates/nvisy-dal/src/lib.rs | 8 +- crates/nvisy-dal/src/provider/mod.rs | 40 +++- crates/nvisy-dal/src/provider/pinecone.rs | 30 +-- crates/nvisy-dal/src/provider/postgres.rs | 48 ++--- crates/nvisy-dal/src/provider/s3.rs | 34 +--- crates/nvisy-dal/src/python/loader.rs | 16 +- crates/nvisy-dal/src/python/mod.rs | 26 +++ crates/nvisy-dal/src/python/provider.rs | 14 +- .../nvisy-runtime/src/graph/input/stream.rs | 2 +- .../nvisy-runtime/src/graph/output/stream.rs | 2 +- .../src/graph/route/file_category.rs | 2 +- .../nvisy-runtime/src/graph/route/language.rs | 2 +- crates/nvisy-runtime/src/graph/route/mod.rs | 2 +- .../src/graph/transform/chunk.rs | 2 +- .../src/graph/transform/derive.rs | 2 +- .../src/graph/transform/embedding.rs | 2 +- .../src/graph/transform/enrich.rs | 2 +- .../src/graph/transform/extract.rs | 2 +- .../nvisy-runtime/src/graph/transform/mod.rs | 2 +- .../src/graph/transform/partition.rs | 2 +- crates/nvisy-webhook/src/reqwest/error.rs | 8 +- docs/ROADMAP.md | 63 +++++++ packages/nvisy-dal/README.md | 67 ++----- .../src/nvisy_dal/providers/postgres.py | 17 +- packages/nvisy-rig/README.md | 22 ++- 36 files changed, 427 insertions(+), 494 deletions(-) create mode 100644 crates/nvisy-dal/src/core/datatypes.rs delete mode 100644 crates/nvisy-dal/src/datatype/document.rs delete mode 100644 crates/nvisy-dal/src/datatype/embedding.rs delete mode 100644 crates/nvisy-dal/src/datatype/graph.rs delete mode 100644 crates/nvisy-dal/src/datatype/message.rs delete mode 100644 crates/nvisy-dal/src/datatype/mod.rs delete mode 100644 crates/nvisy-dal/src/datatype/object.rs delete mode 100644 crates/nvisy-dal/src/datatype/record.rs create mode 100644 docs/ROADMAP.md diff --git a/crates/nvisy-core/src/error.rs b/crates/nvisy-core/src/error.rs index 805dfb2..7f7a52e 100644 --- 
a/crates/nvisy-core/src/error.rs +++ b/crates/nvisy-core/src/error.rs @@ -79,79 +79,4 @@ impl Error { self.source = Some(Box::new(source)); self } - - /// Creates a new invalid input error. - pub fn invalid_input() -> Self { - Self::new(ErrorKind::InvalidInput) - } - - /// Creates a new network error. - pub fn network_error() -> Self { - Self::new(ErrorKind::NetworkError) - } - - /// Creates a new authentication error. - pub fn authentication() -> Self { - Self::new(ErrorKind::Authentication) - } - - /// Creates a new authorization error. - pub fn authorization() -> Self { - Self::new(ErrorKind::Authorization) - } - - /// Creates a new rate limited error. - pub fn rate_limited() -> Self { - Self::new(ErrorKind::RateLimited) - } - - /// Creates a new service unavailable error. - pub fn service_unavailable() -> Self { - Self::new(ErrorKind::ServiceUnavailable) - } - - /// Creates a new internal error. - pub fn internal_error() -> Self { - Self::new(ErrorKind::InternalError) - } - - /// Creates a new external error. - pub fn external_error() -> Self { - Self::new(ErrorKind::ExternalError) - } - - /// Creates a new configuration error. - pub fn configuration() -> Self { - Self::new(ErrorKind::Configuration) - } - - /// Creates a new not found error. - pub fn not_found() -> Self { - Self::new(ErrorKind::NotFound) - } - - /// Creates a new timeout error. - pub fn timeout() -> Self { - Self::new(ErrorKind::Timeout) - } - - /// Creates a new serialization error. - pub fn serialization() -> Self { - Self::new(ErrorKind::Serialization) - } - - /// Creates a new unknown error. - pub fn unknown() -> Self { - Self::new(ErrorKind::Unknown) - } - - /// Returns the error kind. - pub fn kind(&self) -> ErrorKind { - self.kind - } - - /// Returns the error kind as a string. - pub fn kind_str(&self) -> &'static str { - self.kind.into() - } } diff --git a/crates/nvisy-core/src/provider.rs b/crates/nvisy-core/src/provider.rs index a700896..ebb8a79 100644 --- a/crates/nvisy-core/src/provider.rs +++ b/crates/nvisy-core/src/provider.rs @@ -37,4 +37,12 @@ pub trait Provider: Send { async fn connect(params: Self::Params, credentials: Self::Credentials) -> Result<Self> where Self: Sized; + + /// Disconnects and cleans up the provider. + async fn disconnect(self) -> Result<()> + where + Self: Sized, + { + Ok(()) + } } diff --git a/crates/nvisy-dal/src/core/datatypes.rs b/crates/nvisy-dal/src/core/datatypes.rs new file mode 100644 index 0000000..6de0879 --- /dev/null +++ b/crates/nvisy-dal/src/core/datatypes.rs @@ -0,0 +1,176 @@ +//! Data types for the DAL. +//! +//! These types represent the data items that flow through providers: +//! - `Object` for object storage (S3, GCS, Azure Blob) +//! - `Document` for JSON documents +//! - `Embedding` for vector embeddings +//! - `Record` for relational rows +//! - `Message` for queue/stream messages +//! - `Graph`, `Node`, `Edge` for graph data + +use std::collections::HashMap; + +use bytes::Bytes; +use derive_more::From; +use jiff::Timestamp; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +/// Metadata associated with data items. +pub type Metadata = HashMap<String, String>; + +/// Marker trait for data types that can be read/written through the DAL. +pub trait DataType: Send + Sync + 'static {} + +/// Type-erased data value for runtime dispatch. +#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyDataValue { + /// Object storage item (S3, GCS, etc.). + Object(Object), + /// JSON document.
+ Document(Document), + /// Vector embedding. + Embedding(Embedding), + /// Graph with nodes and edges. + Graph(Graph), + /// Relational record/row. + Record(Record), + /// Queue/stream message. + Message(Message), +} + +/// An object representing a file or binary data (S3, GCS, Azure Blob). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Object { + /// Path or key identifying this object. + pub path: String, + /// Raw binary data. + #[serde(with = "serde_bytes")] + pub data: Bytes, + /// Content type (MIME type). + #[serde(skip_serializing_if = "Option::is_none")] + pub content_type: Option<String>, + /// Additional metadata. + #[serde(default)] + pub metadata: Metadata, +} + +impl DataType for Object {} + +/// A document with flexible JSON content. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Document { + /// Unique identifier. + pub id: String, + /// Document content as JSON. + pub content: Value, + /// Additional metadata. + #[serde(default)] + pub metadata: Metadata, +} + +impl DataType for Document {} + +/// A vector embedding with metadata. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Embedding { + /// Unique identifier. + pub id: String, + /// The embedding vector. + pub vector: Vec<f32>, + /// Additional metadata. + #[serde(default)] + pub metadata: Metadata, +} + +impl DataType for Embedding {} + +/// A record representing a row in a relational table. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Record { + /// Column values keyed by column name. + pub columns: HashMap<String, Value>, +} + +impl DataType for Record {} + +/// A message from a queue or stream. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Message { + /// Unique identifier. + pub id: String, + /// Message payload. + #[serde(with = "serde_bytes")] + pub payload: Bytes, + /// Message headers. + #[serde(default)] + pub headers: HashMap<String, String>, + /// Timestamp when the message was created. + #[serde(skip_serializing_if = "Option::is_none")] + pub timestamp: Option<Timestamp>, +} + +impl DataType for Message {} + +/// A graph containing nodes and edges. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Graph { + /// Nodes in the graph. + #[serde(default)] + pub nodes: Vec<Node>, + /// Edges in the graph. + #[serde(default)] + pub edges: Vec<Edge>, +} + +impl DataType for Graph {} + +/// A node in a graph. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Node { + /// Unique identifier. + pub id: String, + /// Node labels (types). + #[serde(default)] + pub labels: Vec<String>, + /// Node properties. + #[serde(default)] + pub properties: HashMap<String, String>, +} + +/// An edge in a graph. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Edge { + /// Unique identifier. + pub id: String, + /// Source node ID. + pub from: String, + /// Target node ID. + pub to: String, + /// Edge label (relationship type). + pub label: String, + /// Edge properties.
+ #[serde(default)] + pub properties: HashMap<String, String>, +} + +mod serde_bytes { + use bytes::Bytes; + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + + pub fn serialize<S>(bytes: &Bytes, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + bytes.as_ref().serialize(serializer) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result<Bytes, D::Error> + where + D: Deserializer<'de>, + { + let vec = Vec::<u8>::deserialize(deserializer)?; + Ok(Bytes::from(vec)) + } +} diff --git a/crates/nvisy-dal/src/core/mod.rs b/crates/nvisy-dal/src/core/mod.rs index 1462750..986279d 100644 --- a/crates/nvisy-dal/src/core/mod.rs +++ b/crates/nvisy-dal/src/core/mod.rs @@ -1,10 +1,15 @@ //! Core types and traits for data operations. mod contexts; +mod datatypes; mod params; mod streams; pub use contexts::{AnyContext, ObjectContext, RelationalContext, VectorContext}; +pub use datatypes::{ + AnyDataValue, DataType, Document, Edge, Embedding, Graph, Message, Metadata, Node, Object, + Record, +}; pub use nvisy_core::Provider; pub use params::{DistanceMetric, ObjectParams, RelationalParams, VectorParams}; pub use streams::{InputStream, ItemSink, ItemStream, OutputStream}; diff --git a/crates/nvisy-dal/src/datatype/document.rs b/crates/nvisy-dal/src/datatype/document.rs deleted file mode 100644 index f3389da..0000000 --- a/crates/nvisy-dal/src/datatype/document.rs +++ /dev/null @@ -1,20 +0,0 @@ -//! Document data type for JSON documents. - -use serde::{Deserialize, Serialize}; -use serde_json::Value; - -use super::{DataType, Metadata}; - -/// A document with flexible JSON content. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Document { - /// Unique identifier. - pub id: String, - /// Document content as JSON. - pub content: Value, - /// Additional metadata. - #[serde(default)] - pub metadata: Metadata, -} - -impl DataType for Document {} diff --git a/crates/nvisy-dal/src/datatype/embedding.rs b/crates/nvisy-dal/src/datatype/embedding.rs deleted file mode 100644 index b467741..0000000 --- a/crates/nvisy-dal/src/datatype/embedding.rs +++ /dev/null @@ -1,19 +0,0 @@ -//! Embedding data type for vector data. - -use serde::{Deserialize, Serialize}; - -use super::{DataType, Metadata}; - -/// A vector embedding with metadata. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Embedding { - /// Unique identifier. - pub id: String, - /// The embedding vector. - pub vector: Vec, - /// Additional metadata. - #[serde(default)] - pub metadata: Metadata, -} - -impl DataType for Embedding {} diff --git a/crates/nvisy-dal/src/datatype/graph.rs b/crates/nvisy-dal/src/datatype/graph.rs deleted file mode 100644 index 47533eb..0000000 --- a/crates/nvisy-dal/src/datatype/graph.rs +++ /dev/null @@ -1,49 +0,0 @@ -//! Graph data type with nodes and edges. - -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; - -use super::DataType; - -/// A graph containing nodes and edges. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct Graph { - /// Nodes in the graph. - #[serde(default)] - pub nodes: Vec, - /// Edges in the graph. - #[serde(default)] - pub edges: Vec, -} - -impl DataType for Graph {} - -/// A node in a graph. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Node { - /// Unique identifier. - pub id: String, - /// Node labels (types). - #[serde(default)] - pub labels: Vec, - /// Node properties. - #[serde(default)] - pub properties: HashMap, -} - -/// An edge in a graph. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Edge { - /// Unique identifier.
- pub id: String, - /// Source node ID. - pub from: String, - /// Target node ID. - pub to: String, - /// Edge label (relationship type). - pub label: String, - /// Edge properties. - #[serde(default)] - pub properties: HashMap, -} diff --git a/crates/nvisy-dal/src/datatype/message.rs b/crates/nvisy-dal/src/datatype/message.rs deleted file mode 100644 index 189ac07..0000000 --- a/crates/nvisy-dal/src/datatype/message.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! Message data type for queue messages. - -use std::collections::HashMap; - -use bytes::Bytes; -use jiff::Timestamp; -use serde::{Deserialize, Serialize}; - -use super::DataType; - -/// A message from a queue or stream. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Message { - /// Unique identifier. - pub id: String, - /// Message payload. - #[serde(with = "serde_bytes")] - pub payload: Bytes, - /// Message headers. - #[serde(default)] - pub headers: HashMap, - /// Timestamp when the message was created. - #[serde(skip_serializing_if = "Option::is_none")] - pub timestamp: Option, -} - -impl DataType for Message {} - -mod serde_bytes { - use bytes::Bytes; - use serde::{Deserialize, Deserializer, Serialize, Serializer}; - - pub fn serialize(bytes: &Bytes, serializer: S) -> Result - where - S: Serializer, - { - bytes.as_ref().serialize(serializer) - } - - pub fn deserialize<'de, D>(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let vec = Vec::::deserialize(deserializer)?; - Ok(Bytes::from(vec)) - } -} diff --git a/crates/nvisy-dal/src/datatype/mod.rs b/crates/nvisy-dal/src/datatype/mod.rs deleted file mode 100644 index a3bced8..0000000 --- a/crates/nvisy-dal/src/datatype/mod.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Data types for the DAL. - -mod document; -mod embedding; -mod graph; -mod message; -mod object; -mod record; - -use std::collections::HashMap; - -use derive_more::From; -use serde::{Deserialize, Serialize}; - -pub use document::Document; -pub use embedding::Embedding; -pub use graph::{Edge, Graph, Node}; -pub use message::Message; -pub use object::Object; -pub use record::Record; - -/// Metadata associated with data items. -pub type Metadata = HashMap; - -/// Marker trait for data types that can be read/written through the DAL. -pub trait DataType: Send + Sync + 'static {} - -/// Type-erased data value for runtime dispatch. -#[derive(Debug, Clone, From, Serialize, Deserialize)] -#[serde(tag = "type", content = "data", rename_all = "snake_case")] -pub enum AnyDataValue { - /// Object storage item (S3, GCS, etc.). - Object(Object), - /// JSON document. - Document(Document), - /// Vector embedding. - Embedding(Embedding), - /// Graph with nodes and edges. - Graph(Graph), - /// Relational record/row. - Record(Record), - /// Queue/stream message. - Message(Message), -} diff --git a/crates/nvisy-dal/src/datatype/object.rs b/crates/nvisy-dal/src/datatype/object.rs deleted file mode 100644 index 009b07f..0000000 --- a/crates/nvisy-dal/src/datatype/object.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Object data type for files and binary objects. - -use bytes::Bytes; -use serde::{Deserialize, Serialize}; - -use super::{DataType, Metadata}; - -/// An object representing a file or binary data (S3, GCS, Azure Blob). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Object { - /// Path or key identifying this object. - pub path: String, - /// Raw binary data. - #[serde(with = "serde_bytes")] - pub data: Bytes, - /// Content type (MIME type). 
- #[serde(skip_serializing_if = "Option::is_none")] - pub content_type: Option, - /// Additional metadata. - #[serde(default)] - pub metadata: Metadata, -} - -impl DataType for Object {} - -mod serde_bytes { - use bytes::Bytes; - use serde::{Deserialize, Deserializer, Serialize, Serializer}; - - pub fn serialize(bytes: &Bytes, serializer: S) -> Result - where - S: Serializer, - { - bytes.as_ref().serialize(serializer) - } - - pub fn deserialize<'de, D>(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let vec = Vec::::deserialize(deserializer)?; - Ok(Bytes::from(vec)) - } -} diff --git a/crates/nvisy-dal/src/datatype/record.rs b/crates/nvisy-dal/src/datatype/record.rs deleted file mode 100644 index 8255a00..0000000 --- a/crates/nvisy-dal/src/datatype/record.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! Record data type for relational data. - -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; -use serde_json::Value; - -use super::DataType; - -/// A record representing a row in a relational table. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct Record { - /// Column values keyed by column name. - pub columns: HashMap, -} - -impl DataType for Record {} diff --git a/crates/nvisy-dal/src/lib.rs b/crates/nvisy-dal/src/lib.rs index bfe6a7e..4949cd5 100644 --- a/crates/nvisy-dal/src/lib.rs +++ b/crates/nvisy-dal/src/lib.rs @@ -13,7 +13,6 @@ #![cfg_attr(docsrs, feature(doc_cfg))] pub mod core; -pub mod datatype; pub mod provider; mod python; @@ -21,9 +20,10 @@ mod python; mod error; pub use core::{ - AnyContext, DataInput, DataOutput, InputStream, ItemSink, ItemStream, ObjectContext, - OutputStream, Provider, RelationalContext, VectorContext, + AnyContext, AnyDataValue, DataInput, DataOutput, DataType, Document, Edge, Embedding, Graph, + InputStream, ItemSink, ItemStream, Message, Metadata, Node, Object, ObjectContext, + OutputStream, Provider, Record, RelationalContext, VectorContext, }; -pub use datatype::{AnyDataValue, DataType, Document, Embedding, Graph, Message, Object, Record}; pub use error::{BoxError, Error, ErrorKind, Result}; +pub use provider::{AnyCredentials, AnyParams, AnyProvider}; diff --git a/crates/nvisy-dal/src/provider/mod.rs b/crates/nvisy-dal/src/provider/mod.rs index 3c970c3..9db8751 100644 --- a/crates/nvisy-dal/src/provider/mod.rs +++ b/crates/nvisy-dal/src/provider/mod.rs @@ -3,7 +3,7 @@ //! Each provider module exports credentials and params types //! along with the main provider struct. //! -//! Data types for input/output are in the `datatype` module: +//! Data types for input/output are in the `core` module: //! - `Record` for PostgreSQL rows //! - `Object` for S3 objects //! - `Embedding` for Pinecone vectors @@ -18,6 +18,9 @@ //! - `s3`: AWS S3 / MinIO object storage //! - `pinecone`: Pinecone vector database +use derive_more::From; +use serde::{Deserialize, Serialize}; + mod pinecone; mod postgres; mod s3; @@ -25,3 +28,38 @@ mod s3; pub use self::pinecone::{PineconeCredentials, PineconeParams, PineconeProvider}; pub use self::postgres::{PostgresCredentials, PostgresParams, PostgresProvider}; pub use self::s3::{S3Credentials, S3Params, S3Provider}; + +/// Type-erased credentials for any provider. +#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyCredentials { + /// PostgreSQL credentials. + Postgres(PostgresCredentials), + /// S3 credentials. + S3(S3Credentials), + /// Pinecone credentials. 
+ Pinecone(PineconeCredentials), +} + +/// Type-erased parameters for any provider. +#[derive(Debug, Clone, From, Serialize, Deserialize)] +#[serde(tag = "type", content = "data", rename_all = "snake_case")] +pub enum AnyParams { + /// PostgreSQL parameters. + Postgres(PostgresParams), + /// S3 parameters. + S3(S3Params), + /// Pinecone parameters. + Pinecone(PineconeParams), +} + +/// Type-erased provider instance. +#[derive(Debug, From)] +pub enum AnyProvider { + /// PostgreSQL provider. + Postgres(PostgresProvider), + /// S3 provider. + S3(S3Provider), + /// Pinecone provider. + Pinecone(PineconeProvider), +} diff --git a/crates/nvisy-dal/src/provider/pinecone.rs b/crates/nvisy-dal/src/provider/pinecone.rs index f2a23f3..e78ba1d 100644 --- a/crates/nvisy-dal/src/provider/pinecone.rs +++ b/crates/nvisy-dal/src/provider/pinecone.rs @@ -5,9 +5,8 @@ use serde::{Deserialize, Serialize}; use crate::Result; -use crate::core::{DataOutput, Provider}; -use crate::datatype::Embedding; -use crate::python::{PyDataOutput, PyProvider, PyProviderLoader}; +use crate::core::{DataOutput, Embedding, Provider}; +use crate::python::{self, PyDataOutput, PyProvider}; /// Credentials for Pinecone connection. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -31,13 +30,6 @@ pub struct PineconeProvider { output: PyDataOutput, } -impl PineconeProvider { - /// Disconnects from Pinecone. - pub async fn disconnect(self) -> Result<()> { - self.inner.disconnect().await - } -} - #[async_trait::async_trait] impl Provider for PineconeProvider { type Credentials = PineconeCredentials; @@ -47,17 +39,15 @@ impl Provider for PineconeProvider { params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { - let loader = PyProviderLoader::new().map_err(crate::Error::from)?; - let creds_json = serde_json::to_value(&credentials).map_err(crate::Error::from)?; - let params_json = serde_json::to_value(¶ms).map_err(crate::Error::from)?; - - let inner = loader - .load("pinecone", creds_json, params_json) - .await - .map_err(crate::Error::from)?; - let output = PyDataOutput::new(PyProvider::new(inner.clone_py_object())); + let inner = python::connect("pinecone", credentials, params).await?; + Ok(Self { + output: inner.as_data_output(), + inner, + }) + } - Ok(Self { inner, output }) + async fn disconnect(self) -> nvisy_core::Result<()> { + self.inner.disconnect().await.map_err(Into::into) } } diff --git a/crates/nvisy-dal/src/provider/postgres.rs b/crates/nvisy-dal/src/provider/postgres.rs index 13076c3..d7bcc82 100644 --- a/crates/nvisy-dal/src/provider/postgres.rs +++ b/crates/nvisy-dal/src/provider/postgres.rs @@ -6,24 +6,17 @@ use serde::{Deserialize, Serialize}; use crate::Result; use crate::core::{ - DataInput, DataOutput, InputStream, Provider, RelationalContext, RelationalParams, + DataInput, DataOutput, InputStream, Provider, Record, RelationalContext, RelationalParams, }; -use crate::datatype::Record; -use crate::python::{PyDataInput, PyDataOutput, PyProvider, PyProviderLoader}; +use crate::python::{self, PyDataInput, PyDataOutput, PyProvider}; /// Credentials for PostgreSQL connection. +/// +/// Uses a connection string (DSN) format: `postgres://user:pass@host:port/database` #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PostgresCredentials { - /// Database host. - pub host: String, - /// Database port. - pub port: u16, - /// Database user. - pub user: String, - /// Database password. - pub password: String, - /// Database name. - pub database: String, + /// PostgreSQL connection string (DSN). 
+ pub dsn: String, } /// Parameters for PostgreSQL operations. @@ -48,13 +41,6 @@ pub struct PostgresProvider { output: PyDataOutput, } -impl PostgresProvider { - /// Disconnects from the database. - pub async fn disconnect(self) -> Result<()> { - self.inner.disconnect().await - } -} - #[async_trait::async_trait] impl Provider for PostgresProvider { type Credentials = PostgresCredentials; @@ -64,29 +50,23 @@ impl Provider for PostgresProvider { params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { - let loader = PyProviderLoader::new().map_err(crate::Error::from)?; - let creds_json = serde_json::to_value(&credentials).map_err(crate::Error::from)?; - let params_json = serde_json::to_value(¶ms).map_err(crate::Error::from)?; - - let inner = loader - .load("postgres", creds_json, params_json) - .await - .map_err(crate::Error::from)?; - let input = PyDataInput::new(PyProvider::new(inner.clone_py_object())); - let output = PyDataOutput::new(PyProvider::new(inner.clone_py_object())); - + let inner = python::connect("postgres", credentials, params).await?; Ok(Self { + input: inner.as_data_input(), + output: inner.as_data_output(), inner, - input, - output, }) } + + async fn disconnect(self) -> nvisy_core::Result<()> { + self.inner.disconnect().await.map_err(Into::into) + } } #[async_trait::async_trait] impl DataInput for PostgresProvider { - type Item = Record; type Context = RelationalContext; + type Item = Record; async fn read(&self, ctx: &Self::Context) -> Result> { self.input.read(ctx).await diff --git a/crates/nvisy-dal/src/provider/s3.rs b/crates/nvisy-dal/src/provider/s3.rs index f358f3a..75ded4d 100644 --- a/crates/nvisy-dal/src/provider/s3.rs +++ b/crates/nvisy-dal/src/provider/s3.rs @@ -5,9 +5,8 @@ use serde::{Deserialize, Serialize}; use crate::Result; -use crate::core::{DataInput, DataOutput, InputStream, ObjectContext, Provider}; -use crate::datatype::Object; -use crate::python::{PyDataInput, PyDataOutput, PyProvider, PyProviderLoader}; +use crate::core::{DataInput, DataOutput, InputStream, Object, ObjectContext, Provider}; +use crate::python::{self, PyDataInput, PyDataOutput, PyProvider}; /// Credentials for S3 connection. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -39,13 +38,6 @@ pub struct S3Provider { output: PyDataOutput, } -impl S3Provider { - /// Disconnects from S3. 
- pub async fn disconnect(self) -> Result<()> { - self.inner.disconnect().await - } -} - #[async_trait::async_trait] impl Provider for S3Provider { type Credentials = S3Credentials; @@ -55,29 +47,23 @@ impl Provider for S3Provider { params: Self::Params, credentials: Self::Credentials, ) -> nvisy_core::Result { - let loader = PyProviderLoader::new().map_err(crate::Error::from)?; - let creds_json = serde_json::to_value(&credentials).map_err(crate::Error::from)?; - let params_json = serde_json::to_value(¶ms).map_err(crate::Error::from)?; - - let inner = loader - .load("s3", creds_json, params_json) - .await - .map_err(crate::Error::from)?; - let input = PyDataInput::new(PyProvider::new(inner.clone_py_object())); - let output = PyDataOutput::new(PyProvider::new(inner.clone_py_object())); - + let inner = python::connect("s3", credentials, params).await?; Ok(Self { + input: inner.as_data_input(), + output: inner.as_data_output(), inner, - input, - output, }) } + + async fn disconnect(self) -> nvisy_core::Result<()> { + self.inner.disconnect().await.map_err(Into::into) + } } #[async_trait::async_trait] impl DataInput for S3Provider { - type Item = Object; type Context = ObjectContext; + type Item = Object; async fn read(&self, ctx: &Self::Context) -> Result> { self.input.read(ctx).await diff --git a/crates/nvisy-dal/src/python/loader.rs b/crates/nvisy-dal/src/python/loader.rs index fa7e62a..ff5d0f3 100644 --- a/crates/nvisy-dal/src/python/loader.rs +++ b/crates/nvisy-dal/src/python/loader.rs @@ -5,8 +5,7 @@ use std::sync::OnceLock; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList, PyModule}; -use super::error::{PyError, PyResult}; -use super::provider::PyProvider; +use super::{PyError, PyProvider, PyResult}; /// Global reference to the nvisy_dal Python module. static NVISY_DAL_MODULE: OnceLock> = OnceLock::new(); @@ -47,13 +46,13 @@ impl PyProviderLoader { }) } - /// Loads a provider by name and connects with the given credentials. + /// Loads a provider by name and connects with pre-serialized JSON values. /// /// # Arguments /// /// * `name` - Provider name (e.g., "qdrant", "pinecone", "s3") - /// * `credentials` - JSON-serializable credentials - /// * `params` - JSON-serializable connection parameters + /// * `credentials` - JSON credentials + /// * `params` - JSON connection parameters pub async fn load( &self, name: &str, @@ -101,7 +100,6 @@ impl PyProviderLoader { })?; let instance = coro.await.map_err(PyError::from)?; - Ok(PyProvider::new(instance)) } @@ -120,7 +118,7 @@ impl Default for PyProviderLoader { } /// Converts a serde_json::Value to a Python dict. -pub(super) fn json_to_pydict<'py>( +pub fn json_to_pydict<'py>( py: Python<'py>, value: &serde_json::Value, ) -> PyResult> { @@ -138,7 +136,7 @@ pub(super) fn json_to_pydict<'py>( } /// Converts a serde_json::Value to a Python object. -pub(super) fn json_to_pyobject<'py>( +pub fn json_to_pyobject<'py>( py: Python<'py>, value: &serde_json::Value, ) -> PyResult> { @@ -183,7 +181,7 @@ pub(super) fn json_to_pyobject<'py>( } /// Converts a Python object to a serde_json::Value. 
-pub(super) fn pyobject_to_json(obj: &Bound<'_, PyAny>) -> PyResult { +pub fn pyobject_to_json(obj: &Bound<'_, PyAny>) -> PyResult { if obj.is_none() { return Ok(serde_json::Value::Null); } diff --git a/crates/nvisy-dal/src/python/mod.rs b/crates/nvisy-dal/src/python/mod.rs index 3a84a99..7b6a8f6 100644 --- a/crates/nvisy-dal/src/python/mod.rs +++ b/crates/nvisy-dal/src/python/mod.rs @@ -7,5 +7,31 @@ mod error; mod loader; mod provider; +pub(crate) use error::{PyError, PyResult}; pub(crate) use loader::PyProviderLoader; pub(crate) use provider::{PyDataInput, PyDataOutput, PyProvider}; + +/// Connects to a Python provider by name with the given credentials and parameters. +/// +/// # Arguments +/// +/// * `name` - Provider name (e.g., "postgres", "pinecone", "s3") +/// * `credentials` - Serializable credentials +/// * `params` - Serializable connection parameters +pub(crate) async fn connect( + name: &str, + credentials: C, + params: P, +) -> crate::Result +where + C: serde::Serialize, + P: serde::Serialize, +{ + let loader = PyProviderLoader::new().map_err(crate::Error::from)?; + let creds_json = serde_json::to_value(credentials).map_err(crate::Error::from)?; + let params_json = serde_json::to_value(params).map_err(crate::Error::from)?; + loader + .load(name, creds_json, params_json) + .await + .map_err(crate::Error::from) +} diff --git a/crates/nvisy-dal/src/python/provider.rs b/crates/nvisy-dal/src/python/provider.rs index 4e2f06b..0a6f67e 100644 --- a/crates/nvisy-dal/src/python/provider.rs +++ b/crates/nvisy-dal/src/python/provider.rs @@ -6,7 +6,7 @@ use async_stream::try_stream; use futures::Stream; use pyo3::prelude::*; -use super::error::PyError; +use super::PyError; use super::loader::pyobject_to_json; use crate::Result; use crate::core::{DataInput, DataOutput, InputStream}; @@ -30,6 +30,16 @@ impl PyProvider { Python::attach(|py| self.instance.clone_ref(py)) } + /// Creates a typed `DataInput` wrapper from this provider. + pub fn as_data_input(&self) -> PyDataInput { + PyDataInput::new(Self::new(self.clone_py_object())) + } + + /// Creates a typed `DataOutput` wrapper from this provider. + pub fn as_data_output(&self) -> PyDataOutput { + PyDataOutput::new(Self::new(self.clone_py_object())) + } + /// Disconnects the provider. 
pub async fn disconnect(&self) -> Result<()> { let coro = Python::attach(|py| { @@ -77,8 +87,8 @@ where T: for<'de> serde::Deserialize<'de> + Send + Sync + 'static, Ctx: serde::Serialize + Send + Sync, { - type Item = T; type Context = Ctx; + type Item = T; async fn read(&self, ctx: &Self::Context) -> Result> { let ctx_json = serde_json::to_value(ctx) diff --git a/crates/nvisy-runtime/src/graph/input/stream.rs b/crates/nvisy-runtime/src/graph/input/stream.rs index 783a821..5a0d667 100644 --- a/crates/nvisy-runtime/src/graph/input/stream.rs +++ b/crates/nvisy-runtime/src/graph/input/stream.rs @@ -5,7 +5,7 @@ use std::task::{Context, Poll}; use futures::stream::BoxStream; use futures::{Stream, StreamExt}; -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use crate::error::Result; diff --git a/crates/nvisy-runtime/src/graph/output/stream.rs b/crates/nvisy-runtime/src/graph/output/stream.rs index d7f5f8e..50873da 100644 --- a/crates/nvisy-runtime/src/graph/output/stream.rs +++ b/crates/nvisy-runtime/src/graph/output/stream.rs @@ -4,7 +4,7 @@ use std::pin::Pin; use std::task::{Context, Poll}; use futures::{Sink, SinkExt}; -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use crate::error::Error; diff --git a/crates/nvisy-runtime/src/graph/route/file_category.rs b/crates/nvisy-runtime/src/graph/route/file_category.rs index bb072d2..e319e1d 100644 --- a/crates/nvisy-runtime/src/graph/route/file_category.rs +++ b/crates/nvisy-runtime/src/graph/route/file_category.rs @@ -1,6 +1,6 @@ //! File category evaluator for routing by file extension. -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use crate::definition::FileCategory; diff --git a/crates/nvisy-runtime/src/graph/route/language.rs b/crates/nvisy-runtime/src/graph/route/language.rs index 41a7408..c1bed33 100644 --- a/crates/nvisy-runtime/src/graph/route/language.rs +++ b/crates/nvisy-runtime/src/graph/route/language.rs @@ -1,6 +1,6 @@ //! Language evaluator for routing by detected content language. -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; /// Evaluates language based on metadata. #[derive(Debug, Clone)] diff --git a/crates/nvisy-runtime/src/graph/route/mod.rs b/crates/nvisy-runtime/src/graph/route/mod.rs index aeeedd0..ade7107 100644 --- a/crates/nvisy-runtime/src/graph/route/mod.rs +++ b/crates/nvisy-runtime/src/graph/route/mod.rs @@ -5,7 +5,7 @@ mod language; pub use file_category::FileCategoryEvaluator; pub use language::LanguageEvaluator; -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use crate::definition::SwitchDef; diff --git a/crates/nvisy-runtime/src/graph/transform/chunk.rs b/crates/nvisy-runtime/src/graph/transform/chunk.rs index b23302f..0c632b7 100644 --- a/crates/nvisy-runtime/src/graph/transform/chunk.rs +++ b/crates/nvisy-runtime/src/graph/transform/chunk.rs @@ -1,6 +1,6 @@ //! Chunk processor. -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use nvisy_rig::agent::TextGenerationAgent; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/derive.rs b/crates/nvisy-runtime/src/graph/transform/derive.rs index bb4f815..4de85fa 100644 --- a/crates/nvisy-runtime/src/graph/transform/derive.rs +++ b/crates/nvisy-runtime/src/graph/transform/derive.rs @@ -1,6 +1,6 @@ //! Derive processor. 
-use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use nvisy_rig::agent::TextGenerationAgent; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/embedding.rs b/crates/nvisy-runtime/src/graph/transform/embedding.rs index dbcfe38..078e7e4 100644 --- a/crates/nvisy-runtime/src/graph/transform/embedding.rs +++ b/crates/nvisy-runtime/src/graph/transform/embedding.rs @@ -1,6 +1,6 @@ //! Embedding processor. -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use nvisy_rig::provider::EmbeddingProvider; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/enrich.rs b/crates/nvisy-runtime/src/graph/transform/enrich.rs index 656901e..4fe6fa9 100644 --- a/crates/nvisy-runtime/src/graph/transform/enrich.rs +++ b/crates/nvisy-runtime/src/graph/transform/enrich.rs @@ -1,6 +1,6 @@ //! Enrich processor. -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use nvisy_rig::agent::{TableAgent, VisionAgent}; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/extract.rs b/crates/nvisy-runtime/src/graph/transform/extract.rs index 71172a3..ee2864a 100644 --- a/crates/nvisy-runtime/src/graph/transform/extract.rs +++ b/crates/nvisy-runtime/src/graph/transform/extract.rs @@ -1,6 +1,6 @@ //! Extract processor. -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; use nvisy_rig::agent::{StructuredOutputAgent, TableAgent, TextAnalysisAgent}; use super::Process; diff --git a/crates/nvisy-runtime/src/graph/transform/mod.rs b/crates/nvisy-runtime/src/graph/transform/mod.rs index 5f6a986..93818d6 100644 --- a/crates/nvisy-runtime/src/graph/transform/mod.rs +++ b/crates/nvisy-runtime/src/graph/transform/mod.rs @@ -17,7 +17,7 @@ pub use derive::DeriveProcessor; pub use embedding::EmbeddingProcessor; pub use enrich::EnrichProcessor; pub use extract::ExtractProcessor; -use nvisy_dal::datatype::AnyDataValue; +use nvisy_dal::AnyDataValue; pub use partition::PartitionProcessor; use crate::error::Result; diff --git a/crates/nvisy-runtime/src/graph/transform/partition.rs b/crates/nvisy-runtime/src/graph/transform/partition.rs index e54fc42..042858a 100644 --- a/crates/nvisy-runtime/src/graph/transform/partition.rs +++ b/crates/nvisy-runtime/src/graph/transform/partition.rs @@ -1,6 +1,6 @@ //! Partition processor. 
-use nvisy_dal::datatype::AnyDataValue;
+use nvisy_dal::AnyDataValue;
 
 use super::Process;
 use crate::definition::PartitionStrategy;
diff --git a/crates/nvisy-webhook/src/reqwest/error.rs b/crates/nvisy-webhook/src/reqwest/error.rs
index fa0ecc5..cbff2af 100644
--- a/crates/nvisy-webhook/src/reqwest/error.rs
+++ b/crates/nvisy-webhook/src/reqwest/error.rs
@@ -21,20 +21,20 @@ impl From<Error> for crate::Error {
         match err {
             Error::Reqwest(e) => {
                 if e.is_timeout() {
-                    crate::Error::timeout()
+                    crate::Error::new(crate::ErrorKind::Timeout)
                         .with_message(e.to_string())
                         .with_source(e)
                 } else if e.is_connect() {
-                    crate::Error::network_error()
+                    crate::Error::new(crate::ErrorKind::NetworkError)
                         .with_message("Connection failed")
                         .with_source(e)
                 } else {
-                    crate::Error::network_error()
+                    crate::Error::new(crate::ErrorKind::NetworkError)
                         .with_message(e.to_string())
                         .with_source(e)
                 }
             }
-            Error::Serde(e) => crate::Error::serialization()
+            Error::Serde(e) => crate::Error::new(crate::ErrorKind::Serialization)
                 .with_message(e.to_string())
                 .with_source(e),
         }
diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md
new file mode 100644
index 0000000..f87f1ad
--- /dev/null
+++ b/docs/ROADMAP.md
@@ -0,0 +1,63 @@
+# DAL Provider Roadmap
+
+Status of provider implementations for the Data Abstraction Layer.
+
+## Completed
+
+- [x] Core protocols and error types
+
+## Relational Databases
+
+- [ ] PostgreSQL provider
+- [ ] MySQL provider
+- [ ] SQLite provider
+- [ ] SQL Server provider
+- [ ] Oracle provider
+
+## Object Storage
+
+- [ ] S3 provider
+- [ ] GCS provider
+- [ ] Azure Blob provider
+- [ ] MinIO provider
+- [ ] Cloudflare R2 provider
+
+## Vector Databases
+
+- [ ] Pinecone provider
+- [ ] Qdrant provider
+- [ ] Weaviate provider
+- [ ] Milvus provider
+- [ ] Chroma provider
+- [ ] pgvector provider
+
+## Document Databases
+
+- [ ] MongoDB provider
+- [ ] DynamoDB provider
+- [ ] Firestore provider
+- [ ] CouchDB provider
+
+## Key-Value Stores
+
+- [ ] Redis provider
+- [ ] Memcached provider
+- [ ] etcd provider
+
+## Message Queues
+
+- [ ] Kafka provider
+- [ ] RabbitMQ provider
+- [ ] NATS provider
+- [ ] SQS provider
+
+## Graph Databases
+
+- [ ] Neo4j provider
+- [ ] Neptune provider
+
+## Search Engines
+
+- [ ] Elasticsearch provider
+- [ ] OpenSearch provider
+- [ ] Algolia provider
diff --git a/packages/nvisy-dal/README.md b/packages/nvisy-dal/README.md
index 604f363..84e4f50 100644
--- a/packages/nvisy-dal/README.md
+++ b/packages/nvisy-dal/README.md
@@ -1,5 +1,7 @@
 # nvisy-dal
 
+[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml)
+
 Data abstraction layer for external integrations. Provides unified async interfaces for storage, databases, and vector stores.
 
 ## Installation
@@ -98,54 +100,17 @@ uv run pyright
 uv run ruff check .
 ```
 
-## TODO
-
-- [x] Core protocols and error types
-
-### Relational Databases
-- [ ] PostgreSQL provider
-- [ ] MySQL provider
-- [ ] SQLite provider
-- [ ] SQL Server provider
-- [ ] Oracle provider
-
-### Object Storage
-- [ ] S3 provider
-- [ ] GCS provider
-- [ ] Azure Blob provider
-- [ ] MinIO provider
-- [ ] Cloudflare R2 provider
-
-### Vector Databases
-- [ ] Pinecone provider
-- [ ] Qdrant provider
-- [ ] Weaviate provider
-- [ ] Milvus provider
-- [ ] Chroma provider
-- [ ] pgvector provider
-
-### Document Databases
-- [ ] MongoDB provider
-- [ ] DynamoDB provider
-- [ ] Firestore provider
-- [ ] CouchDB provider
-
-### Key-Value Stores
-- [ ] Redis provider
-- [ ] Memcached provider
-- [ ] etcd provider
-
-### Message Queues
-- [ ] Kafka provider
-- [ ] RabbitMQ provider
-- [ ] NATS provider
-- [ ] SQS provider
-
-### Graph Databases
-- [ ] Neo4j provider
-- [ ] Neptune provider
-
-### Search Engines
-- [ ] Elasticsearch provider
-- [ ] OpenSearch provider
-- [ ] Algolia provider
+## Changelog
+
+See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history.
+
+## License
+
+Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt)
+
+## Support
+
+- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com)
+- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues)
+- **Email**: [support@nvisy.com](mailto:support@nvisy.com)
+- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev)
diff --git a/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py b/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py
index 6a301f8..ce7b09b 100644
--- a/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py
+++ b/packages/nvisy-dal/src/nvisy_dal/providers/postgres.py
@@ -18,13 +18,12 @@
 
 
 class PostgresCredentials(BaseModel):
-    """Credentials for PostgreSQL connection."""
+    """Credentials for PostgreSQL connection.
 
-    host: str = "localhost"
-    port: int = 5432
-    user: str = "postgres"
-    password: str
-    database: str
+    Uses a connection string (DSN) format: postgres://user:pass@host:port/database
+    """
+
+    dsn: str
 
 
 class PostgresParams(BaseModel):
@@ -66,11 +65,7 @@ async def connect(
         """Establish connection pool to PostgreSQL."""
         try:
             pool = await asyncpg.create_pool(
-                host=credentials.host,
-                port=credentials.port,
-                user=credentials.user,
-                password=credentials.password,
-                database=credentials.database,
+                dsn=credentials.dsn,
                 min_size=1,
                 max_size=10,
             )
diff --git a/packages/nvisy-rig/README.md b/packages/nvisy-rig/README.md
index 8e2fb72..9e9a08f 100644
--- a/packages/nvisy-rig/README.md
+++ b/packages/nvisy-rig/README.md
@@ -1,5 +1,7 @@
 # nvisy-rig
 
+[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml)
+
 AI/LLM orchestration layer. Provides unified interfaces for LLM providers and agent workflows.
 
 ## Installation
@@ -62,11 +64,17 @@ uv run pyright
 uv run ruff check .
 ```
 
-## TODO
+## Changelog
+
+See [CHANGELOG.md](../../CHANGELOG.md) for release notes and version history.
+
+## License
+
+Apache 2.0 License - see [LICENSE.txt](../../LICENSE.txt)
+
+## Support
-
-- [ ] OpenAI provider
-- [ ] Anthropic provider
-- [ ] Cohere provider
-- [ ] Agent framework
-- [ ] RAG pipelines
-- [ ] Tool integration
+
+- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com)
+- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues)
+- **Email**: [support@nvisy.com](mailto:support@nvisy.com)
+- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev)