From e5dab4fe13ca5fabcc8cf7d66b1543af02271206 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sat, 17 Jan 2026 08:37:16 +0100 Subject: [PATCH 1/4] docs: rework README files - Remove badges from all READMEs - Update crate names to nvisy-rt-* - Simplify and standardize format - Fix doc tests and broken links --- README.md | 146 ++++++++------------------------ crates/nvisy-archive/README.md | 44 +++++----- crates/nvisy-core/README.md | 56 ++++-------- crates/nvisy-document/README.md | 70 ++++++--------- crates/nvisy-docx/README.md | 15 ++-- crates/nvisy-engine/README.md | 35 +++++--- crates/nvisy-image/README.md | 18 ++-- crates/nvisy-pdf/README.md | 15 ++-- crates/nvisy-text/README.md | 116 +++++++------------------ 9 files changed, 174 insertions(+), 341 deletions(-) diff --git a/README.md b/README.md index 1187bf8..c166704 100644 --- a/README.md +++ b/README.md @@ -1,136 +1,58 @@ -# Nvisy Runtime for Rust +# Nvisy Runtime -[![build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yaml?branch=main&color=000000&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yaml) -[![crates.io](https://img.shields.io/crates/v/nvisy-core?color=000000&style=flat-square)](https://crates.io/crates/nvisy-core) -[![docs.rs](https://img.shields.io/docsrs/nvisy-core?color=000000&style=flat-square)](https://docs.rs/nvisy-core) -[![rust version](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) +Document processing runtime for the Nvisy platform. -High-performance runtime library for data redaction and sensitive information -processing. +## Crates -## Features +| Crate | Description | +|-------|-------------| +| `nvisy-rt-core` | Core types, error handling, content management | +| `nvisy-rt-document` | Document abstraction layer and traits | +| `nvisy-rt-engine` | Processing engine with format registry | +| `nvisy-rt-archive` | Archive handling (ZIP, TAR, compression) | +| `nvisy-rt-text` | Text formats (Markdown, JSON, CSV, XML, YAML) | +| `nvisy-rt-pdf` | PDF document support (stub) | +| `nvisy-rt-docx` | Microsoft Word support (stub) | +| `nvisy-rt-image` | Image formats (JPEG, PNG) (stub) | -- Modern Rust 2024 edition with strict type safety -- High-performance async runtime powered by Tokio -- Flexible pattern matching and data detection -- Built-in archive and compression support -- Comprehensive error handling with structured diagnostics -- Modular architecture with optimized crate separation - -## Installation - -Add the core library to your `Cargo.toml`: - -```toml -[dependencies] -nvisy-core = "0.1" -``` - -Or install additional crates as needed: +## Usage ```toml [dependencies] -nvisy-core = "0.1" -nvisy-engine = "0.1" -nvisy-archive = "0.1" +nvisy-rt-engine = "0.1" ``` -## Quick Start - -### Using the Core Library - ```rust -use nvisy_core::prelude::*; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Initialize the runtime - let runtime = Runtime::new().await?; - - // Process sensitive data - let result = runtime.process("example data").await?; - - Ok(()) -} -``` - -## Architecture - -The runtime is organized into specialized crates: - -- **nvisy-core** - Core types, traits, and runtime primitives -- **nvisy-engine** - Processing engine and orchestration -- **nvisy-archive** - Archive handling and compression - -## Requirements - -- Rust 1.89 or higher -- Cargo with workspace support - -## Development +use nvisy_rt_engine::Engine; +use 
nvisy_rt_document::{ContentData, TextExtractor}; -### Building - -```bash -# Build all crates -cargo build - -# Build with release optimizations -cargo build --release - -# Build specific crate -cargo build -p nvisy-core +let engine = Engine::new(); +let data = ContentData::from("Hello, world!"); +let doc = engine.load_by_extension("txt", data).await?; +let text = doc.extract_text().await?; ``` -### Testing +## Development ```bash -# Run all tests -cargo test +# Build +cargo build --workspace -# Run tests for specific crate -cargo test -p nvisy-core - -# Run with coverage -cargo test --all-features -``` - -### Linting and Formatting - -```bash -# Check formatting -cargo fmt --check +# Test +cargo test --workspace --all-features -# Format code -cargo fmt +# Lint +cargo clippy --workspace --all-targets --all-features -- -D warnings -# Run clippy -cargo clippy --all-targets --all-features +# Format +cargo +nightly fmt --all ``` -## Performance - -The runtime is designed for high-throughput scenarios: - -- Async I/O with Tokio for concurrent request handling -- Memory-mapped file processing for large datasets -- Parallel pattern matching with Rayon -- Zero-copy operations where possible - -## Changelog - -See [CHANGELOG.md](CHANGELOG.md) for release notes and version history. - -## Contributing +## Requirements -See [CONTRIBUTING.md](CONTRIBUTING.md) for development guidelines. +- Rust 1.92+ +- Cargo with workspace support ## License -MIT License - see [LICENSE.txt](LICENSE.txt) for details. - -## Support - -- Documentation: [docs.nvisy.com](https://docs.nvisy.com) -- Issues: [GitHub Issues](https://github.com/nvisycom/runtime/issues) -- Email: [support@nvisy.com](mailto:support@nvisy.com) +MIT diff --git a/crates/nvisy-archive/README.md b/crates/nvisy-archive/README.md index fce88ab..d957f09 100644 --- a/crates/nvisy-archive/README.md +++ b/crates/nvisy-archive/README.md @@ -1,32 +1,30 @@ -# nvisy-archive +# nvisy-rt-archive -Archive handling and compression library for the Nvisy runtime. +Archive handling and compression for the Nvisy runtime. 
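+
+## Usage
+
+A minimal unpacking sketch (async context and error handling elided; see the
+`ArchiveFile` docs in this crate for the full API):
+
+```rust,ignore
+use nvisy_rt_archive::ArchiveFile;
+
+// The archive type is detected from the file extension.
+let archive = ArchiveFile::from_path("archive.zip")?;
+
+// Unpack to a temporary directory that is cleaned up when `handler` drops.
+let handler = archive.unpack().await?;
+for path in handler.file_paths() {
+    println!("extracted: {:?}", path);
+}
+```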
-[![rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) +## Supported Formats + +- **ZIP** - Standard ZIP archives +- **TAR** - Tape archives with optional compression +- **GZIP** - GNU zip compression +- **BZIP2** - Block-sorting compression +- **XZ** - LZMA2 compression +- **7z** - 7-Zip archives (optional) ## Features -- `zip` - ZIP archive support (enabled by default) -- `tar` - TAR archive support (enabled by default) +All features except `sevenz` are enabled by default: + +- `zip` - ZIP archive support +- `tar` - TAR archive support +- `gzip` - GZIP compression +- `bzip2` - BZIP2 compression +- `xz` - XZ/LZMA compression - `sevenz` - 7z archive support -- `gzip` - GZIP compression support (enabled by default) -- `bzip2` - BZIP2 compression support (enabled by default) -- `xz` - XZ/LZMA compression support (enabled by default) ## Capabilities -- **Multiple Formats** - ZIP, TAR, TAR.GZ, TAR.BZ2, TAR.XZ, GZIP, BZIP2, and XZ -- **Async Operations** - Full async/await support with Tokio -- **Flexible Loading** - Load from file paths, memory, or byte streams -- **Type Safety** - Strong typing with `ArchiveType` enum -- **Memory Efficient** - Stream-based processing for large archives -- **Cross-Platform** - Works on Windows, macOS, and Linux - -## Dependencies - -- `tokio` - Async runtime for I/O operations -- `tar` - TAR archive format support -- `zip` - ZIP archive format support -- `flate2` - GZIP compression -- `bzip2` - BZIP2 compression -- `xz2` - XZ compression +- Async operations with Tokio +- Load from paths, memory, or byte streams +- Stream-based processing for large archives +- Cross-platform (Windows, macOS, Linux) diff --git a/crates/nvisy-core/README.md b/crates/nvisy-core/README.md index 30369d4..8534649 100644 --- a/crates/nvisy-core/README.md +++ b/crates/nvisy-core/README.md @@ -1,53 +1,27 @@ -# nvisy-core +# nvisy-rt-core -Core types, traits, runtime primitives, and error handling for the Nvisy data -processing system. - -[![rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) -[![tokio](https://img.shields.io/badge/Tokio-1.0+-000000?style=flat-square&logo=rust&logoColor=white)](https://tokio.rs/) +Core types, traits, and error handling for the Nvisy runtime. ## Overview -This crate provides the foundational building blocks for the Nvisy ecosystem, -including data processing primitives, structured error handling, and content -tracking. +Foundational building blocks for the Nvisy ecosystem: content management, +structured error handling, and data tracking primitives. 
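+
+## Usage
+
+A short sketch of the content primitives (a minimal, non-exhaustive example):
+
+```rust,ignore
+use nvisy_rt_core::io::ContentData;
+
+// Wrap raw bytes; SHA256 hashing and source tracking come built in.
+let data = ContentData::from("Hello, world!");
+assert_eq!(data.size(), 13);
+println!("sha256: {}", data.sha256_hex());
+println!("source: {:?}", data.content_source);
+```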
## Core Types -- [`fs::DataSensitivity`] - Sensitivity levels for risk assessment -- [`fs::ContentFile`] - File operations with content tracking -- [`fs::ContentKind`] - Classification of content types by file extension -- [`fs::ContentMetadata`] - Metadata information for content files -- [`io::Content`] - Content types and data structures -- [`io::ContentData`] - Container for content data with metadata +- [`fs::ContentFile`] - Async file operations with content tracking +- [`fs::ContentKind`] - Content type classification by file extension +- [`fs::ContentMetadata`] - File metadata with path and extension info +- [`fs::DataSensitivity`] - Sensitivity levels for data classification +- [`io::Content`] - Content container with data and metadata +- [`io::ContentData`] - Byte container with SHA256 hashing - [`io::DataReference`] - Data references with source tracking - [`path::ContentSource`] - UUIDv7-based content source identification -- [`error::Error`] - Structured error handling with source classification +- [`error::Error`] - Structured error with type and source classification ## Features -### Data Processing - -- **Content Management** - Unified content structures with SHA256 hashing and - metadata -- **File Operations** - Async file handling with content source tracking -- **Data Classification** - Sensitivity levels for risk assessment -- **Format Detection** - Automatic content kind detection from file extensions -- **I/O Abstractions** - Modern async traits for content reading and writing -- **Zero-Copy Operations** - Efficient data handling using `bytes::Bytes` - -### Error Handling - -- **Structured Errors** - Rich error types with source classification and - context tracking -- **Builder Pattern** - Fluent API with `with_type()`, `with_resource()`, - `with_source()`, and `with_context()` methods -- **Result Types** - Ergonomic error handling with custom `Result` type - -## Dependencies - -- `tokio` - Async runtime for I/O operations -- `bytes` - Zero-copy byte buffer management -- `uuid` - Unique identifiers with `UUIDv7` support -- `jiff` - Timestamp support for content source tracking -- `strum` - Derive macros for enums +- **Content Management** - SHA256 hashing, metadata tracking, zero-copy bytes +- **Async I/O** - Modern async traits for content reading and writing +- **Error Handling** - Rich errors with builder pattern and context tracking +- **Format Detection** - Automatic content kind detection from extensions diff --git a/crates/nvisy-document/README.md b/crates/nvisy-document/README.md index 15024a2..551e327 100644 --- a/crates/nvisy-document/README.md +++ b/crates/nvisy-document/README.md @@ -1,59 +1,37 @@ -# nvisy-document +# nvisy-rt-document -Document manipulation library for VLM-driven editing workflows. +Document abstraction layer for the Nvisy runtime. -This crate provides a format-agnostic abstraction for document editing, -designed to support Vision Language Model (VLM) function calls for -operations like redaction, text replacement, splitting, and merging. +## Overview -## Core Concepts +Format-agnostic document handling designed for VLM-driven workflows. +Supports text extraction, table parsing, metadata, thumbnails, and format conversion. -- **[`DocumentFormat`]** - A format handler that can load and create documents. - Implementations know about format capabilities and how to parse/serialize - documents. +## Core Types -- **[`Document`]** - A loaded document instance for reading document content. 
- -- **[`Region`]** - Semantic units within a document (text blocks, images, - tables) with stable IDs that persist across edit sessions. +- [`DocumentFormat`] - Format handler that loads and creates documents +- [`Document`] - Loaded document instance with regions and content +- [`Region`] - Semantic unit (text block, image, table) with stable ID +- [`Capabilities`] - Declares what a format supports ## Extension Traits -Document implementations can optionally implement these extension traits: - -- [`Conversion`] - Convert documents to other formats -- [`Metadata`] - Extract and modify document metadata -- [`ThumbnailGenerator`] - Generate thumbnail images - -## Features - -- **Document Format Trait** - Common interface for PDF, DOCX, and other formats -- **Format Registry** - Register and look up formats by MIME type or extension -- **Region-based Editing** - Reference and modify document regions with stable IDs -- **Edit Operations** - Redaction, text replacement, structural changes -- **Streaming Support** - Handle large documents with pagination +- [`TextExtractor`] - Extract text content from documents +- [`TableExtractor`] - Extract structured table data +- [`Metadata`] - Read and modify document metadata +- [`ThumbnailGenerator`] - Generate preview images +- [`Conversion`] - Convert between document formats ## Architecture ```text -┌─────────────────────────────────────────────────────────────────┐ -│ nvisy-engine │ -│ (Edit sessions, undo/redo, region caching) │ -└─────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ nvisy-document │ -│ (DocumentFormat trait, EditOperation, Region, Registry) │ -└─────────────────────────────────────────────────────────────────┘ - │ - ┌─────────────────┼─────────────────┐ - ▼ ▼ ▼ - ┌──────────┐ ┌──────────┐ ┌──────────┐ - │nvisy-pdf │ │nvisy-docx│ │nvisy-text│ - └──────────┘ └──────────┘ └──────────┘ +nvisy-rt-engine (sessions, format registry) + | + v +nvisy-rt-document (traits, types, capabilities) + | + +-- nvisy-rt-pdf + +-- nvisy-rt-docx + +-- nvisy-rt-text + +-- nvisy-rt-image ``` - -## License - -MIT License - see [LICENSE.txt](../../LICENSE.txt) for details. diff --git a/crates/nvisy-docx/README.md b/crates/nvisy-docx/README.md index aaa6490..3d0a1dd 100644 --- a/crates/nvisy-docx/README.md +++ b/crates/nvisy-docx/README.md @@ -1,13 +1,14 @@ -# nvisy-docx +# nvisy-rt-docx -DOCX document format support for nvisy. - -This crate provides a `DocumentFormat` implementation for Microsoft Word DOCX files (.docx). +Microsoft Word DOCX format support for the Nvisy runtime. ## Status -This crate is currently a stub. DOCX parsing and manipulation are not yet implemented. +Stub implementation. DOCX parsing not yet implemented. -## License +## Planned Features -MIT +- Text extraction with formatting +- Table and list detection +- Metadata extraction +- Style information diff --git a/crates/nvisy-engine/README.md b/crates/nvisy-engine/README.md index 6540af8..95dde9d 100644 --- a/crates/nvisy-engine/README.md +++ b/crates/nvisy-engine/README.md @@ -1,21 +1,32 @@ -# nvisy-engine +# nvisy-rt-engine -Document editing session management for the Nvisy system. +Document processing engine for the Nvisy runtime. ## Overview -This crate provides session management for document editing workflows, -including undo/redo support, region caching, and streaming for large documents. 
+Central engine for document operations: format registry, session management,
+and document loading with tracing instrumentation.
+
+## Core Types
+
+- [`Engine`] - Main entry point with format registry
+- [`FormatRegistry`] - Register and look up formats by extension or MIME type
+- [`FormatRef`] - Reference to a registered format
+- [`LoadedDocument`] - Wrapper for loaded document instances
+- `Session` - Document editing session with history
 
 ## Features
 
-- **Edit Sessions** - Wrap documents with stable region IDs and undo/redo
-- **Edit History** - Track operations for undo/redo support
-- **Region Caching** - Quick lookup of document regions
-- **Streaming Support** - Lazy loading for large multi-page documents
+- Format registry with extension and MIME type lookup
+- Async document loading with tracing
+- Session management for editing workflows
+- Configurable via `EngineConfig`
+
+## Format Features
 
-## Dependencies
+Enable format support via Cargo features (all enabled by default):
 
-- `nvisy-document` - Document manipulation types
-- `jiff` - Timestamps
-- `uuid` - Session identifiers
+- `pdf` - PDF document support
+- `docx` - Microsoft Word support
+- `text` - Plain text and structured text formats
+- `image` - Image format support (JPEG, PNG)
diff --git a/crates/nvisy-image/README.md b/crates/nvisy-image/README.md
index b5e1574..c79a21d 100644
--- a/crates/nvisy-image/README.md
+++ b/crates/nvisy-image/README.md
@@ -1,13 +1,19 @@
-# nvisy-image
+# nvisy-rt-image
 
-Image format support for nvisy.
+Image format support for the Nvisy runtime.
 
-This crate provides a `DocumentFormat` implementation for image files (PNG, JPEG, GIF, WebP, etc.).
+## Supported Formats
+
+- **JPEG** - `.jpg`, `.jpeg`
+- **PNG** - `.png`
 
 ## Status
 
-This crate is currently a stub. Image parsing and manipulation are not yet implemented.
+Stub implementation. Image processing not yet implemented.
 
-## License
+## Planned Features
 
-MIT
+- Image metadata extraction (EXIF, dimensions)
+- Thumbnail generation
+- Format conversion
+- OCR integration for text extraction
diff --git a/crates/nvisy-pdf/README.md b/crates/nvisy-pdf/README.md
index 7c2cad4..e0719dc 100644
--- a/crates/nvisy-pdf/README.md
+++ b/crates/nvisy-pdf/README.md
@@ -1,13 +1,14 @@
-# nvisy-pdf
+# nvisy-rt-pdf
 
-PDF document format support for nvisy.
-
-This crate provides a `DocumentFormat` implementation for PDF files (.pdf).
+PDF document format support for the Nvisy runtime.
 
 ## Status
 
-This crate is currently a stub. PDF parsing and manipulation are not yet implemented.
+Stub implementation. PDF parsing not yet implemented.
 
-## License
+## Planned Features
 
-MIT
+- Text extraction with layout preservation
+- Page-based region detection
+- Metadata extraction
+- Thumbnail generation
diff --git a/crates/nvisy-text/README.md b/crates/nvisy-text/README.md
index fb89aad..1f8048b 100644
--- a/crates/nvisy-text/README.md
+++ b/crates/nvisy-text/README.md
@@ -1,104 +1,46 @@
-# nvisy-text
+# nvisy-rt-text
 
-Text-based document format support for nvisy.
+Text-based document formats for the Nvisy runtime.
-This crate provides support for loading and extracting text from -various text-based file formats: +## Supported Formats -- **Plain text** (`.txt`, `.text`) -- **Markdown** (`.md`, `.markdown`, `.mdx`) -- **JSON** (`.json`) -- **CSV/TSV** (`.csv`, `.tsv`) -- **XML** (`.xml`, `.xsd`, `.xsl`, `.xslt`, `.svg`, `.xhtml`, `.plist`) -- **YAML** (`.yaml`, `.yml`) -- **TOML** (`.toml`) -- **INI** (`.ini`, `.cfg`, `.conf`, `.config`) +| Format | Extensions | Features | +|--------|------------|----------| +| Plain text | `.txt`, `.text` | Paragraph detection | +| Markdown | `.md`, `.markdown`, `.mdx` | Headings, lists, code blocks | +| JSON | `.json` | Structure detection | +| CSV/TSV | `.csv`, `.tsv` | Table extraction | +| XML | `.xml`, `.xsd`, `.xsl`, `.svg` | Hierarchical parsing | +| YAML | `.yaml`, `.yml` | Key-value and list detection | +| TOML | `.toml` | Section and table parsing | +| INI | `.ini`, `.cfg`, `.conf` | Section grouping | ## Usage -```rust -use nvisy_rt_text::{PlainTextFormat, PlainTextDocument}; -use nvisy_rt_document::{ContentData, DocumentFormat, Document, TextExtractor}; +```rust,ignore +use nvisy_rt_text::PlainTextFormat; +use nvisy_rt_document::{ContentData, DocumentFormat, TextExtractor}; -# tokio_test::block_on(async { let format = PlainTextFormat::new(); let data = ContentData::from("Hello, world!\n\nThis is a paragraph."); -let doc = format.load(data).await.unwrap(); -assert_eq!(doc.regions().len(), 2); +let doc = format.load(data).await?; +let text = doc.extract_text().await?; -let text = doc.extract_text().await.unwrap(); assert_eq!(text.word_count(), 6); -# }); -``` - -## Formats - -### Plain Text - -Basic plain text with paragraph detection. - -```rust -use nvisy_rt_text::PlainTextFormat; -``` - -### Markdown - -Full Markdown parsing using pulldown-cmark with support for headings, lists, code blocks, blockquotes, and more. - -```rust -use nvisy_rt_text::MarkdownFormat; -``` - -### JSON - -JSON parsing with structure detection using serde_json. - -```rust -use nvisy_rt_text::JsonFormat; -``` - -### CSV/TSV - -CSV and TSV parsing using the csv crate. Implements `TableExtractor` for structured table access. - -```rust -use nvisy_rt_text::CsvFormat; -use nvisy_rt_document::TableExtractor; ``` -### XML - -XML parsing with hierarchical structure detection. +## Format Types ```rust -use nvisy_rt_text::XmlFormat; +use nvisy_rt_text::{ + PlainTextFormat, // .txt + MarkdownFormat, // .md + JsonFormat, // .json + CsvFormat, // .csv, .tsv + XmlFormat, // .xml + YamlFormat, // .yaml + TomlFormat, // .toml + IniFormat, // .ini +}; ``` - -### YAML - -YAML parsing with list and key-value detection. - -```rust -use nvisy_rt_text::YamlFormat; -``` - -### TOML - -TOML parsing with section and array table detection. - -```rust -use nvisy_rt_text::TomlFormat; -``` - -### INI - -INI/config file parsing with section grouping. 
- -```rust -use nvisy_rt_text::IniFormat; -``` - -## License - -MIT From c2be1df10a6652af7f51ce000fdbf603e2034f83 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sat, 17 Jan 2026 23:11:53 +0100 Subject: [PATCH 2/4] refactor(archive): rework handlers and compile-time feature gating - Move unpack logic from ArchiveFile to dedicated handlers (zip, tar, sevenz) - Add pack/unpack functions to zip and tar handlers - Create dedicated 7z handler for SevenZ format - Add TarResultExt for TAR error context (similar to ZipResultExt) - Make ZipResultExt and TarResultExt pub(crate) instead of public - Add #[cfg] attributes on ArchiveType variants for compile-time feature checks - Remove strum dependency from ArchiveType enum - Remove runtime error fallbacks for disabled features - Extract ArchiveRegistry to dedicated registry module - Clean up module structure and exports --- crates/nvisy-archive/Cargo.toml | 5 +- crates/nvisy-archive/src/error.rs | 134 ++++ crates/nvisy-archive/src/file/archive_file.rs | 391 ++++++++++ .../nvisy-archive/src/file/archive_handler.rs | 642 +++++++++++++++++ crates/nvisy-archive/src/file/archive_type.rs | 213 +++--- crates/nvisy-archive/src/file/mod.rs | 677 +----------------- crates/nvisy-archive/src/handler/mod.rs | 580 +-------------- crates/nvisy-archive/src/handler/sevenz.rs | 46 ++ crates/nvisy-archive/src/handler/tar.rs | 224 ++++++ .../nvisy-archive/src/handler/tar_handler.rs | 593 --------------- crates/nvisy-archive/src/handler/zip.rs | 126 ++++ .../nvisy-archive/src/handler/zip_handler.rs | 575 --------------- crates/nvisy-archive/src/lib.rs | 139 +--- crates/nvisy-archive/src/prelude.rs | 10 +- crates/nvisy-archive/src/registry/mod.rs | 408 +++++++++++ 15 files changed, 2097 insertions(+), 2666 deletions(-) create mode 100644 crates/nvisy-archive/src/error.rs create mode 100644 crates/nvisy-archive/src/file/archive_file.rs create mode 100644 crates/nvisy-archive/src/file/archive_handler.rs create mode 100644 crates/nvisy-archive/src/handler/sevenz.rs create mode 100644 crates/nvisy-archive/src/handler/tar.rs delete mode 100644 crates/nvisy-archive/src/handler/tar_handler.rs create mode 100644 crates/nvisy-archive/src/handler/zip.rs delete mode 100644 crates/nvisy-archive/src/handler/zip_handler.rs create mode 100644 crates/nvisy-archive/src/registry/mod.rs diff --git a/crates/nvisy-archive/Cargo.toml b/crates/nvisy-archive/Cargo.toml index 04815d5..f766307 100644 --- a/crates/nvisy-archive/Cargo.toml +++ b/crates/nvisy-archive/Cargo.toml @@ -41,8 +41,12 @@ tokio = { workspace = true, features = ["fs", "io-util", "rt"] } tempfile = { workspace = true } # Macros +derive_more = { workspace = true, features = ["debug", "display", "deref"] } strum = { workspace = true, features = ["derive"] } +# Logging +tracing = { workspace = true } + # Archive formats (optional) tar = { version = "0.4", optional = true } zip = { version = "7.1", optional = true } @@ -56,4 +60,3 @@ xz2 = { version = "0.1", optional = true } [dev-dependencies] tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } tokio-test = { workspace = true } -tempfile = { workspace = true } diff --git a/crates/nvisy-archive/src/error.rs b/crates/nvisy-archive/src/error.rs new file mode 100644 index 0000000..b07de08 --- /dev/null +++ b/crates/nvisy-archive/src/error.rs @@ -0,0 +1,134 @@ +//! Archive-specific error types and extensions +//! +//! This module provides error extension traits for creating archive-specific +//! errors and converting errors from underlying archive libraries. 
+
+pub use nvisy_rt_core::error::{Error, ErrorResource, ErrorType, Result};
+
+/// Extension trait for creating archive-specific errors
+pub trait ArchiveErrorExt {
+    /// Create an unsupported format error
+    fn unsupported_format(format: impl Into<String>) -> Error;
+
+    /// Create an invalid archive error
+    fn invalid_archive(message: impl Into<String>) -> Error;
+
+    /// Create an entry not found error
+    fn entry_not_found(name: impl Into<String>) -> Error;
+
+    /// Create a permission denied error
+    fn archive_permission_denied(message: impl Into<String>) -> Error;
+
+    /// Create a corrupted archive error
+    fn corrupted(message: impl Into<String>) -> Error;
+
+    /// Create a resource limit error
+    fn archive_resource_limit(message: impl Into<String>) -> Error;
+}
+
+impl ArchiveErrorExt for Error {
+    fn unsupported_format(format: impl Into<String>) -> Error {
+        Error::new(format!("Unsupported archive format: {}", format.into()))
+            .with_type(ErrorType::Runtime)
+            .with_resource(ErrorResource::Archive)
+    }
+
+    fn invalid_archive(message: impl Into<String>) -> Error {
+        Error::new(format!("Invalid archive: {}", message.into()))
+            .with_type(ErrorType::Runtime)
+            .with_resource(ErrorResource::Archive)
+    }
+
+    fn entry_not_found(name: impl Into<String>) -> Error {
+        Error::new(format!("Entry not found: {}", name.into()))
+            .with_type(ErrorType::Runtime)
+            .with_resource(ErrorResource::Archive)
+    }
+
+    fn archive_permission_denied(message: impl Into<String>) -> Error {
+        Error::new(format!("Permission denied: {}", message.into()))
+            .with_type(ErrorType::Runtime)
+            .with_resource(ErrorResource::Archive)
+    }
+
+    fn corrupted(message: impl Into<String>) -> Error {
+        Error::new(format!("Corrupted archive: {}", message.into()))
+            .with_type(ErrorType::Runtime)
+            .with_resource(ErrorResource::Archive)
+    }
+
+    fn archive_resource_limit(message: impl Into<String>) -> Error {
+        Error::new(format!("Resource limit exceeded: {}", message.into()))
+            .with_type(ErrorType::Runtime)
+            .with_resource(ErrorResource::Archive)
+    }
+}
+
+/// Extension to convert zip::Result to our Result type
+#[cfg(feature = "zip")]
+pub(crate) trait ZipResultExt<T> {
+    fn map_zip_err(self) -> Result<T>;
+}
+
+#[cfg(feature = "zip")]
+impl<T> ZipResultExt<T> for std::result::Result<T, zip::result::ZipError> {
+    fn map_zip_err(self) -> Result<T> {
+        self.map_err(|e| {
+            Error::from_source("ZIP operation failed", e)
+                .with_type(ErrorType::Runtime)
+                .with_resource(ErrorResource::Archive)
+        })
+    }
+}
+
+/// Extension to convert tar I/O errors to our Result type
+#[cfg(feature = "tar")]
+pub(crate) trait TarResultExt<T> {
+    fn map_tar_err(self) -> Result<T>;
+}
+
+#[cfg(feature = "tar")]
+impl<T> TarResultExt<T> for std::result::Result<T, std::io::Error> {
+    fn map_tar_err(self) -> Result<T> {
+        self.map_err(|e| {
+            Error::from_source("TAR operation failed", e)
+                .with_type(ErrorType::Runtime)
+                .with_resource(ErrorResource::Archive)
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_error_creation() {
+        let error = <Error as ArchiveErrorExt>::unsupported_format("custom");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::invalid_archive("test message");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::entry_not_found("missing.txt");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::archive_permission_denied("access denied");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::corrupted("bad data");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::archive_resource_limit("too big");
+        assert_eq!(error.resource, ErrorResource::Archive);
+    }
+
+    #[test]
+    fn test_error_display() {
+        let error = <Error as ArchiveErrorExt>::unsupported_format("test");
+        assert!(error.to_string().contains("Unsupported archive format"));
+
+        let error = <Error as ArchiveErrorExt>::invalid_archive("bad archive");
+        assert!(error.to_string().contains("Invalid archive"));
+    }
+}
diff --git a/crates/nvisy-archive/src/file/archive_file.rs b/crates/nvisy-archive/src/file/archive_file.rs
new file mode 100644
index 0000000..614a3af
--- /dev/null
+++ b/crates/nvisy-archive/src/file/archive_file.rs
@@ -0,0 +1,391 @@
+//! Archive file handling for content processing
+//!
+//! This module provides [`ArchiveFile`] for working with archive files,
+//! including extraction to temporary directories and repacking from various sources.
+
+use std::ffi::OsStr;
+use std::io::Cursor;
+use std::path::{Path, PathBuf};
+
+use bytes::Bytes;
+use derive_more::Debug;
+use tokio::fs;
+
+use super::ArchiveHandler;
+use super::archive_type::ArchiveType;
+use crate::{ArchiveErrorExt, Error, Result, handler};
+
+pub use nvisy_rt_core::io::ContentData;
+pub use nvisy_rt_core::path::ContentSource;
+
+/// Represents an archive file that can be loaded from various sources
+///
+/// This struct encapsulates an archive and provides methods for
+/// extracting its contents to a temporary directory for processing.
+/// It integrates with nvisy-core's `ContentData` and `ContentSource`
+/// for content tracking and integrity verification.
+#[derive(Debug)]
+pub struct ArchiveFile {
+    content_source: ContentSource,
+    archive_type: ArchiveType,
+    source: ArchiveSource,
+}
+
+#[derive(Debug)]
+enum ArchiveSource {
+    Path(PathBuf),
+    #[debug("ContentData({} bytes)", _0.size())]
+    ContentData(ContentData),
+}
+
+impl ArchiveFile {
+    /// Create a new archive file from a file path
+    ///
+    /// The archive type is automatically detected from the file extension.
+    /// A new `ContentSource` is generated to track this archive.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_rt_archive::file::ArchiveFile;
+    ///
+    /// let archive = ArchiveFile::from_path("archive.zip")?;
+    /// # Ok::<(), nvisy_rt_archive::Error>(())
+    /// ```
+    pub fn from_path(path: impl AsRef<Path>) -> Result<Self> {
+        let path = path.as_ref();
+        let extension = path
+            .extension()
+            .ok_or_else(|| Error::invalid_archive("No file extension found"))?;
+
+        let full_name = path
+            .file_name()
+            .and_then(|name| name.to_str())
+            .unwrap_or("");
+
+        let archive_type = if full_name.contains(".tar.") {
+            if let Some(pos) = full_name.find(".tar.") {
+                let compound_ext = &full_name[pos + 1..];
+                ArchiveType::from_file_extension(OsStr::new(compound_ext))
+            } else {
+                None
+            }
+        } else {
+            None
+        }
+        .or_else(|| ArchiveType::from_file_extension(extension))
+        .ok_or_else(|| Error::unsupported_format(extension.to_string_lossy().to_string()))?;
+
+        Ok(Self {
+            content_source: ContentSource::new(),
+            archive_type,
+            source: ArchiveSource::Path(path.to_path_buf()),
+        })
+    }
+
+    /// Create a new archive file from ContentData
+    ///
+    /// This preserves the content source from the provided ContentData,
+    /// maintaining content lineage tracking.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_rt_archive::prelude::*;
+    ///
+    /// let data = ContentData::from(vec![0x50, 0x4B, 0x03, 0x04]); // ZIP signature
+    /// let archive = ArchiveFile::from_content_data(ArchiveType::Zip, data);
+    /// ```
+    pub fn from_content_data(archive_type: ArchiveType, content_data: ContentData) -> Self {
+        Self {
+            content_source: content_data.content_source,
+            archive_type,
+            source: ArchiveSource::ContentData(content_data),
+        }
+    }
+
+    /// Create a new archive file from raw bytes with explicit archive type
+    ///
+    /// A new `ContentSource` is generated to track this archive.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_rt_archive::file::{ArchiveFile, ArchiveType};
+    ///
+    /// let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature
+    /// let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data);
+    /// ```
+    pub fn from_bytes(archive_type: ArchiveType, data: impl Into<Bytes>) -> Self {
+        let content_data = ContentData::from(data.into());
+        Self {
+            content_source: content_data.content_source,
+            archive_type,
+            source: ArchiveSource::ContentData(content_data),
+        }
+    }
+
+    /// Create an archive with explicit type (useful for ambiguous extensions)
+    pub fn with_archive_type(mut self, archive_type: ArchiveType) -> Self {
+        self.archive_type = archive_type;
+        self
+    }
+
+    /// Get the content source identifier for this archive
+    pub fn content_source(&self) -> ContentSource {
+        self.content_source
+    }
+
+    /// Get the archive type
+    pub fn archive_type(&self) -> ArchiveType {
+        self.archive_type
+    }
+
+    /// Check if the archive source exists (only meaningful for file-based sources)
+    pub async fn exists(&self) -> bool {
+        match &self.source {
+            ArchiveSource::Path(path) => fs::try_exists(path).await.unwrap_or(false),
+            ArchiveSource::ContentData(_) => true,
+        }
+    }
+
+    /// Get the file path (if loaded from a file)
+    pub fn path(&self) -> Option<&Path> {
+        match &self.source {
+            ArchiveSource::Path(path) => Some(path),
+            ArchiveSource::ContentData(_) => None,
+        }
+    }
+
+    /// Get the size of the archive data in bytes
+    pub async fn size(&self) -> Result<u64> {
+        match &self.source {
+            ArchiveSource::Path(path) => {
+                let metadata = fs::metadata(path).await?;
+                Ok(metadata.len())
+            }
+            ArchiveSource::ContentData(data) => Ok(data.size() as u64),
+        }
+    }
+
+    /// Get the SHA256 hash of the archive content
+    pub async fn sha256(&self) -> Result<String> {
+        match &self.source {
+            ArchiveSource::Path(path) => {
+                let data = fs::read(path).await?;
+                let content_data = ContentData::from(data);
+                Ok(content_data.sha256_hex())
+            }
+            ArchiveSource::ContentData(data) => Ok(data.sha256_hex()),
+        }
+    }
+
+    /// Extract the archive to a temporary directory
+    ///
+    /// Returns an [`ArchiveHandler`] handle for managing the extracted contents.
+    /// The directory is automatically cleaned up when the handle is dropped.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_rt_archive::file::{ArchiveFile, ArchiveHandler};
+    ///
+    /// # async fn example() -> nvisy_rt_archive::Result<()> {
+    /// let archive = ArchiveFile::from_path("archive.zip")?;
+    /// let handler: ArchiveHandler = archive.unpack().await?;
+    ///
+    /// for file_path in handler.file_paths() {
+    ///     println!("Found file: {:?}", file_path);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub async fn unpack(self) -> Result<ArchiveHandler> {
+        let temp_dir = tempfile::tempdir().map_err(|e| {
+            Error::invalid_archive(format!("Failed to create temporary directory: {}", e))
+        })?;
+        let temp_path = temp_dir.keep();
+
+        let files = self.extract_to(&temp_path).await?;
+
+        Ok(ArchiveHandler::new(
+            self.content_source,
+            self.archive_type,
+            self.path().map(|p| p.to_path_buf()),
+            temp_path,
+            files,
+        ))
+    }
+
+    /// Extract the archive to a specific directory
+    ///
+    /// The target directory must already exist.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_rt_archive::file::ArchiveFile;
+    /// use std::path::Path;
+    ///
+    /// # async fn example() -> nvisy_rt_archive::Result<()> {
+    /// let archive = ArchiveFile::from_path("archive.zip")?;
+    /// let _handler = archive.unpack_to(Path::new("/tmp/my-extraction")).await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub async fn unpack_to(self, target_dir: impl AsRef<Path>) -> Result<ArchiveHandler> {
+        let target_dir = target_dir.as_ref();
+
+        if !target_dir.exists() {
+            return Err(Error::invalid_archive(format!(
+                "Target directory does not exist: {}",
+                target_dir.display()
+            )));
+        }
+
+        let files = self.extract_to(target_dir).await?;
+
+        Ok(ArchiveHandler::new(
+            self.content_source,
+            self.archive_type,
+            self.path().map(|p| p.to_path_buf()),
+            target_dir.to_path_buf(),
+            files,
+        ))
+    }
+
+    async fn extract_to(&self, target_dir: &Path) -> Result<Vec<PathBuf>> {
+        let data = self.read_data().await?;
+        let archive_type = self.archive_type;
+        let target_dir = target_dir.to_path_buf();
+
+        tokio::task::spawn_blocking(move || {
+            let cursor = Cursor::new(data);
+            extract_archive(cursor, &target_dir, archive_type)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))?
+    }
+
+    async fn read_data(&self) -> Result<Vec<u8>> {
+        match &self.source {
+            ArchiveSource::Path(path) => Ok(fs::read(path).await?),
+            ArchiveSource::ContentData(data) => Ok(data.as_bytes().to_vec()),
+        }
+    }
+}
+
+fn extract_archive(
+    cursor: Cursor<Vec<u8>>,
+    target_dir: &Path,
+    archive_type: ArchiveType,
+) -> Result<Vec<PathBuf>> {
+    match archive_type {
+        #[cfg(feature = "zip")]
+        ArchiveType::Zip => handler::zip::unpack(cursor, target_dir),
+        #[cfg(feature = "tar")]
+        ArchiveType::Tar => handler::tar::unpack(cursor, target_dir),
+        #[cfg(all(feature = "tar", feature = "gzip"))]
+        ArchiveType::TarGz => handler::tar::unpack_gz(cursor, target_dir),
+        #[cfg(all(feature = "tar", feature = "bzip2"))]
+        ArchiveType::TarBz2 => handler::tar::unpack_bz2(cursor, target_dir),
+        #[cfg(all(feature = "tar", feature = "xz"))]
+        ArchiveType::TarXz => handler::tar::unpack_xz(cursor, target_dir),
+        #[cfg(feature = "gzip")]
+        ArchiveType::Gz => extract_single_compressed(cursor, target_dir, "gz"),
+        #[cfg(feature = "bzip2")]
+        ArchiveType::Bz2 => extract_single_compressed(cursor, target_dir, "bz2"),
+        #[cfg(feature = "xz")]
+        ArchiveType::Xz => extract_single_compressed(cursor, target_dir, "xz"),
+        #[cfg(feature = "sevenz")]
+        ArchiveType::SevenZ => handler::sevenz::unpack(cursor, target_dir),
+    }
+}
+
+#[cfg(any(feature = "gzip", feature = "bzip2", feature = "xz"))]
+fn extract_single_compressed(
+    cursor: Cursor<Vec<u8>>,
+    target_dir: &Path,
+    compression: &str,
+) -> Result<Vec<PathBuf>> {
+    use std::io::Read;
+
+    let content: Vec<u8> = match compression {
+        #[cfg(feature = "gzip")]
+        "gz" => {
+            let mut decoder = flate2::read::GzDecoder::new(cursor);
+            let mut buf = Vec::new();
+            decoder.read_to_end(&mut buf)?;
+            buf
+        }
+        #[cfg(feature = "bzip2")]
+        "bz2" => {
+            let mut decoder = bzip2::read::BzDecoder::new(cursor);
+            let mut buf = Vec::new();
+            decoder.read_to_end(&mut buf)?;
+            buf
+        }
+        #[cfg(feature = "xz")]
+        "xz" => {
+            let mut decoder = xz2::read::XzDecoder::new(cursor);
+            let mut buf = Vec::new();
+            decoder.read_to_end(&mut buf)?;
+            buf
+        }
+        _ => return Err(Error::unsupported_format(compression)),
+    };
+
+    let output_path = target_dir.join("extracted");
+    std::fs::write(&output_path, content)?;
+    Ok(vec![output_path])
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_archive_file_from_bytes() {
+        let data = vec![0x50, 0x4B, 0x03, 0x04];
+        let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data);
+        assert_eq!(archive.archive_type(), ArchiveType::Zip);
+        assert!(archive.path().is_none());
+        assert!(!archive.content_source().as_uuid().is_nil());
+    }
+
+    #[test]
+    fn test_archive_file_from_content_data() {
+        let content_data = ContentData::from(vec![0x50, 0x4B, 0x03, 0x04]);
+        let original_source = content_data.content_source;
+        let archive = ArchiveFile::from_content_data(ArchiveType::Zip, content_data);
+        assert_eq!(archive.content_source(), original_source);
+    }
+
+    #[test]
+    fn test_archive_file_from_path() -> Result<()> {
+        let archive = ArchiveFile::from_path("test.zip")?;
+        assert_eq!(archive.archive_type(), ArchiveType::Zip);
+        assert!(archive.path().is_some());
+        Ok(())
+    }
+
+    #[test]
+    fn test_compound_extension() -> Result<()> {
+        let archive = ArchiveFile::from_path("test.tar.gz")?;
+        assert_eq!(archive.archive_type(), ArchiveType::TarGz);
+        Ok(())
+    }
+
+    #[test]
+    fn test_unsupported_extension() {
+        let result = ArchiveFile::from_path("test.unknown");
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_memory_size() {
+        let data = vec![1, 2, 3, 4, 5];
+        let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data);
+        assert_eq!(archive.size().await.unwrap(), 5);
+    }
+}
diff --git a/crates/nvisy-archive/src/file/archive_handler.rs b/crates/nvisy-archive/src/file/archive_handler.rs
new file mode 100644
index 0000000..8a30990
--- /dev/null
+++ b/crates/nvisy-archive/src/file/archive_handler.rs
@@ -0,0 +1,642 @@
+//! Archive handler for managing extracted archive contents
+//!
+//! This module provides [`ArchiveHandler`] for managing extracted archive
+//! contents with automatic cleanup on drop.
+
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use derive_more::{Debug, Deref};
+
+use super::ArchiveFile;
+use super::ArchiveType;
+use crate::{ArchiveErrorExt, Error, Result, handler};
+
+pub use nvisy_rt_core::fs::ContentKind;
+pub use nvisy_rt_core::path::ContentSource;
+
+/// Handle to an extracted archive
+///
+/// This struct manages an extracted archive directory and ensures cleanup
+/// when the handle is dropped. It provides methods for accessing and
+/// manipulating extracted files.
+///
+/// # Cleanup Behavior
+///
+/// By default, the extraction directory is removed when this handle is
+/// dropped. Use [`ArchiveHandler::persist`] to prevent automatic cleanup
+/// if you want to keep the extracted files.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use nvisy_rt_archive::ArchiveFile;
+///
+/// # async fn example() -> nvisy_rt_archive::Result<()> {
+/// let archive = ArchiveFile::from_path("archive.zip")?;
+/// let handler = archive.unpack().await?;
+///
+/// // Access extracted files
+/// println!("Extracted {} files to {:?}", handler.file_count(), handler.path());
+///
+/// for path in handler.file_paths() {
+///     println!("  - {:?}", path);
+/// }
+///
+/// // Directory is cleaned up when `handler` is dropped
+/// # Ok(())
+/// # }
+/// ```
+#[derive(Debug, Deref)]
+pub struct ArchiveHandler {
+    /// Unique identifier for this archive content
+    pub content_source: ContentSource,
+    /// Type of the original archive
+    pub archive_type: ArchiveType,
+    /// Original archive file path (if loaded from file)
+    pub original_path: Option<PathBuf>,
+    /// Path to the extraction directory
+    #[deref]
+    path: PathBuf,
+    /// Files found in the archive
+    files: Vec<PathBuf>,
+    /// Whether to clean up on drop
+    should_cleanup: bool,
+}
+
+impl ArchiveHandler {
+    /// Create a new archive handler
+    ///
+    /// This is typically called internally by `ArchiveFile::unpack()` or
+    /// `ArchiveRegistry::extract()`.
+    pub fn new(
+        content_source: ContentSource,
+        archive_type: ArchiveType,
+        original_path: Option<PathBuf>,
+        path: PathBuf,
+        files: Vec<PathBuf>,
+    ) -> Self {
+        Self {
+            content_source,
+            archive_type,
+            original_path,
+            path,
+            files,
+            should_cleanup: true,
+        }
+    }
+
+    /// Get the path to the extraction directory
+    pub fn path(&self) -> &Path {
+        &self.path
+    }
+
+    /// Get the archive type
+    pub fn archive_type(&self) -> ArchiveType {
+        self.archive_type
+    }
+
+    /// Get the number of files in the extracted archive
+    pub fn file_count(&self) -> usize {
+        self.files.len()
+    }
+
+    /// Check if the extracted archive is empty
+    pub fn is_empty(&self) -> bool {
+        self.files.is_empty()
+    }
+
+    /// Get a list of all file paths in the extracted archive
+    pub fn file_paths(&self) -> &[PathBuf] {
+        &self.files
+    }
+
+    /// Get relative paths of all files (relative to extraction directory)
+    pub fn relative_file_paths(&self) -> Result<Vec<PathBuf>> {
+        self.files
+            .iter()
+            .map(|path| {
+                path.strip_prefix(&self.path)
+                    .map(|p| p.to_path_buf())
+                    .map_err(|e| Error::invalid_archive(format!("Invalid file path: {}", e)))
+            })
+            .collect()
+    }
+
+    /// Find files matching a specific predicate
+    pub fn find_files(&self, predicate: impl Fn(&PathBuf) -> bool) -> Vec<&PathBuf> {
+        self.files.iter().filter(|path| predicate(path)).collect()
+    }
+
+    /// Find files with specific extension
+    pub fn find_files_by_extension(&self, extension: &str) -> Vec<&PathBuf> {
+        self.find_files(|path| {
+            path.extension()
+                .and_then(|ext| ext.to_str())
+                .map(|ext| ext.eq_ignore_ascii_case(extension))
+                .unwrap_or(false)
+        })
+    }
+
+    /// Find files matching a specific content kind
+    pub fn find_files_by_kind(&self, kind: ContentKind) -> Vec<&PathBuf> {
+        self.find_files(|path| self.content_kind_for_path(path) == kind)
+    }
+
+    /// Get the content kind for a file path based on its extension
+    pub fn content_kind_for_path(&self, path: &Path) -> ContentKind {
+        path.extension()
+            .and_then(|ext| ext.to_str())
+            .map(content_kind_from_extension)
+            .unwrap_or_default()
+    }
+
+    /// Check if a specific file exists in the archive
+    pub fn contains_file(&self, relative_path: impl AsRef<Path>) -> bool {
+        let target_path = self.path.join(relative_path);
+        self.files.contains(&target_path)
+    }
+
+    /// Read a file from the extracted archive
+    pub async fn read_file(&self, relative_path: impl AsRef<Path>) -> Result<Vec<u8>> {
+        let target_path = self.path.join(relative_path);
+        if !self.files.contains(&target_path) {
+            return Err(Error::entry_not_found(
+                target_path.to_string_lossy().to_string(),
+            ));
+        }
+        tokio::fs::read(&target_path).await.map_err(Into::into)
+    }
+
+    /// Write a file to the extracted archive
+    pub async fn write_file(
+        &mut self,
+        relative_path: impl AsRef<Path>,
+        content: &[u8],
+    ) -> Result<()> {
+        let target_path = self.path.join(relative_path.as_ref());
+
+        // Create parent directories if they don't exist
+        if let Some(parent) = target_path.parent() {
+            tokio::fs::create_dir_all(parent).await?;
+        }
+
+        tokio::fs::write(&target_path, content).await?;
+
+        // Add to files list if not already present
+        if !self.files.contains(&target_path) {
+            self.files.push(target_path);
+            self.files.sort();
+        }
+
+        Ok(())
+    }
+
+    /// Refresh the file list by scanning the extraction directory
+    pub fn refresh_file_list(&mut self) -> Result<()> {
+        self.files = scan_files(&self.path)?;
+        Ok(())
+    }
+
+    /// Clean up the extraction directory immediately
+    ///
+    /// This method removes the extraction directory and all its contents.
+    /// After calling this method, the handler will no longer clean up on drop.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the directory cannot be removed.
+    pub fn cleanup(&mut self) -> Result<()> {
+        if !self.path.exists() {
+            self.should_cleanup = false;
+            return Ok(());
+        }
+
+        fs::remove_dir_all(&self.path).map_err(|e| {
+            Error::invalid_archive(format!(
+                "Failed to clean up extraction directory '{}': {}",
+                self.path.display(),
+                e
+            ))
+        })?;
+
+        self.should_cleanup = false;
+        self.files.clear();
+        Ok(())
+    }
+
+    /// Persist the extraction directory
+    ///
+    /// Calling this method prevents the extraction directory from being
+    /// cleaned up when this handle is dropped. The directory will remain
+    /// on disk until manually deleted.
+    ///
+    /// Returns the path to the extraction directory.
+    pub fn persist(mut self) -> PathBuf {
+        self.should_cleanup = false;
+        self.path.clone()
+    }
+
+    /// Create a new archive from the current directory contents
+    ///
+    /// This method packages all files in the extraction directory back into
+    /// an archive file at the specified location.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The target directory cannot be created
+    /// - Archive creation fails
+    /// - File I/O operations fail
+    pub async fn pack(&self, target_path: impl AsRef<Path>) -> Result<ArchiveFile> {
+        let target_path = target_path.as_ref();
+
+        // Ensure parent directory exists
+        if let Some(parent) = target_path.parent() {
+            tokio::fs::create_dir_all(parent).await.map_err(|e| {
+                Error::invalid_archive(format!("Failed to create parent directory: {}", e))
+            })?;
+        }
+
+        // Determine archive type from target path extension or use original type
+        let archive_type = target_path
+            .extension()
+            .and_then(ArchiveType::from_file_extension)
+            .unwrap_or(self.archive_type);
+
+        let source_dir = self.path.clone();
+        let target = target_path.to_path_buf();
+
+        tokio::task::spawn_blocking(move || match archive_type {
+            #[cfg(feature = "zip")]
+            ArchiveType::Zip => handler::zip::pack(&source_dir, &target),
+
+            #[cfg(feature = "tar")]
+            ArchiveType::Tar | ArchiveType::TarGz | ArchiveType::TarBz2 | ArchiveType::TarXz => {
+                handler::tar::pack(&source_dir, &target, archive_type)
+            }
+
+            _ => Err(Error::unsupported_format(format!(
+                "Packing format not supported: {:?}",
+                archive_type
+            ))),
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        ArchiveFile::from_path(target_path)
+    }
+}
+
+impl Drop for ArchiveHandler {
+    fn drop(&mut self) {
+        if !self.should_cleanup || !self.path.exists() {
+            return;
+        }
+
+        if let Err(e) = fs::remove_dir_all(&self.path) {
+            tracing::warn!(
+                target: "nvisy_rt_archive",
+                path = %self.path.display(),
+                error = %e,
+                "Failed to clean up extraction directory"
+            );
+        }
+    }
+}
+
+/// Iterator implementation for ArchiveHandler
+impl<'a> IntoIterator for &'a ArchiveHandler {
+    type IntoIter = std::slice::Iter<'a, PathBuf>;
+    type Item = &'a PathBuf;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.files.iter()
+    }
+}
+
+/// Detect content kind from file extension
+fn content_kind_from_extension(extension: &str) -> ContentKind {
+    let ext = extension.to_lowercase();
+    match ext.as_str() {
+        // Text formats
+        "txt" | "text" | "md" | "markdown" | "rst" | "xml" | "json" | "yaml" | "yml" | "toml"
+        | "ini" | "cfg" | "conf" | "log" => ContentKind::Text,
+
+        // Document formats
+        "pdf" | "doc" | "docx" | "rtf" | "odt" | "pages" => ContentKind::Document,
+
+        // Spreadsheet formats
+        "csv" | "tsv" | "xls" | "xlsx" | "ods" | "numbers" => ContentKind::Spreadsheet,
+
+        // Image formats
+        "jpg" | "jpeg" | "png" | "gif" | "bmp" | "svg" | "webp" | "ico" | "tiff" | "tif" => {
+            ContentKind::Image
+        }
+
+        // Archive formats
+        "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "tgz" | "tbz2" | "txz" => {
+            ContentKind::Archive
+        }
+
+        _ => ContentKind::Unknown,
+    }
+}
+
+/// Scan the directory for files recursively
+pub(crate) fn scan_files(dir: &Path) -> Result<Vec<PathBuf>> {
+    let mut files = Vec::new();
+    let entries = fs::read_dir(dir)?;
+
+    for entry in entries {
+        let entry = entry?;
+        let path = entry.path();
+
+        if path.is_file() {
+            files.push(path);
+        } else if path.is_dir() {
+            let mut sub_files = scan_files(&path)?;
+            files.append(&mut sub_files);
+        }
+    }
+
+    files.sort();
+    Ok(files)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_archive_handler_creation() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let files = vec![PathBuf::from("test.txt")];
+
+        let handler = ArchiveHandler::new(
+            ContentSource::new(),
+            ArchiveType::Zip,
+            Some(PathBuf::from("test.zip")),
+            temp_dir.path().to_path_buf(),
+            files.clone(),
+        );
+
+        assert_eq!(handler.archive_type, ArchiveType::Zip);
+        assert_eq!(handler.file_count(), 1);
+        assert!(!handler.is_empty());
+    }
+
+    #[test]
+    fn test_empty_archive_handler() {
+        let temp_dir = tempfile::tempdir().unwrap();
+
+        let handler = ArchiveHandler::new(
+            ContentSource::new(),
+            ArchiveType::Zip,
+            None,
+            temp_dir.path().to_path_buf(),
+            vec![],
+        );
+
+        assert_eq!(handler.file_count(), 0);
+        assert!(handler.is_empty());
+    }
+
+    #[test]
+    fn test_find_files_by_extension() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let files = vec![
+            PathBuf::from("test.txt"),
+            PathBuf::from("data.json"),
+            PathBuf::from("image.png"),
+        ];
+
+        let handler = ArchiveHandler::new(
+            ContentSource::new(),
+            ArchiveType::Zip,
+            None,
+            temp_dir.path().to_path_buf(),
+            files,
+        );
+
+        let txt_files = handler.find_files_by_extension("txt");
+        assert_eq!(txt_files.len(), 1);
+
+        let json_files = handler.find_files_by_extension("json");
+        assert_eq!(json_files.len(), 1);
+    }
+
+    #[test]
+    fn test_iterator() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let files = vec![PathBuf::from("file1.txt"), PathBuf::from("file2.txt")];
+
+        let handler = ArchiveHandler::new(
+            ContentSource::new(),
+            ArchiveType::Zip,
+            None,
+            temp_dir.path().to_path_buf(),
+            files,
+        );
+
+        let collected: Vec<&PathBuf> = (&handler).into_iter().collect();
+        assert_eq!(collected.len(), 2);
+    }
+
+    #[tokio::test]
+    async fn test_write_and_read_file() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let mut handler = ArchiveHandler::new(
+            ContentSource::new(),
+            ArchiveType::Zip,
+            None,
+            temp_dir.path().to_path_buf(),
+            vec![],
+        );
+
+        let content = b"Hello, World!";
+        handler.write_file("test.txt", content).await.unwrap();
+
+        assert!(handler.contains_file("test.txt"));
+        let read_content = handler.read_file("test.txt").await.unwrap();
+        assert_eq!(read_content, content);
+    }
+
+    #[test]
+    fn test_find_files_by_kind() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let files = vec![
+            PathBuf::from("document.pdf"),
+            PathBuf::from("data.csv"),
+            PathBuf::from("image.png"),
+            PathBuf::from("archive.zip"),
+            PathBuf::from("notes.txt"),
+        ];
+
+        let handler = ArchiveHandler::new(
+            ContentSource::new(),
+            ArchiveType::Zip,
+            None,
+            temp_dir.path().to_path_buf(),
+            files,
+        );
+
+        let docs = handler.find_files_by_kind(ContentKind::Document);
+        assert_eq!(docs.len(), 1);
assert!(docs[0].to_string_lossy().contains("document.pdf")); + + let spreadsheets = handler.find_files_by_kind(ContentKind::Spreadsheet); + assert_eq!(spreadsheets.len(), 1); + assert!(spreadsheets[0].to_string_lossy().contains("data.csv")); + + let images = handler.find_files_by_kind(ContentKind::Image); + assert_eq!(images.len(), 1); + + let text = handler.find_files_by_kind(ContentKind::Text); + assert_eq!(text.len(), 1); + } + + #[test] + fn test_content_kind_for_path() { + let temp_dir = tempfile::tempdir().unwrap(); + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir.path().to_path_buf(), + vec![], + ); + + assert_eq!( + handler.content_kind_for_path(Path::new("test.pdf")), + ContentKind::Document + ); + assert_eq!( + handler.content_kind_for_path(Path::new("data.csv")), + ContentKind::Spreadsheet + ); + assert_eq!( + handler.content_kind_for_path(Path::new("image.png")), + ContentKind::Image + ); + assert_eq!( + handler.content_kind_for_path(Path::new("notes.txt")), + ContentKind::Text + ); + assert_eq!( + handler.content_kind_for_path(Path::new("archive.zip")), + ContentKind::Archive + ); + assert_eq!( + handler.content_kind_for_path(Path::new("no_extension")), + ContentKind::Unknown + ); + } + + #[test] + fn test_drop_cleanup() { + let temp_dir = tempfile::tempdir().unwrap(); + let extract_path = temp_dir.path().join("test-extraction"); + fs::create_dir_all(&extract_path).unwrap(); + + // Create a file in the directory + fs::write(extract_path.join("test.txt"), b"hello").unwrap(); + + { + let _handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + extract_path.clone(), + vec![extract_path.join("test.txt")], + ); + // Handle dropped here + } + + // Directory should be cleaned up + assert!(!extract_path.exists()); + } + + #[test] + fn test_persist() { + let temp_dir = tempfile::tempdir().unwrap(); + let extract_path = temp_dir.path().join("test-extraction"); + fs::create_dir_all(&extract_path).unwrap(); + + let path = { + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + extract_path.clone(), + vec![], + ); + handler.persist() + }; + + // Directory should still exist after persist + assert!(path.exists()); + } + + #[test] + fn test_cleanup() { + let temp_dir = tempfile::tempdir().unwrap(); + let extract_path = temp_dir.path().join("test-extraction"); + fs::create_dir_all(&extract_path).unwrap(); + + // Create a file in the directory + fs::write(extract_path.join("test.txt"), b"hello").unwrap(); + + let mut handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + extract_path.clone(), + vec![extract_path.join("test.txt")], + ); + + // Cleanup should succeed + handler.cleanup().unwrap(); + + // Directory should be removed + assert!(!extract_path.exists()); + + // Files list should be cleared + assert!(handler.is_empty()); + + // Drop should not try to cleanup again (no error) + } + + #[test] + fn test_cleanup_already_removed() { + let temp_dir = tempfile::tempdir().unwrap(); + let extract_path = temp_dir.path().join("non-existent"); + + let mut handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + extract_path, + vec![], + ); + + // Cleanup should succeed even if directory doesn't exist + handler.cleanup().unwrap(); + } + + #[test] + fn test_deref_to_path() { + let temp_dir = tempfile::tempdir().unwrap(); + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + 
temp_dir.path().to_path_buf(), + vec![], + ); + + // Test that Deref works - we can use PathBuf methods directly + let path: &Path = &handler; + assert!(path.exists()); + } +} diff --git a/crates/nvisy-archive/src/file/archive_type.rs b/crates/nvisy-archive/src/file/archive_type.rs index ddd3cba..99aaa34 100644 --- a/crates/nvisy-archive/src/file/archive_type.rs +++ b/crates/nvisy-archive/src/file/archive_type.rs @@ -1,140 +1,123 @@ //! Archive type definitions and utilities -//! -//! This module defines the different archive formats supported by the library -//! and provides utilities for working with archive types. use std::ffi::OsStr; -use strum::{AsRefStr, Display, EnumIter, EnumString}; - /// Supported archive types /// -/// This enum represents the different archive formats that can be processed. -/// It provides methods to determine the archive type from file extensions -/// and to get the supported extensions for each type. +/// Only variants for enabled features are available at compile time. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -#[derive(AsRefStr, Display, EnumIter, EnumString)] +#[non_exhaustive] pub enum ArchiveType { - /// ZIP archive format - #[strum(serialize = "ZIP")] + #[cfg(feature = "zip")] Zip, - /// TAR archive format (uncompressed) - #[strum(serialize = "TAR")] + #[cfg(feature = "tar")] Tar, - /// GZIP compressed TAR archive - #[strum(serialize = "TAR.GZ")] + #[cfg(all(feature = "tar", feature = "gzip"))] TarGz, - /// BZIP2 compressed TAR archive - #[strum(serialize = "TAR.BZ2")] + #[cfg(all(feature = "tar", feature = "bzip2"))] TarBz2, - /// XZ compressed TAR archive - #[strum(serialize = "TAR.XZ")] + #[cfg(all(feature = "tar", feature = "xz"))] TarXz, - /// GZIP compression (single file) - #[strum(serialize = "GZIP")] + #[cfg(feature = "gzip")] Gz, - /// BZIP2 compression (single file) - #[strum(serialize = "BZIP2")] + #[cfg(feature = "bzip2")] Bz2, - /// XZ compression (single file) - #[strum(serialize = "XZ")] + #[cfg(feature = "xz")] Xz, - /// 7-Zip archive format - #[strum(serialize = "7Z")] + #[cfg(feature = "sevenz")] SevenZ, } impl ArchiveType { /// Determine archive type from file extension - /// - /// # Arguments - /// - /// * `extension` - File extension string (without the dot) - /// - /// # Returns - /// - /// `Some(ArchiveType)` if the extension is recognized, `None` otherwise. 
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use std::ffi::OsStr;
-    /// use nvisy_rt_archive::ArchiveType;
-    ///
-    /// assert_eq!(ArchiveType::from_file_extension(OsStr::new("zip")), Some(ArchiveType::Zip));
-    /// assert_eq!(ArchiveType::from_file_extension(OsStr::new("tar.gz")), Some(ArchiveType::TarGz));
-    /// assert_eq!(ArchiveType::from_file_extension(OsStr::new("unknown")), None);
-    /// ```
     pub fn from_file_extension(extension: &OsStr) -> Option<Self> {
-        let extension_str = extension.to_str()?.to_lowercase();
-        match extension_str.as_str() {
+        let ext = extension.to_str()?.to_lowercase();
+        match ext.as_str() {
+            #[cfg(feature = "zip")]
             "zip" => Some(Self::Zip),
+            #[cfg(feature = "tar")]
             "tar" => Some(Self::Tar),
+            #[cfg(all(feature = "tar", feature = "gzip"))]
             "tar.gz" | "tgz" => Some(Self::TarGz),
+            #[cfg(all(feature = "tar", feature = "bzip2"))]
             "tar.bz2" | "tbz2" | "tb2" => Some(Self::TarBz2),
+            #[cfg(all(feature = "tar", feature = "xz"))]
             "tar.xz" | "txz" => Some(Self::TarXz),
+            #[cfg(feature = "gzip")]
             "gz" | "gzip" => Some(Self::Gz),
+            #[cfg(feature = "bzip2")]
             "bz2" | "bzip2" => Some(Self::Bz2),
+            #[cfg(feature = "xz")]
             "xz" => Some(Self::Xz),
+            #[cfg(feature = "sevenz")]
             "7z" => Some(Self::SevenZ),
             _ => None,
         }
     }
 
     /// Get the file extensions associated with this archive type
-    ///
-    /// Returns a slice of static string references representing all
-    /// the file extensions that correspond to this archive type.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use nvisy_rt_archive::ArchiveType;
-    ///
-    /// assert_eq!(ArchiveType::Zip.file_extensions(), &["zip"]);
-    /// assert_eq!(ArchiveType::TarGz.file_extensions(), &["tar.gz", "tgz"]);
-    /// ```
     pub fn file_extensions(&self) -> &'static [&'static str] {
         match self {
+            #[cfg(feature = "zip")]
             Self::Zip => &["zip"],
+            #[cfg(feature = "tar")]
             Self::Tar => &["tar"],
+            #[cfg(all(feature = "tar", feature = "gzip"))]
             Self::TarGz => &["tar.gz", "tgz"],
+            #[cfg(all(feature = "tar", feature = "bzip2"))]
             Self::TarBz2 => &["tar.bz2", "tbz2", "tb2"],
+            #[cfg(all(feature = "tar", feature = "xz"))]
             Self::TarXz => &["tar.xz", "txz"],
+            #[cfg(feature = "gzip")]
             Self::Gz => &["gz", "gzip"],
+            #[cfg(feature = "bzip2")]
             Self::Bz2 => &["bz2", "bzip2"],
+            #[cfg(feature = "xz")]
             Self::Xz => &["xz"],
+            #[cfg(feature = "sevenz")]
             Self::SevenZ => &["7z"],
         }
     }
 
     /// Get the primary file extension for this archive type
-    ///
-    /// Returns the most common/preferred file extension for this archive type.
-    ///
-    /// # Examples
-    ///
-    /// ```
-    /// use nvisy_rt_archive::ArchiveType;
-    ///
-    /// assert_eq!(ArchiveType::Zip.primary_extension(), "zip");
-    /// assert_eq!(ArchiveType::TarGz.primary_extension(), "tar.gz");
-    /// ```
     pub fn primary_extension(&self) -> &'static str {
         self.file_extensions()[0]
     }
 
-    /// Check if this archive type is a compressed TAR variant
+    /// Check if this archive type is a TAR variant
     pub fn is_tar_variant(&self) -> bool {
-        matches!(self, Self::Tar | Self::TarGz | Self::TarBz2 | Self::TarXz)
+        match self {
+            #[cfg(feature = "tar")]
+            Self::Tar => true,
+            #[cfg(all(feature = "tar", feature = "gzip"))]
+            Self::TarGz => true,
+            #[cfg(all(feature = "tar", feature = "bzip2"))]
+            Self::TarBz2 => true,
+            #[cfg(all(feature = "tar", feature = "xz"))]
+            Self::TarXz => true,
+            #[allow(unreachable_patterns)]
+            _ => false,
+        }
     }
 
     /// Check if this archive type supports multiple files
     pub fn supports_multiple_files(&self) -> bool {
-        matches!(
-            self,
-            Self::Zip | Self::Tar | Self::TarGz | Self::TarBz2 | Self::TarXz | Self::SevenZ
-        )
+        match self {
+            #[cfg(feature = "zip")]
+            Self::Zip => true,
+            #[cfg(feature = "tar")]
+            Self::Tar => true,
+            #[cfg(all(feature = "tar", feature = "gzip"))]
+            Self::TarGz => true,
+            #[cfg(all(feature = "tar", feature = "bzip2"))]
+            Self::TarBz2 => true,
+            #[cfg(all(feature = "tar", feature = "xz"))]
+            Self::TarXz => true,
+            #[cfg(feature = "sevenz")]
+            Self::SevenZ => true,
+            #[allow(unreachable_patterns)]
+            _ => false,
+        }
     }
 }
@@ -142,8 +125,9 @@ impl ArchiveType {
 mod tests {
     use super::*;
 
+    #[cfg(feature = "zip")]
     #[test]
-    fn test_archive_type_from_extension() {
+    fn test_zip() {
         assert_eq!(
             ArchiveType::from_file_extension(OsStr::new("zip")),
             Some(ArchiveType::Zip)
         );
         assert_eq!(
             ArchiveType::from_file_extension(OsStr::new("ZIP")),
             Some(ArchiveType::Zip)
         );
+        assert_eq!(ArchiveType::Zip.file_extensions(), &["zip"]);
+        assert_eq!(ArchiveType::Zip.primary_extension(), "zip");
+        assert!(!ArchiveType::Zip.is_tar_variant());
+        assert!(ArchiveType::Zip.supports_multiple_files());
+    }
+
+    #[cfg(feature = "tar")]
+    #[test]
+    fn test_tar() {
         assert_eq!(
             ArchiveType::from_file_extension(OsStr::new("tar")),
             Some(ArchiveType::Tar)
         );
+        assert!(ArchiveType::Tar.is_tar_variant());
+        assert!(ArchiveType::Tar.supports_multiple_files());
+    }
+
+    #[cfg(all(feature = "tar", feature = "gzip"))]
+    #[test]
+    fn test_tar_gz() {
         assert_eq!(
             ArchiveType::from_file_extension(OsStr::new("tar.gz")),
             Some(ArchiveType::TarGz)
         );
@@ -164,57 +164,36 @@ mod tests {
         assert_eq!(
             ArchiveType::from_file_extension(OsStr::new("tgz")),
             Some(ArchiveType::TarGz)
         );
-        assert_eq!(
-            ArchiveType::from_file_extension(OsStr::new("unknown")),
-            None
-        );
-    }
-
-    #[test]
-    fn test_archive_type_extensions() {
-        assert_eq!(ArchiveType::Zip.file_extensions(), &["zip"]);
-        assert_eq!(ArchiveType::TarGz.file_extensions(), &["tar.gz", "tgz"]);
-        assert!(ArchiveType::TarBz2.file_extensions().contains(&"tar.bz2"));
-    }
-
-    #[test]
-    fn test_archive_type_primary_extension() {
-        assert_eq!(ArchiveType::Zip.primary_extension(), "zip");
-        assert_eq!(ArchiveType::TarGz.primary_extension(), "tar.gz");
-    }
-
-    #[test]
-    fn test_archive_type_variants() {
-        assert!(ArchiveType::Tar.is_tar_variant());
         assert!(ArchiveType::TarGz.is_tar_variant());
-        assert!(!ArchiveType::Zip.is_tar_variant());
-        assert!(!ArchiveType::Gz.is_tar_variant());
     }
 
+    #[cfg(feature = "gzip")]
     #[test]
-    fn test_archive_type_multiple_files() {
-        assert!(ArchiveType::Zip.supports_multiple_files());
-        assert!(ArchiveType::Tar.supports_multiple_files());
-        assert!(ArchiveType::SevenZ.supports_multiple_files());
+    fn test_gz() {
+        assert_eq!(
+            ArchiveType::from_file_extension(OsStr::new("gz")),
+            Some(ArchiveType::Gz)
+        );
+        assert!(!ArchiveType::Gz.is_tar_variant());
         assert!(!ArchiveType::Gz.supports_multiple_files());
-        assert!(!ArchiveType::Bz2.supports_multiple_files());
     }
 
+    #[cfg(feature = "sevenz")]
     #[test]
-    fn test_archive_type_display() {
-        assert_eq!(ArchiveType::Zip.to_string(), "ZIP");
-        assert_eq!(ArchiveType::TarGz.to_string(), "TAR.GZ");
-        assert_eq!(ArchiveType::SevenZ.to_string(), "7Z");
-    }
-
-    #[test]
-    fn test_archive_type_7z() {
+    fn test_7z() {
         assert_eq!(
             ArchiveType::from_file_extension(OsStr::new("7z")),
             Some(ArchiveType::SevenZ)
         );
-        assert_eq!(ArchiveType::SevenZ.file_extensions(), &["7z"]);
-        assert_eq!(ArchiveType::SevenZ.primary_extension(), "7z");
         assert!(!ArchiveType::SevenZ.is_tar_variant());
+        assert!(ArchiveType::SevenZ.supports_multiple_files());
+    }
+
+    #[test]
+    fn test_unknown() {
+        assert_eq!(
+            ArchiveType::from_file_extension(OsStr::new("unknown")),
+            None
+        );
     }
 }
diff --git a/crates/nvisy-archive/src/file/mod.rs b/crates/nvisy-archive/src/file/mod.rs
index 9faa187..a43e6c3 100644
--- a/crates/nvisy-archive/src/file/mod.rs
+++ b/crates/nvisy-archive/src/file/mod.rs
@@ -3,677 +3,10 @@
 //! This module provides functionality for working with archive files,
 //! including extraction to temporary directories and repacking from various sources.
 
-pub mod archive_type;
-
-use std::ffi::OsStr;
-use std::io::Cursor;
-use std::path::{Path, PathBuf};
+mod archive_file;
+mod archive_handler;
+mod archive_type;
 
+pub use archive_file::{ArchiveFile, ContentData, ContentSource};
+pub use archive_handler::{ArchiveHandler, ContentKind};
 pub use archive_type::ArchiveType;
-use bytes::Bytes;
-use tempfile::TempDir;
-use tokio::fs;
-
-#[cfg(feature = "zip")]
-use crate::ZipResultExt;
-use crate::handler::ArchiveHandler;
-use crate::{ArchiveErrorExt, ContentData, ContentSource, Error, Result};
-
-/// Represents an archive file that can be loaded from various sources
-///
-/// This struct encapsulates an archive and provides methods for
-/// extracting its contents to a temporary directory for processing.
-/// It integrates with nvisy-core's `ContentData` and `ContentSource`
-/// for content tracking and integrity verification.
-#[derive(Debug)]
-pub struct ArchiveFile {
-    /// Unique identifier for this archive content
-    content_source: ContentSource,
-    /// Type of archive
-    archive_type: ArchiveType,
-    /// Source data for the archive
-    source: ArchiveSource,
-}
-
-/// Internal representation of archive data sources
-#[derive(Debug)]
-enum ArchiveSource {
-    /// Archive loaded from a file path
-    Path(PathBuf),
-    /// Archive loaded from ContentData (memory with metadata)
-    ContentData(ContentData),
-}
-
-impl ArchiveFile {
-    /// Create a new archive file from a file path
-    ///
-    /// The archive type is automatically detected from the file extension.
-    /// A new `ContentSource` is generated to track this archive.
-    ///
-    /// # Example
-    ///
-    /// ```no_run
-    /// use nvisy_rt_archive::ArchiveFile;
-    /// use std::path::PathBuf;
-    ///
-    /// let archive = ArchiveFile::from_path("archive.zip")?;
-    /// # Ok::<(), nvisy_rt_archive::Error>(())
-    /// ```
-    pub fn from_path(path: impl AsRef<Path>) -> Result<Self> {
-        let path = path.as_ref();
-        let extension = path
-            .extension()
-            .ok_or_else(|| Error::invalid_archive("No file extension found"))?;
-
-        // Handle compound extensions like .tar.gz
-        let full_name = path
-            .file_name()
-            .and_then(|name| name.to_str())
-            .unwrap_or("");
-
-        let archive_type = if full_name.contains(".tar.") {
-            // Try to match compound extensions first
-            if let Some(pos) = full_name.find(".tar.") {
-                let compound_ext = &full_name[pos + 1..]; // Skip the dot
-                ArchiveType::from_file_extension(OsStr::new(compound_ext))
-            } else {
-                None
-            }
-        } else {
-            None
-        }
-        .or_else(|| ArchiveType::from_file_extension(extension))
-        .ok_or_else(|| Error::unsupported_format(extension.to_string_lossy().to_string()))?;
-
-        Ok(Self {
-            content_source: ContentSource::new(),
-            archive_type,
-            source: ArchiveSource::Path(path.to_path_buf()),
-        })
-    }
-
-    /// Create a new archive file from ContentData
-    ///
-    /// This preserves the content source from the provided ContentData,
-    /// maintaining content lineage tracking.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// use nvisy_rt_archive::{ArchiveFile, ArchiveType, ContentData};
-    ///
-    /// let data = ContentData::from(vec![0x50, 0x4B, 0x03, 0x04]); // ZIP signature
-    /// let archive = ArchiveFile::from_content_data(ArchiveType::Zip, data);
-    /// ```
-    pub fn from_content_data(archive_type: ArchiveType, content_data: ContentData) -> Self {
-        Self {
-            content_source: content_data.content_source,
-            archive_type,
-            source: ArchiveSource::ContentData(content_data),
-        }
-    }
-
-    /// Create a new archive file from raw bytes with explicit archive type
-    ///
-    /// A new `ContentSource` is generated to track this archive.
-    ///
-    /// # Example
-    ///
-    /// ```
-    /// use nvisy_rt_archive::{ArchiveFile, ArchiveType};
-    ///
-    /// let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature
-    /// let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data);
-    /// ```
-    pub fn from_bytes(archive_type: ArchiveType, data: impl Into<Bytes>) -> Self {
-        let content_data = ContentData::from(data.into());
-        Self {
-            content_source: content_data.content_source,
-            archive_type,
-            source: ArchiveSource::ContentData(content_data),
-        }
-    }
-
-    /// Create an archive with explicit type (useful for ambiguous extensions)
-    pub fn with_archive_type(mut self, archive_type: ArchiveType) -> Self {
-        self.archive_type = archive_type;
-        self
-    }
-
-    /// Get the content source identifier for this archive
-    pub fn content_source(&self) -> ContentSource {
-        self.content_source
-    }
-
-    /// Get the archive type
-    pub fn archive_type(&self) -> ArchiveType {
-        self.archive_type
-    }
-
-    /// Check if the archive source exists (only meaningful for file-based sources)
-    pub async fn exists(&self) -> bool {
-        match &self.source {
-            ArchiveSource::Path(path) => fs::try_exists(path).await.unwrap_or(false),
-            ArchiveSource::ContentData(_) => true,
-        }
-    }
-
-    /// Get the file path (if loaded from a file)
-    pub fn path(&self) -> Option<&Path> {
-        match &self.source {
-            ArchiveSource::Path(path) => Some(path),
-            ArchiveSource::ContentData(_) => None,
-        }
-    }
-
-    /// Get the size of the archive data in bytes
-    pub async fn size(&self) -> Result<u64> {
-        match &self.source {
-            ArchiveSource::Path(path) => {
-                let metadata = fs::metadata(path).await?;
-                Ok(metadata.len())
-            }
-            ArchiveSource::ContentData(data) => Ok(data.size() as u64),
-        }
-    }
-
-    /// Get the SHA256 hash of the archive content
-    ///
-    /// For file-based archives, this reads the file first.
-    /// For memory-based archives, the hash is computed lazily.
-    pub async fn sha256(&self) -> Result<String> {
-        match &self.source {
-            ArchiveSource::Path(path) => {
-                let data = fs::read(path).await?;
-                let content_data = ContentData::from(data);
-                Ok(content_data.sha256_hex())
-            }
-            ArchiveSource::ContentData(data) => Ok(data.sha256_hex()),
-        }
-    }
-
-    /// Extract the archive to a temporary directory
-    ///
-    /// This method extracts all contents of the archive to a temporary
-    /// directory and returns an `ArchiveHandler` for managing the
-    /// extracted contents.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    /// - The archive file cannot be read
-    /// - The archive format is not supported
-    /// - Extraction fails
-    /// - Temporary directory creation fails
-    ///
-    /// # Example
-    ///
-    /// ```no_run
-    /// use nvisy_rt_archive::ArchiveFile;
-    ///
-    /// # async fn example() -> nvisy_rt_archive::Result<()> {
-    /// let archive = ArchiveFile::from_path("archive.zip")?;
-    /// let handler = archive.unpack().await?;
-    ///
-    /// // Work with extracted files
-    /// for file_path in handler.file_paths() {
-    ///     println!("Found file: {:?}", file_path);
-    /// }
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub async fn unpack(self) -> Result<ArchiveHandler> {
-        // Create temporary directory
-        let temp_dir = TempDir::new().map_err(|e| {
-            Error::invalid_archive(format!("Failed to create temporary directory: {}", e))
-        })?;
-
-        // Get archive data as ContentData
-        let content_data = self.get_content_data().await?;
-        let cursor = Cursor::new(content_data.as_bytes().to_vec());
-
-        // Extract based on archive type
-        let files = self.extract_archive(cursor, temp_dir.path()).await?;
-
-        Ok(ArchiveHandler::new(
-            self.content_source,
-            self.archive_type,
-            self.path().map(|p| p.to_path_buf()),
-            temp_dir,
-            files,
-        ))
-    }
-
-    /// Get the archive data as ContentData
-    async fn get_content_data(&self) -> Result<ContentData> {
-        match &self.source {
-            ArchiveSource::Path(path) => {
-                let data = fs::read(path).await?;
-                Ok(ContentData::new(self.content_source, data.into()))
-            }
-            ArchiveSource::ContentData(data) => Ok(data.clone()),
-        }
-    }
-
-    /// Extract archive contents to the specified directory
-    async fn extract_archive(
-        &self,
-        data: Cursor<Vec<u8>>,
-        target_dir: &Path,
-    ) -> Result<Vec<PathBuf>> {
-        match self.archive_type {
-            #[cfg(feature = "zip")]
-            ArchiveType::Zip => self.extract_zip(data, target_dir).await,
-            #[cfg(not(feature = "zip"))]
-            ArchiveType::Zip => Err(Error::unsupported_format("ZIP support not enabled")),
-
-            #[cfg(feature = "tar")]
-            ArchiveType::Tar => self.extract_tar(data, target_dir).await,
-            #[cfg(not(feature = "tar"))]
-            ArchiveType::Tar => Err(Error::unsupported_format("TAR support not enabled")),
-
-            #[cfg(all(feature = "tar", feature = "gzip"))]
-            ArchiveType::TarGz => self.extract_tar_gz(data, target_dir).await,
-            #[cfg(not(all(feature = "tar", feature = "gzip")))]
-            ArchiveType::TarGz => Err(Error::unsupported_format("TAR.GZ support not enabled")),
-
-            #[cfg(all(feature = "tar", feature = "bzip2"))]
-            ArchiveType::TarBz2 => self.extract_tar_bz2(data, target_dir).await,
-            #[cfg(not(all(feature = "tar", feature = "bzip2")))]
-            ArchiveType::TarBz2 => Err(Error::unsupported_format("TAR.BZ2 support not enabled")),
-
-            #[cfg(all(feature = "tar", feature = "xz"))]
-            ArchiveType::TarXz => self.extract_tar_xz(data, target_dir).await,
-            #[cfg(not(all(feature = "tar", feature = "xz")))]
-            ArchiveType::TarXz => Err(Error::unsupported_format("TAR.XZ support not enabled")),
-
-            #[cfg(feature = "gzip")]
-            ArchiveType::Gz => self.extract_gz(data, target_dir).await,
-            #[cfg(not(feature = "gzip"))]
-            ArchiveType::Gz => Err(Error::unsupported_format("GZIP support not enabled")),
-
-            #[cfg(feature = "bzip2")]
-            ArchiveType::Bz2 => self.extract_bz2(data, target_dir).await,
-            #[cfg(not(feature = "bzip2"))]
-            ArchiveType::Bz2 => Err(Error::unsupported_format("BZIP2 support not enabled")),
-
-            #[cfg(feature = "xz")]
-            ArchiveType::Xz => self.extract_xz(data, target_dir).await,
-            #[cfg(not(feature = "xz"))]
-            ArchiveType::Xz => Err(Error::unsupported_format("XZ support not enabled")),
-
-            #[cfg(feature = "sevenz")]
-            ArchiveType::SevenZ => self.extract_7z(data, target_dir).await,
-            #[cfg(not(feature = "sevenz"))]
-            ArchiveType::SevenZ => Err(Error::unsupported_format("7z support not enabled")),
-        }
-    }
-
-    /// Extract ZIP archive
-    #[cfg(feature = "zip")]
-    async fn extract_zip(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
-        use tokio::io::AsyncWriteExt;
-        use zip::ZipArchive;
-
-        // Use spawn_blocking for CPU-bound decompression
-        let target_dir = target_dir.to_path_buf();
-        let (files, entries_data) = tokio::task::spawn_blocking(move || {
-            let mut archive = ZipArchive::new(data).map_zip_err()?;
-            let mut entries_data = Vec::new();
-
-            for i in 0..archive.len() {
-                let mut file = archive.by_index(i).map_zip_err()?;
-                let name = file.name().to_string();
-                let is_dir = file.is_dir();
-
-                if !is_dir {
-                    let mut content = Vec::new();
-                    std::io::Read::read_to_end(&mut file, &mut content)?;
-                    entries_data.push((name, content));
-                } else {
-                    entries_data.push((name, Vec::new()));
-                }
-            }
-
-            Ok::<_, Error>((Vec::new(), entries_data))
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        let mut files = files;
-        for (name, content) in entries_data {
-            let file_path = target_dir.join(&name);
-
-            // Create parent directories if they don't exist
-            if let Some(parent) = file_path.parent() {
-                fs::create_dir_all(parent).await?;
-            }
-
-            if name.ends_with('/') {
-                fs::create_dir_all(&file_path).await?;
-            } else {
-                let mut output_file = fs::File::create(&file_path).await?;
-                output_file.write_all(&content).await?;
-                files.push(file_path);
-            }
-        }
-
-        Ok(files)
-    }
-
-    /// Extract TAR archive
-    #[cfg(feature = "tar")]
-    async fn extract_tar(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
-        use tar::Archive;
-        use tokio::io::AsyncWriteExt;
-
-        let target_dir = target_dir.to_path_buf();
-
-        // Use spawn_blocking for CPU-bound decompression
-        let entries_data = tokio::task::spawn_blocking(move || {
-            let mut archive = Archive::new(data);
-            let mut entries_data = Vec::new();
-
-            for entry in archive.entries()? {
-                let mut entry = entry?;
-                let path = entry.path()?.to_path_buf();
-                let is_dir = entry.header().entry_type().is_dir();
-
-                if !is_dir {
-                    let mut content = Vec::new();
-                    std::io::Read::read_to_end(&mut entry, &mut content)?;
-                    entries_data.push((path, content, false));
-                } else {
-                    entries_data.push((path, Vec::new(), true));
-                }
-            }
-
-            Ok::<_, Error>(entries_data)
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        let mut files = Vec::new();
-        for (path, content, is_dir) in entries_data {
-            let file_path = target_dir.join(&path);
-
-            // Create parent directories if they don't exist
-            if let Some(parent) = file_path.parent() {
-                fs::create_dir_all(parent).await?;
-            }
-
-            if is_dir {
-                fs::create_dir_all(&file_path).await?;
-            } else {
-                let mut output_file = fs::File::create(&file_path).await?;
-                output_file.write_all(&content).await?;
-                files.push(file_path);
-            }
-        }
-
-        Ok(files)
-    }
-
-    /// Extract GZIP-compressed TAR archive
-    #[cfg(all(feature = "tar", feature = "gzip"))]
-    async fn extract_tar_gz(
-        &self,
-        data: Cursor<Vec<u8>>,
-        target_dir: &Path,
-    ) -> Result<Vec<PathBuf>> {
-        use flate2::read::GzDecoder;
-
-        let decompressed = tokio::task::spawn_blocking(move || {
-            let decoder = GzDecoder::new(data);
-            let mut buf = Vec::new();
-            std::io::Read::read_to_end(&mut { decoder }, &mut buf)?;
-            Ok::<_, Error>(buf)
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        let cursor = Cursor::new(decompressed);
-        self.extract_tar(cursor, target_dir).await
-    }
-
-    /// Extract BZIP2-compressed TAR archive
-    #[cfg(all(feature = "tar", feature = "bzip2"))]
-    async fn extract_tar_bz2(
-        &self,
-        data: Cursor<Vec<u8>>,
-        target_dir: &Path,
-    ) -> Result<Vec<PathBuf>> {
-        use bzip2::read::BzDecoder;
-
-        let decompressed = tokio::task::spawn_blocking(move || {
-            let decoder = BzDecoder::new(data);
-            let mut buf = Vec::new();
-            std::io::Read::read_to_end(&mut { decoder }, &mut buf)?;
-            Ok::<_, Error>(buf)
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        let cursor = Cursor::new(decompressed);
-        self.extract_tar(cursor, target_dir).await
-    }
-
-    /// Extract XZ-compressed TAR archive
-    #[cfg(all(feature = "tar", feature = "xz"))]
-    async fn extract_tar_xz(
-        &self,
-        data: Cursor<Vec<u8>>,
-        target_dir: &Path,
-    ) -> Result<Vec<PathBuf>> {
-        use xz2::read::XzDecoder;
-
-        let decompressed = tokio::task::spawn_blocking(move || {
-            let mut decoder = XzDecoder::new(data);
-            let mut buf = Vec::new();
-            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
-            Ok::<_, Error>(buf)
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        let cursor = Cursor::new(decompressed);
-        self.extract_tar(cursor, target_dir).await
-    }
-
-    /// Extract single GZIP file
-    #[cfg(feature = "gzip")]
-    async fn extract_gz(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
-        use flate2::read::GzDecoder;
-        use tokio::io::AsyncWriteExt;
-
-        let path_clone = self.path().map(|p| p.to_path_buf());
-
-        let content = tokio::task::spawn_blocking(move || {
-            let mut decoder = GzDecoder::new(data);
-            let mut buf = Vec::new();
-            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
-            Ok::<_, Error>(buf)
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        // For single files, we need to determine the output filename
-        let output_path = if let Some(path) = path_clone {
-            let stem = path
-                .file_stem()
-                .and_then(|s| s.to_str())
-                .unwrap_or("extracted");
-            target_dir.join(stem)
-        } else {
-            target_dir.join("extracted")
-        };
-
-        let mut output_file = fs::File::create(&output_path).await?;
-        output_file.write_all(&content).await?;
-
-        Ok(vec![output_path])
-    }
-
-    /// Extract single BZIP2 file
-    #[cfg(feature = "bzip2")]
-    async fn extract_bz2(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
-        use bzip2::read::BzDecoder;
-        use tokio::io::AsyncWriteExt;
-
-        let path_clone = self.path().map(|p| p.to_path_buf());
-
-        let content = tokio::task::spawn_blocking(move || {
-            let mut decoder = BzDecoder::new(data);
-            let mut buf = Vec::new();
-            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
-            Ok::<_, Error>(buf)
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        let output_path = if let Some(path) = path_clone {
-            let stem = path
-                .file_stem()
-                .and_then(|s| s.to_str())
-                .unwrap_or("extracted");
-            target_dir.join(stem)
-        } else {
-            target_dir.join("extracted")
-        };
-
-        let mut output_file = fs::File::create(&output_path).await?;
-        output_file.write_all(&content).await?;
-
-        Ok(vec![output_path])
-    }
-
-    /// Extract single XZ file
-    #[cfg(feature = "xz")]
-    async fn extract_xz(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
-        use tokio::io::AsyncWriteExt;
-        use xz2::read::XzDecoder;
-
-        let path_clone = self.path().map(|p| p.to_path_buf());
-
-        let content = tokio::task::spawn_blocking(move || {
-            let mut decoder = XzDecoder::new(data);
-            let mut buf = Vec::new();
-            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
-            Ok::<_, Error>(buf)
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        let output_path = if let Some(path) = path_clone {
-            let stem = path
-                .file_stem()
-                .and_then(|s| s.to_str())
-                .unwrap_or("extracted");
-            target_dir.join(stem)
-        } else {
-            target_dir.join("extracted")
-        };
-
-        let mut output_file = fs::File::create(&output_path).await?;
-        output_file.write_all(&content).await?;
-
-        Ok(vec![output_path])
-    }
-
-    /// Extract 7z archive
-    #[cfg(feature = "sevenz")]
-    async fn extract_7z(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
-        let target_dir = target_dir.to_path_buf();
-        let data_vec = data.into_inner();
-
-        // Use spawn_blocking for CPU-bound decompression
-        let files = tokio::task::spawn_blocking(move || {
-            // Write data to a temp file since sevenz-rust works better with files
-            let temp_file = tempfile::NamedTempFile::new().map_err(|e| {
-                Error::invalid_archive(format!("Failed to create temp file: {}", e))
-            })?;
-            std::fs::write(temp_file.path(), &data_vec)?;
-
-            // Decompress to target directory
-            sevenz_rust::decompress_file(temp_file.path(), &target_dir).map_err(|e| {
-                Error::invalid_archive(format!("Failed to extract 7z archive: {}", e))
-            })?;
-
-            // Collect extracted files
-            fn collect_files(dir: &Path) -> std::io::Result<Vec<PathBuf>> {
-                let mut files = Vec::new();
-                if dir.is_dir() {
-                    for entry in std::fs::read_dir(dir)? {
-                        let entry = entry?;
-                        let path = entry.path();
-                        if path.is_file() {
-                            files.push(path);
-                        } else if path.is_dir() {
-                            files.extend(collect_files(&path)?);
-                        }
-                    }
-                }
-                Ok(files)
-            }
-
-            let files = collect_files(&target_dir)?;
-            Ok::<_, Error>(files)
-        })
-        .await
-        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
-
-        Ok(files)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_archive_file_from_bytes() {
-        let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature
-        let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data);
-        assert_eq!(archive.archive_type(), ArchiveType::Zip);
-        assert!(archive.path().is_none());
-        // Content source should be valid
-        assert!(!archive.content_source().as_uuid().is_nil());
-    }
-
-    #[test]
-    fn test_archive_file_from_content_data() {
-        let content_data = ContentData::from(vec![0x50, 0x4B, 0x03, 0x04]);
-        let original_source = content_data.content_source;
-        let archive = ArchiveFile::from_content_data(ArchiveType::Zip, content_data);
-        // Should preserve the original content source
-        assert_eq!(archive.content_source(), original_source);
-    }
-
-    #[test]
-    fn test_archive_file_from_path() -> Result<()> {
-        let archive = ArchiveFile::from_path("test.zip")?;
-        assert_eq!(archive.archive_type(), ArchiveType::Zip);
-        assert!(archive.path().is_some());
-        Ok(())
-    }
-
-    #[test]
-    fn test_compound_extension() -> Result<()> {
-        let archive = ArchiveFile::from_path("test.tar.gz")?;
-        assert_eq!(archive.archive_type(), ArchiveType::TarGz);
-        Ok(())
-    }
-
-    #[test]
-    fn test_unsupported_extension() {
-        let result = ArchiveFile::from_path("test.unknown");
-        assert!(result.is_err());
-    }
-
-    #[tokio::test]
-    async fn test_memory_size() {
-        let data = vec![1, 2, 3, 4, 5];
-        let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data);
-        assert_eq!(archive.size().await.unwrap(), 5);
-    }
-}
diff --git a/crates/nvisy-archive/src/handler/mod.rs b/crates/nvisy-archive/src/handler/mod.rs
index a3b6c30..ffcdb70 100644
--- a/crates/nvisy-archive/src/handler/mod.rs
+++ b/crates/nvisy-archive/src/handler/mod.rs
@@ -1,572 +1,10 @@
-//! Archive file handler for managing extracted archive contents
+//! Archive format handlers
 //!
-//! This module provides the [`ArchiveHandler`] struct for managing
-//! temporary directories containing extracted archive contents and
-//! repacking them back into archives.
-
-pub mod tar_handler;
-pub mod zip_handler;
-
-use std::fs;
-use std::path::{Path, PathBuf};
-
-// Re-exports for convenience
-pub use tar_handler::{TarArchiveBuilder, TarArchiveHandler, TarDirectoryBuilder, TarEntryInfo};
-use tempfile::TempDir;
-pub use zip_handler::{ZipArchiveBuilder, ZipArchiveHandler, ZipDirectoryBuilder, ZipEntryInfo};
-
-use crate::{
-    ArchiveErrorExt, ArchiveType, ContentKind, ContentMetadata, ContentSource, Error, Result,
-};
-
-/// Detect content kind from file extension
-///
-/// This function maps common file extensions to their content kind categories.
-fn content_kind_from_extension(extension: &str) -> ContentKind {
-    let ext = extension.to_lowercase();
-    match ext.as_str() {
-        // Text formats
-        "txt" | "text" | "md" | "markdown" | "rst" | "xml" | "json" | "yaml" | "yml" | "toml"
-        | "ini" | "cfg" | "conf" | "log" => ContentKind::Text,
-
-        // Document formats
-        "pdf" | "doc" | "docx" | "rtf" | "odt" | "pages" => ContentKind::Document,
-
-        // Spreadsheet formats
-        "csv" | "tsv" | "xls" | "xlsx" | "ods" | "numbers" => ContentKind::Spreadsheet,
-
-        // Image formats
-        "jpg" | "jpeg" | "png" | "gif" | "bmp" | "svg" | "webp" | "ico" | "tiff" | "tif" => {
-            ContentKind::Image
-        }
-
-        // Archive formats
-        "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "tgz" | "tbz2" | "txz" => {
-            ContentKind::Archive
-        }
-
-        _ => ContentKind::Unknown,
-    }
-}
-
-/// Handler for unpacked archive contents
-///
-/// This struct manages the temporary directory containing extracted
-/// archive contents and provides methods for iterating over files
-/// and repacking the archive.
-#[derive(Debug)]
-pub struct ArchiveHandler {
-    /// Content source identifier for the original archive
-    pub content_source: ContentSource,
-    /// Type of the original archive
-    pub archive_type: ArchiveType,
-    /// Original archive file path (if loaded from file)
-    pub original_path: Option<PathBuf>,
-    /// Temporary directory containing extracted files
-    temp_dir: TempDir,
-    /// Files found in the archive
-    files: Vec<PathBuf>,
-}
-
-impl ArchiveHandler {
-    /// Create a new archive file handler
-    ///
-    /// This is typically called internally by `ArchiveFile::unpack()`.
-    pub fn new(
-        content_source: ContentSource,
-        archive_type: ArchiveType,
-        original_path: Option<PathBuf>,
-        temp_dir: TempDir,
-        files: Vec<PathBuf>,
-    ) -> Self {
-        Self {
-            content_source,
-            archive_type,
-            original_path,
-            temp_dir,
-            files,
-        }
-    }
-
-    /// Get the path to the temporary directory containing extracted files
-    pub fn temp_path(&self) -> &Path {
-        self.temp_dir.path()
-    }
-
-    /// Get the number of files in the archive
-    pub fn file_count(&self) -> usize {
-        self.files.len()
-    }
-
-    /// Check if the archive is empty
-    pub fn is_empty(&self) -> bool {
-        self.files.is_empty()
-    }
-
-    /// Get a list of all file paths in the archive
-    pub fn file_paths(&self) -> &[PathBuf] {
-        &self.files
-    }
-
-    /// Find files matching a specific predicate
-    pub fn find_files(&self, predicate: impl Fn(&PathBuf) -> bool) -> Vec<&PathBuf> {
-        self.files.iter().filter(|path| predicate(path)).collect()
-    }
-
-    /// Find files with specific extension
-    pub fn find_files_by_extension(&self, extension: &str) -> Vec<&PathBuf> {
-        self.find_files(|path| {
-            path.extension()
-                .and_then(|ext| ext.to_str())
-                .map(|ext| ext.eq_ignore_ascii_case(extension))
-                .unwrap_or(false)
-        })
-    }
-
-    /// Find files matching a specific content kind
-    pub fn find_files_by_kind(&self, kind: ContentKind) -> Vec<&PathBuf> {
-        self.find_files(|path| self.content_kind_for_path(path) == kind)
-    }
-
-    /// Get the content kind for a file path based on its extension
-    pub fn content_kind_for_path(&self, path: &Path) -> ContentKind {
-        path.extension()
-            .and_then(|ext| ext.to_str())
-            .map(content_kind_from_extension)
-            .unwrap_or_default()
-    }
-
-    /// Create content metadata for a file using its relative path within the archive
-    ///
-    /// The returned metadata has a new ContentSource (derived from the archive's source)
-    /// and includes the relative path within the archive.
-    pub fn content_metadata_for_file(&self, relative_path: impl AsRef<Path>) -> ContentMetadata {
-        ContentMetadata::with_path(ContentSource::new(), relative_path.as_ref())
-    }
-
-    /// Get content metadata for all files in the archive
-    ///
-    /// Returns a list of ContentMetadata entries for each extracted file,
-    /// using relative paths within the archive.
-    pub fn all_content_metadata(&self) -> Result<Vec<ContentMetadata>> {
-        let temp_path = self.temp_path();
-        self.files
-            .iter()
-            .map(|path| {
-                let relative = path
-                    .strip_prefix(temp_path)
-                    .map_err(|e| Error::invalid_archive(format!("Invalid file path: {}", e)))?;
-                Ok(ContentMetadata::with_path(ContentSource::new(), relative))
-            })
-            .collect()
-    }
-
-    /// Get all files recursively in the temporary directory
-    pub fn refresh_file_list(&mut self) -> Result<()> {
-        self.files = Self::scan_files(self.temp_path())?;
-        Ok(())
-    }
-
-    /// Create a new archive from the current temporary directory contents
-    ///
-    /// This method packages all files in the temporary directory back into
-    /// an archive file at the specified location.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    /// - The target directory cannot be created
-    /// - Archive creation fails
-    /// - File I/O operations fail
-    ///
-    /// # Example
-    ///
-    /// ```no_run
-    /// use nvisy_rt_archive::{ArchiveFile, ArchiveType};
-    ///
-    /// # async fn example() -> nvisy_rt_archive::Result<()> {
-    /// let archive = ArchiveFile::from_path("original.zip")?;
-    /// let handler = archive.unpack().await?;
-    ///
-    /// // Modify files in handler.temp_path()...
-    ///
-    /// let new_archive = handler.pack("modified.zip").await?;
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub async fn pack(self, target_path: impl AsRef<Path>) -> Result<crate::ArchiveFile> {
-        let target_path = target_path.as_ref();
-
-        // Ensure parent directory exists
-        if let Some(parent) = target_path.parent() {
-            tokio::fs::create_dir_all(parent).await.map_err(|e| {
-                Error::invalid_archive(format!("Failed to create parent directory: {}", e))
-            })?;
-        }
-
-        // Determine archive type from target path extension or use original type
-        let archive_type = target_path
-            .extension()
-            .and_then(ArchiveType::from_file_extension)
-            .unwrap_or(self.archive_type);
-
-        match archive_type {
-            ArchiveType::Zip => {
-                #[cfg(feature = "zip")]
-                {
-                    zip_handler::ZipDirectoryBuilder::create(self.temp_path(), target_path).await?;
-                }
-                #[cfg(not(feature = "zip"))]
-                {
-                    return Err(Error::unsupported_format("ZIP support not enabled"));
-                }
-            }
-            ArchiveType::Tar | ArchiveType::TarGz | ArchiveType::TarBz2 | ArchiveType::TarXz => {
-                #[cfg(feature = "tar")]
-                {
-                    tar_handler::TarDirectoryBuilder::create(
-                        self.temp_path(),
-                        target_path,
-                        archive_type,
-                    )
-                    .await?;
-                }
-                #[cfg(not(feature = "tar"))]
-                {
-                    return Err(Error::unsupported_format("TAR support not enabled"));
-                }
-            }
-            _ => {
-                return Err(Error::unsupported_format(format!(
-                    "Packing format not supported: {}",
-                    archive_type
-                )));
-            }
-        }
-
-        crate::ArchiveFile::from_path(target_path)
-    }
-
-    /// Scan the directory for files recursively
-    pub fn scan_files(dir: &Path) -> Result<Vec<PathBuf>> {
-        let mut files = Vec::new();
-        let entries = fs::read_dir(dir)?;
-
-        for entry in entries {
-            let entry = entry?;
-            let path = entry.path();
-
-            if path.is_file() {
-                files.push(path);
-            } else if path.is_dir() {
-                // Recursively scan subdirectories
-                let mut sub_files = Self::scan_files(&path)?;
-                files.append(&mut sub_files);
-            }
-        }
-
-        files.sort();
-        Ok(files)
-    }
-
-    /// Get relative paths of all files (relative to temp directory)
-    pub fn relative_file_paths(&self) -> Result<Vec<PathBuf>> {
-        let temp_path = self.temp_path();
-        self.files
-            .iter()
-            .map(|path| {
-                path.strip_prefix(temp_path)
-                    .map(|p| p.to_path_buf())
-                    .map_err(|e| Error::invalid_archive(format!("Invalid file path: {}", e)))
-            })
-            .collect()
-    }
-
-    /// Check if a specific file exists in the archive
-    pub fn contains_file(&self, relative_path: impl AsRef<Path>) -> bool {
-        let target_path = self.temp_path().join(relative_path);
-        self.files.contains(&target_path)
-    }
-
-    /// Get the content of a specific file as bytes
-    pub async fn read_file(&self, relative_path: impl AsRef<Path>) -> Result<Vec<u8>> {
-        let target_path = self.temp_path().join(relative_path);
-        if !self.files.contains(&target_path) {
-            return Err(Error::entry_not_found(
-                target_path.to_string_lossy().to_string(),
-            ));
-        }
-        tokio::fs::read(&target_path).await.map_err(Into::into)
-    }
-
-    /// Write content to a file in the archive
-    pub async fn write_file(
-        &mut self,
-        relative_path: impl AsRef<Path>,
-        content: &[u8],
-    ) -> Result<()> {
-        let target_path = self.temp_path().join(relative_path.as_ref());
-
-        // Create parent directories if they don't exist
-        if let Some(parent) = target_path.parent() {
-            tokio::fs::create_dir_all(parent).await?;
-        }
-
-        tokio::fs::write(&target_path, content).await?;
-
-        // Add to files list if not already present
-        if !self.files.contains(&target_path) {
-            self.files.push(target_path);
-            self.files.sort();
-        }
-
-        Ok(())
-    }
-}
-
-/// Iterator implementation for ArchiveHandler
-///
-/// Iterates over all file paths in the extracted archive.
-impl<'a> IntoIterator for &'a ArchiveHandler {
-    type IntoIter = std::slice::Iter<'a, PathBuf>;
-    type Item = &'a PathBuf;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.files.iter()
-    }
-}
-
-impl IntoIterator for ArchiveHandler {
-    type IntoIter = std::vec::IntoIter<PathBuf>;
-    type Item = PathBuf;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.files.into_iter()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use tempfile::TempDir;
-
-    use super::*;
-
-    #[test]
-    fn test_archive_handler_creation() {
-        let temp_dir = TempDir::new().unwrap();
-        let files = vec![PathBuf::from("test.txt")];
-        let content_source = ContentSource::new();
-
-        let handler = ArchiveHandler::new(
-            content_source,
-            ArchiveType::Zip,
-            Some(PathBuf::from("test.zip")),
-            temp_dir,
-            files.clone(),
-        );
-
-        assert_eq!(handler.archive_type, ArchiveType::Zip);
-        assert_eq!(handler.file_count(), 1);
-        assert!(!handler.is_empty());
-    }
-
-    #[test]
-    fn test_empty_archive_handler() {
-        let temp_dir = TempDir::new().unwrap();
-        let files = vec![];
-
-        let handler = ArchiveHandler::new(
-            ContentSource::new(),
-            ArchiveType::Zip,
-            None,
-            temp_dir,
-            files,
-        );
-
-        assert_eq!(handler.file_count(), 0);
-        assert!(handler.is_empty());
-    }
-
-    #[test]
-    fn test_find_files_by_extension() {
-        let temp_dir = TempDir::new().unwrap();
-        let files = vec![
-            PathBuf::from("test.txt"),
-            PathBuf::from("data.json"),
-            PathBuf::from("image.png"),
-        ];
-
-        let handler = ArchiveHandler::new(
-            ContentSource::new(),
-            ArchiveType::Zip,
-            None,
-            temp_dir,
-            files,
-        );
-
-        let txt_files = handler.find_files_by_extension("txt");
-        assert_eq!(txt_files.len(), 1);
-
-        let json_files = handler.find_files_by_extension("json");
-        assert_eq!(json_files.len(), 1);
-    }
-
-    #[test]
-    fn test_iterator() {
-        let temp_dir = TempDir::new().unwrap();
-        let files = vec![PathBuf::from("file1.txt"), PathBuf::from("file2.txt")];
-
-        let handler = ArchiveHandler::new(
-            ContentSource::new(),
-            ArchiveType::Zip,
-            None,
-            temp_dir,
-            files,
-        );
-
-        let collected: Vec<&PathBuf> = (&handler).into_iter().collect();
-        assert_eq!(collected.len(), 2);
-    }
-
-    #[tokio::test]
-    async fn test_write_and_read_file() {
-        let temp_dir = TempDir::new().unwrap();
-        let mut handler = ArchiveHandler::new(
-            ContentSource::new(),
-            ArchiveType::Zip,
-            None,
-            temp_dir,
-            vec![],
-        );
-
-        let content = b"Hello, World!";
-        handler.write_file("test.txt", content).await.unwrap();
-
-        assert!(handler.contains_file("test.txt"));
-        let read_content = handler.read_file("test.txt").await.unwrap();
-        assert_eq!(read_content, content);
-    }
-
-    #[test]
-    fn test_find_files_by_kind() {
-        let temp_dir = TempDir::new().unwrap();
-        let files = vec![
-            PathBuf::from("document.pdf"),
-            PathBuf::from("data.csv"),
-            PathBuf::from("image.png"),
-            PathBuf::from("archive.zip"),
-            PathBuf::from("notes.txt"),
-        ];
-
-        let handler = ArchiveHandler::new(
-            ContentSource::new(),
-            ArchiveType::Zip,
-            None,
-            temp_dir,
-            files,
-        );
-
-        let docs = handler.find_files_by_kind(ContentKind::Document);
-        assert_eq!(docs.len(), 1);
-        assert!(docs[0].to_string_lossy().contains("document.pdf"));
-
-        let spreadsheets = handler.find_files_by_kind(ContentKind::Spreadsheet);
-        assert_eq!(spreadsheets.len(), 1);
-        assert!(spreadsheets[0].to_string_lossy().contains("data.csv"));
-
-        let images = handler.find_files_by_kind(ContentKind::Image);
-        assert_eq!(images.len(), 1);
-
-        let text = handler.find_files_by_kind(ContentKind::Text);
-        assert_eq!(text.len(), 1);
-    }
-
-    #[test]
-    fn test_content_kind_for_path() {
-        let temp_dir = TempDir::new().unwrap();
-        let handler = ArchiveHandler::new(
-            ContentSource::new(),
-            ArchiveType::Zip,
-            None,
-            temp_dir,
-            vec![],
-        );
-
-        assert_eq!(
-            handler.content_kind_for_path(Path::new("test.pdf")),
-            ContentKind::Document
-        );
-        assert_eq!(
-            handler.content_kind_for_path(Path::new("data.csv")),
-            ContentKind::Spreadsheet
-        );
-        assert_eq!(
-            handler.content_kind_for_path(Path::new("image.png")),
-            ContentKind::Image
-        );
-        assert_eq!(
-            handler.content_kind_for_path(Path::new("notes.txt")),
-            ContentKind::Text
-        );
-        assert_eq!(
-            handler.content_kind_for_path(Path::new("archive.zip")),
-            ContentKind::Archive
-        );
-        assert_eq!(
-            handler.content_kind_for_path(Path::new("no_extension")),
-            ContentKind::Unknown
-        );
-    }
-
-    #[test]
-    fn test_content_metadata_for_file() {
-        let temp_dir = TempDir::new().unwrap();
-        let handler = ArchiveHandler::new(
-            ContentSource::new(),
-            ArchiveType::Zip,
-            None,
-            temp_dir,
-            vec![],
-        );
-
-        let metadata = handler.content_metadata_for_file("docs/report.pdf");
-        assert_eq!(metadata.filename(), Some("report.pdf"));
-        assert_eq!(metadata.file_extension(), Some("pdf"));
-        assert!(!metadata.content_source.as_uuid().is_nil());
-    }
-
-    #[tokio::test]
-    async fn test_all_content_metadata() {
-        let temp_dir = TempDir::new().unwrap();
-        let temp_path = temp_dir.path().to_path_buf();
-
-        // Create actual files in temp dir
-        let file1 = temp_path.join("doc.pdf");
-        let file2 = temp_path.join("data.csv");
-        tokio::fs::write(&file1, b"pdf content").await.unwrap();
-        tokio::fs::write(&file2, b"csv content").await.unwrap();
-
-        let files = vec![file1, file2];
-        let handler = ArchiveHandler::new(
-            ContentSource::new(),
-            ArchiveType::Zip,
-            None,
-            temp_dir,
-            files,
-        );
-
-        let metadata_list = handler.all_content_metadata().unwrap();
-        assert_eq!(metadata_list.len(), 2);
-
-        // Check that each metadata has the correct relative path
-        let filenames: Vec<_> = metadata_list.iter().filter_map(|m| m.filename()).collect();
-        assert!(filenames.contains(&"doc.pdf"));
-        assert!(filenames.contains(&"data.csv"));
-
-        // Each should have a unique content source
-        assert_ne!(
-            metadata_list[0].content_source,
-            metadata_list[1].content_source
-        );
-    }
-}
+//! This module provides low-level handlers for packing and unpacking archives.
+
+#[cfg(feature = "sevenz")]
+pub(crate) mod sevenz;
+#[cfg(feature = "tar")]
+pub(crate) mod tar;
+#[cfg(feature = "zip")]
+pub(crate) mod zip;
diff --git a/crates/nvisy-archive/src/handler/sevenz.rs b/crates/nvisy-archive/src/handler/sevenz.rs
new file mode 100644
index 0000000..dc88195
--- /dev/null
+++ b/crates/nvisy-archive/src/handler/sevenz.rs
@@ -0,0 +1,46 @@
+//! 7z archive handler implementation
+
+use std::io::Cursor;
+use std::path::{Path, PathBuf};
+
+use crate::{ArchiveErrorExt, Result};
+
+/// Extracts a 7z archive to the target directory.
+///
+/// Returns a list of extracted file paths.
+pub fn unpack(data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
+    // sevenz-rust works better with files, so write to temp file first
+    let temp_file = tempfile::NamedTempFile::new()
+        .map_err(|e| crate::Error::invalid_archive(format!("Failed to create temp file: {}", e)))?;
+
+    std::fs::write(temp_file.path(), data.into_inner())
+        .map_err(|e| crate::Error::invalid_archive(format!("Failed to write temp file: {}", e)))?;
+
+    sevenz_rust::decompress_file(temp_file.path(), target_dir).map_err(|e| {
+        crate::Error::invalid_archive(format!("Failed to extract 7z archive: {}", e))
+    })?;
+
+    collect_files(target_dir)
+}
+
+fn collect_files(dir: &Path) -> Result<Vec<PathBuf>> {
+    let mut files = Vec::new();
+
+    for entry in std::fs::read_dir(dir)
+        .map_err(|e| crate::Error::invalid_archive(format!("Failed to read directory: {}", e)))?
+    {
+        let path = entry
+            .map_err(|e| crate::Error::invalid_archive(format!("Failed to read entry: {}", e)))?
+            .path();
+
+        if path.is_file() {
+            files.push(path);
+        } else if path.is_dir() {
+            files.extend(collect_files(&path)?);
+        }
+    }
+
+    Ok(files)
+}
+
+// Note: sevenz-rust does not support compression/packing
diff --git a/crates/nvisy-archive/src/handler/tar.rs b/crates/nvisy-archive/src/handler/tar.rs
new file mode 100644
index 0000000..4efc15d
--- /dev/null
+++ b/crates/nvisy-archive/src/handler/tar.rs
@@ -0,0 +1,224 @@
+//! TAR archive handler implementation
+
+use std::io::{Read, Write};
+use std::path::{Path, PathBuf};
+
+use tar::{Archive, Builder, EntryType};
+
+use crate::file::ArchiveType;
+use crate::{ArchiveErrorExt, Result, TarResultExt};
+
+/// Extracts a TAR archive to the target directory.
+///
+/// Returns a list of extracted file paths.
+pub fn unpack<R: Read>(reader: R, target_dir: &Path) -> Result<Vec<PathBuf>> {
+    let mut archive = Archive::new(reader);
+    let mut files = Vec::new();
+
+    for entry in archive.entries().map_tar_err()? {
+        let mut entry = entry.map_tar_err()?;
+        let path = entry.path().map_tar_err()?.to_path_buf();
+        let file_path = target_dir.join(&path);
+
+        if let Some(parent) = file_path.parent() {
+            std::fs::create_dir_all(parent).map_tar_err()?;
+        }
+
+        match entry.header().entry_type() {
+            EntryType::Regular => {
+                let mut content = Vec::new();
+                entry.read_to_end(&mut content).map_tar_err()?;
+
+                let mut output = std::fs::File::create(&file_path).map_tar_err()?;
+                output.write_all(&content).map_tar_err()?;
+
+                files.push(file_path);
+            }
+            EntryType::Directory => {
+                std::fs::create_dir_all(&file_path).map_tar_err()?;
+            }
+            EntryType::Symlink =>
+            {
+                #[cfg(unix)]
+                if let Ok(Some(link_target)) = entry.link_name() {
+                    std::os::unix::fs::symlink(&link_target, &file_path).map_tar_err()?;
+                }
+            }
+            EntryType::Link => {
+                if let Ok(Some(link_target)) = entry.link_name() {
+                    let source_path = target_dir.join(link_target);
+                    if source_path.exists() {
+                        std::fs::copy(&source_path, &file_path).map_tar_err()?;
+                        files.push(file_path);
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+
+    Ok(files)
+}
+
+/// Extracts a gzip-compressed TAR archive.
+#[cfg(feature = "gzip")]
+pub fn unpack_gz<R: Read>(reader: R, target_dir: &Path) -> Result<Vec<PathBuf>> {
+    let decoder = flate2::read::GzDecoder::new(reader);
+    unpack(decoder, target_dir)
+}
+
+/// Extracts a bzip2-compressed TAR archive.
+#[cfg(feature = "bzip2")]
+pub fn unpack_bz2<R: Read>(reader: R, target_dir: &Path) -> Result<Vec<PathBuf>> {
+    let decoder = bzip2::read::BzDecoder::new(reader);
+    unpack(decoder, target_dir)
+}
+
+/// Extracts an xz-compressed TAR archive.
+#[cfg(feature = "xz")]
+pub fn unpack_xz<R: Read>(reader: R, target_dir: &Path) -> Result<Vec<PathBuf>> {
+    let decoder = xz2::read::XzDecoder::new(reader);
+    unpack(decoder, target_dir)
+}
+
+/// Packs a directory into a TAR archive with the specified compression.
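+///
+/// # Example
+///
+/// A minimal sketch of a pack/unpack round trip. It is marked `ignore` because
+/// this module is `pub(crate)` and not reachable from doctests, and the paths
+/// used here are placeholders, not part of the crate:
+///
+/// ```ignore
+/// use std::fs::File;
+/// use std::path::Path;
+///
+/// use crate::file::ArchiveType;
+/// use crate::handler::tar;
+///
+/// // Pack the contents of `source_dir` into a gzip-compressed tarball
+/// // (requires the `gzip` feature).
+/// tar::pack(Path::new("source_dir"), Path::new("out.tar.gz"), ArchiveType::TarGz)?;
+///
+/// // Unpack it again, collecting the extracted file paths.
+/// let files = tar::unpack_gz(File::open("out.tar.gz")?, Path::new("extracted"))?;
+/// ```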
+pub fn pack(source_dir: &Path, target_path: &Path, archive_type: ArchiveType) -> Result<()> {
+    let files = collect_files(source_dir)?;
+
+    match archive_type {
+        ArchiveType::Tar => pack_plain(source_dir, target_path, &files),
+        #[cfg(feature = "gzip")]
+        ArchiveType::TarGz => pack_gz(source_dir, target_path, &files),
+        #[cfg(feature = "bzip2")]
+        ArchiveType::TarBz2 => pack_bz2(source_dir, target_path, &files),
+        #[cfg(feature = "xz")]
+        ArchiveType::TarXz => pack_xz(source_dir, target_path, &files),
+        _ => Err(crate::Error::unsupported_format(format!(
+            "Unsupported TAR variant: {:?}",
+            archive_type
+        ))),
+    }
+}
+
+fn pack_plain(source_dir: &Path, target_path: &Path, files: &[PathBuf]) -> Result<()> {
+    let file = std::fs::File::create(target_path).map_tar_err()?;
+    let mut builder = Builder::new(file);
+    append_files(&mut builder, source_dir, files)?;
+    builder.finish().map_tar_err()?;
+    Ok(())
+}
+
+#[cfg(feature = "gzip")]
+fn pack_gz(source_dir: &Path, target_path: &Path, files: &[PathBuf]) -> Result<()> {
+    let file = std::fs::File::create(target_path).map_tar_err()?;
+    let encoder = flate2::write::GzEncoder::new(file, flate2::Compression::default());
+    let mut builder = Builder::new(encoder);
+    append_files(&mut builder, source_dir, files)?;
+    builder.into_inner().map_tar_err()?.finish().map_tar_err()?;
+    Ok(())
+}
+
+#[cfg(feature = "bzip2")]
+fn pack_bz2(source_dir: &Path, target_path: &Path, files: &[PathBuf]) -> Result<()> {
+    let file = std::fs::File::create(target_path).map_tar_err()?;
+    let encoder = bzip2::write::BzEncoder::new(file, bzip2::Compression::default());
+    let mut builder = Builder::new(encoder);
+    append_files(&mut builder, source_dir, files)?;
+    builder.into_inner().map_tar_err()?.finish().map_tar_err()?;
+    Ok(())
+}
+
+#[cfg(feature = "xz")]
+fn pack_xz(source_dir: &Path, target_path: &Path, files: &[PathBuf]) -> Result<()> {
+    let file = std::fs::File::create(target_path).map_tar_err()?;
+    let encoder = xz2::write::XzEncoder::new(file, 6);
+    let mut builder = Builder::new(encoder);
+    append_files(&mut builder, source_dir, files)?;
+    builder.into_inner().map_tar_err()?.finish().map_tar_err()?;
+    Ok(())
+}
+
+fn append_files<W: Write>(
+    builder: &mut Builder<W>,
+    source_dir: &Path,
+    files: &[PathBuf],
+) -> Result<()> {
+    for file_path in files {
+        let relative_path = file_path
+            .strip_prefix(source_dir)
+            .map_err(|e| crate::Error::invalid_archive(format!("Invalid file path: {}", e)))?;
+        builder
+            .append_path_with_name(file_path, relative_path)
+            .map_tar_err()?;
+    }
+    Ok(())
+}
+
+fn collect_files(dir: &Path) -> Result<Vec<PathBuf>> {
+    let mut files = Vec::new();
+
+    for entry in std::fs::read_dir(dir).map_tar_err()? {
+        let path = entry.map_tar_err()?.path();
+        if path.is_file() {
+            files.push(path);
+        } else if path.is_dir() {
+            files.extend(collect_files(&path)?);
+        }
+    }
+
+    files.sort();
+    Ok(files)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    #[test]
+    fn test_pack_and_unpack_tar() {
+        let source_dir = TempDir::new().unwrap();
+        let target_dir = TempDir::new().unwrap();
+
+        std::fs::write(source_dir.path().join("test.txt"), b"Hello").unwrap();
+        std::fs::create_dir(source_dir.path().join("subdir")).unwrap();
+        std::fs::write(source_dir.path().join("subdir/nested.txt"), b"World").unwrap();
+
+        let archive_path = target_dir.path().join("test.tar");
+        pack(source_dir.path(), &archive_path, ArchiveType::Tar).unwrap();
+
+        let extract_dir = TempDir::new().unwrap();
+        let files = unpack(
+            std::fs::File::open(&archive_path).unwrap(),
+            extract_dir.path(),
+        )
+        .unwrap();
+
+        assert_eq!(files.len(), 2);
+        assert_eq!(
+            std::fs::read_to_string(extract_dir.path().join("test.txt")).unwrap(),
+            "Hello"
+        );
+    }
+
+    #[cfg(feature = "gzip")]
+    #[test]
+    fn test_pack_and_unpack_tar_gz() {
+        let source_dir = TempDir::new().unwrap();
+        let target_dir = TempDir::new().unwrap();
+
+        std::fs::write(source_dir.path().join("test.txt"), b"Hello").unwrap();
+
+        let archive_path = target_dir.path().join("test.tar.gz");
+        pack(source_dir.path(), &archive_path, ArchiveType::TarGz).unwrap();
+
+        let extract_dir = TempDir::new().unwrap();
+        let files = unpack_gz(
+            std::fs::File::open(&archive_path).unwrap(),
+            extract_dir.path(),
+        )
+        .unwrap();
+
+        assert_eq!(files.len(), 1);
+    }
+}
diff --git a/crates/nvisy-archive/src/handler/tar_handler.rs b/crates/nvisy-archive/src/handler/tar_handler.rs
deleted file mode 100644
index efa030b..0000000
--- a/crates/nvisy-archive/src/handler/tar_handler.rs
+++ /dev/null
@@ -1,593 +0,0 @@
-//! TAR archive handler implementation
-//!
-//! This module provides specialized handling for TAR archives using the tar crate,
-//! including support for compressed TAR formats (tar.gz, tar.bz2, tar.xz).
-
-use std::io::{Cursor, Read, Write};
-use std::path::{Path, PathBuf};
-
-use tar::{Archive, Builder, EntryType};
-use tokio::fs;
-use tokio::io::AsyncWriteExt;
-
-use crate::{ArchiveErrorExt, ArchiveType, Error, Result};
-
-/// Buffered writer for XZ compression using liblzma-rs
-///
-/// This writer buffers all data and compresses it when dropped or explicitly finished.
-struct XzBufferedWriter<W: Write> {
-    writer: Option<W>,
-    buffer: Vec<u8>,
-}
-
-impl<W: Write> XzBufferedWriter<W> {
-    fn new(writer: W, _buffer: Vec<u8>) -> Self {
-        Self {
-            writer: Some(writer),
-            buffer: Vec::new(),
-        }
-    }
-
-    fn finish(&mut self) -> std::io::Result<()> {
-        if let Some(writer) = self.writer.take() {
-            use xz2::write::XzEncoder;
-            let mut encoder = XzEncoder::new(writer, 6);
-            encoder.write_all(&self.buffer)?;
-            encoder.finish()?;
-        }
-        Ok(())
-    }
-}
-
-impl<W: Write> Write for XzBufferedWriter<W> {
-    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
-        self.buffer.extend_from_slice(buf);
-        Ok(buf.len())
-    }
-
-    fn flush(&mut self) -> std::io::Result<()> {
-        // For buffered XZ compression, we don't flush until finish()
-        Ok(())
-    }
-}
-
-impl<W: Write> Drop for XzBufferedWriter<W> {
-    fn drop(&mut self) {
-        let _ = self.finish();
-    }
-}
-
-/// Specialized handler for TAR archive operations
-///
-/// This handler provides efficient TAR-specific operations using the tar crate,
-/// with support for various compression formats.
-pub struct TarArchiveHandler<R: Read> {
-    /// The underlying TAR archive
-    archive: Archive<R>,
-    /// Archive type (for compression handling)
-    archive_type: ArchiveType,
-}
-
-impl<R: Read> TarArchiveHandler<R> {
-    /// Create a new TAR handler from a reader
-    pub fn new(reader: R, archive_type: ArchiveType) -> Result<Self> {
-        if !archive_type.is_tar_variant() {
-            return Err(Error::unsupported_format(format!(
-                "Expected TAR variant, got: {}",
-                archive_type
-            )));
-        }
-
-        Ok(Self {
-            archive: Archive::new(reader),
-            archive_type,
-        })
-    }
-
-    /// Get the archive type
-    pub fn archive_type(&self) -> ArchiveType {
-        self.archive_type
-    }
-
-    /// Set whether to preserve permissions when extracting
-    pub fn set_preserve_permissions(&mut self, preserve: bool) {
-        self.archive.set_preserve_permissions(preserve);
-    }
-
-    /// Set whether to preserve modification times when extracting
-    pub fn set_preserve_mtime(&mut self, preserve: bool) {
-        self.archive.set_preserve_mtime(preserve);
-    }
-
-    /// Set whether to unpack extended attributes
-    pub fn set_unpack_xattrs(&mut self, unpack: bool) {
-        self.archive.set_unpack_xattrs(unpack);
-    }
-
-    /// Extract all entries to the specified directory
-    pub async fn extract_to(&mut self, target_dir: impl AsRef<Path>) -> Result<Vec<PathBuf>> {
-        let target_dir = target_dir.as_ref();
-        fs::create_dir_all(target_dir).await?;
-
-        let mut extracted_files = Vec::new();
-
-        for entry in self.archive.entries()? {
-            let mut entry = entry?;
-            let path = entry.path()?.to_path_buf();
-            let target_path = target_dir.join(&path);
-
-            // Create parent directories
-            if let Some(parent) = target_path.parent() {
-                fs::create_dir_all(parent).await?;
-            }
-
-            match entry.header().entry_type() {
-                EntryType::Regular => {
-                    let mut content = Vec::new();
-                    entry.read_to_end(&mut content)?;
-
-                    let mut file = fs::File::create(&target_path).await?;
-                    file.write_all(&content).await?;
-
-                    extracted_files.push(target_path);
-                }
-                EntryType::Directory => {
-                    fs::create_dir_all(&target_path).await?;
-                }
-                EntryType::Symlink => {
-                    if let Ok(Some(link_target)) = entry.link_name() {
-                        #[cfg(unix)]
-                        {
-                            tokio::fs::symlink(&link_target, &target_path).await?;
-                        }
-                        #[cfg(windows)]
-                        {
-                            // Windows requires different handling for symlinks
-                            if target_path.is_dir() {
-                                tokio::fs::symlink_dir(&link_target, &target_path).await?;
-                            } else {
-                                tokio::fs::symlink_file(&link_target, &target_path).await?;
-                            }
-                        }
-                    }
-                }
-                EntryType::Link => {
-                    // Hard links - create a copy for simplicity
-                    if let Ok(Some(link_target)) = entry.link_name() {
-                        let source_path = target_dir.join(link_target);
-                        if source_path.exists() {
-                            fs::copy(&source_path, &target_path).await?;
-                            extracted_files.push(target_path);
-                        }
-                    }
-                }
-                _ => {
-                    // Handle other entry types as needed
-                    // For now, we skip unsupported types
-                }
-            }
-        }
-
-        Ok(extracted_files)
-    }
-
-    /// Get entries as an iterator
-    pub fn entries(&mut self) -> Result<tar::Entries<'_, R>> {
-        Ok(self.archive.entries()?)
-    }
-
-    /// List all entries without extracting
-    pub fn list_entries(&mut self) -> Result<Vec<TarEntryInfo>> {
-        let mut entries = Vec::new();
-
-        for entry in self.archive.entries()? {
-            let entry = entry?;
-            let header = entry.header();
-
-            let info = TarEntryInfo {
-                path: entry.path()?.to_path_buf(),
-                size: header.size()?,
-                entry_type: header.entry_type(),
-                mode: header.mode()?,
-                uid: header.uid()?,
-                gid: header.gid()?,
-                mtime: header.mtime()?,
-            };
-
-            entries.push(info);
-        }
-
-        Ok(entries)
-    }
-}
-
-/// Information about a TAR entry
-#[derive(Debug, Clone)]
-pub struct TarEntryInfo {
-    /// Path of the entry within the archive
-    pub path: PathBuf,
-    /// Size of the entry in bytes
-    pub size: u64,
-    /// Type of entry (file, directory, symlink, etc.)
-    pub entry_type: EntryType,
-    /// File mode/permissions
-    pub mode: u32,
-    /// User ID
-    pub uid: u64,
-    /// Group ID
-    pub gid: u64,
-    /// Modification time (Unix timestamp)
-    pub mtime: u64,
-}
-
-/// Builder for creating TAR archives
-pub struct TarArchiveBuilder<W: Write> {
-    builder: Builder<W>,
-    archive_type: ArchiveType,
-}
-
-impl<W: Write> TarArchiveBuilder<W> {
-    /// Create a new TAR archive builder
-    pub fn new(writer: W, archive_type: ArchiveType) -> Result<Self> {
-        if !archive_type.is_tar_variant() {
-            return Err(Error::unsupported_format(format!(
-                "Expected TAR variant, got: {}",
-                archive_type
-            )));
-        }
-
-        Ok(Self {
-            builder: Builder::new(writer),
-            archive_type,
-        })
-    }
-
-    /// Get the archive type
-    pub fn archive_type(&self) -> ArchiveType {
-        self.archive_type
-    }
-
-    /// Add a file to the archive from a path
-    pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
-        &mut self,
-        path: P,
-        name: N,
-    ) -> Result<()> {
-        self.builder.append_path_with_name(path, name)?;
-        Ok(())
-    }
-
-    /// Add a file to the archive with the same name as the path
-    pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
-        self.builder.append_path(path)?;
-        Ok(())
-    }
-
-    /// Add a directory to the archive
-    pub fn append_dir<P: AsRef<Path>, Q: AsRef<Path>>(
-        &mut self,
-        path: P,
-        src_path: Q,
-    ) -> Result<()> {
-        self.builder.append_dir(path, src_path)?;
-        Ok(())
-    }
-
-    /// Add a directory recursively to the archive
-    pub fn append_dir_all<P: AsRef<Path>, Q: AsRef<Path>>(
-        &mut self,
-        path: P,
-        src_path: Q,
-    ) -> Result<()> {
-        self.builder.append_dir_all(path, src_path)?;
-        Ok(())
-    }
-
-    /// Add data from a reader to the archive
-    pub fn append_data<P: AsRef<Path>, R: Read>(
-        &mut self,
-        path: P,
-        size: u64,
-        data: R,
-    ) -> Result<()> {
-        let mut header = tar::Header::new_gnu();
-        header.set_size(size);
-        header.set_mode(0o644);
-        header.set_cksum();
-
-        self.builder.append_data(&mut header, path, data)?;
-        Ok(())
-    }
-
-    /// Finish writing the archive
-    pub fn finish(self) -> Result<W> {
-        Ok(self.builder.into_inner()?)
-    }
-}
-
-/// Builder for creating TAR archives from directories
-pub struct TarDirectoryBuilder;
-
-impl TarDirectoryBuilder {
-    /// Create a TAR archive from a directory
-    ///
-    /// This method collects all files in the source directory and creates
-    /// a TAR archive at the target path with the specified compression.
- pub async fn create( - source_dir: &Path, - target_path: &Path, - archive_type: ArchiveType, - ) -> Result<()> { - use std::fs; - - // Collect all files in the directory - fn collect_files(dir: &Path) -> Result> { - let mut files = Vec::new(); - let entries = fs::read_dir(dir)?; - - for entry in entries { - let entry = entry?; - let path = entry.path(); - - if path.is_file() { - files.push(path); - } else if path.is_dir() { - let mut sub_files = collect_files(&path)?; - files.append(&mut sub_files); - } - } - - files.sort(); - Ok(files) - } - - let files = collect_files(source_dir)?; - let source_dir = source_dir.to_path_buf(); - let target_path = target_path.to_path_buf(); - - // Use spawn_blocking for CPU-bound compression - tokio::task::spawn_blocking(move || { - match archive_type { - ArchiveType::Tar => { - let file = std::fs::File::create(&target_path)?; - let mut builder = Builder::new(file); - - for file_path in files { - let relative_path = file_path.strip_prefix(&source_dir).map_err(|e| { - Error::invalid_archive(format!("Invalid file path: {}", e)) - })?; - builder.append_path_with_name(&file_path, relative_path)?; - } - - builder.finish()?; - } - #[cfg(feature = "gzip")] - ArchiveType::TarGz => { - use flate2::Compression; - use flate2::write::GzEncoder; - - let file = std::fs::File::create(&target_path)?; - let encoder = GzEncoder::new(file, Compression::default()); - let mut builder = Builder::new(encoder); - - for file_path in files { - let relative_path = file_path.strip_prefix(&source_dir).map_err(|e| { - Error::invalid_archive(format!("Invalid file path: {}", e)) - })?; - builder.append_path_with_name(&file_path, relative_path)?; - } - - builder.finish()?; - } - #[cfg(feature = "bzip2")] - ArchiveType::TarBz2 => { - use bzip2::Compression; - use bzip2::write::BzEncoder; - - let file = std::fs::File::create(&target_path)?; - let encoder = BzEncoder::new(file, Compression::default()); - let mut builder = Builder::new(encoder); - - for file_path in files { - let relative_path = file_path.strip_prefix(&source_dir).map_err(|e| { - Error::invalid_archive(format!("Invalid file path: {}", e)) - })?; - builder.append_path_with_name(&file_path, relative_path)?; - } - - builder.finish()?; - } - #[cfg(feature = "xz")] - ArchiveType::TarXz => { - use xz2::write::XzEncoder; - - let file = std::fs::File::create(&target_path)?; - let encoder = XzEncoder::new(file, 6); - let mut builder = Builder::new(encoder); - - for file_path in files { - let relative_path = file_path.strip_prefix(&source_dir).map_err(|e| { - Error::invalid_archive(format!("Invalid file path: {}", e)) - })?; - builder.append_path_with_name(&file_path, relative_path)?; - } - - let encoder = builder.into_inner()?; - encoder.finish()?; - } - _ => { - return Err(Error::unsupported_format(format!( - "Unsupported TAR variant: {}", - archive_type - ))); - } - } - - Ok::<_, Error>(()) - }) - .await - .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??; - - Ok(()) - } -} - -/// Convenience functions for creating compressed TAR handlers -impl TarArchiveHandler>> { - /// Create a TAR handler from compressed data - pub fn from_compressed_data( - data: Vec, - archive_type: ArchiveType, - ) -> Result>> { - let cursor = Cursor::new(data); - - match archive_type { - ArchiveType::Tar => { - let reader: Box = Box::new(cursor); - Ok(TarArchiveHandler { - archive: Archive::new(reader), - archive_type, - }) - } - ArchiveType::TarGz => { - use flate2::read::GzDecoder; - let decoder = GzDecoder::new(cursor); - let 
reader: Box = Box::new(decoder); - Ok(TarArchiveHandler { - archive: Archive::new(reader), - archive_type, - }) - } - ArchiveType::TarBz2 => { - use bzip2::read::BzDecoder; - let decoder = BzDecoder::new(cursor); - let reader: Box = Box::new(decoder); - Ok(TarArchiveHandler { - archive: Archive::new(reader), - archive_type, - }) - } - ArchiveType::TarXz => { - use xz2::read::XzDecoder; - let decoder = XzDecoder::new(cursor); - let reader: Box = Box::new(decoder); - Ok(TarArchiveHandler { - archive: Archive::new(reader), - archive_type, - }) - } - _ => Err(Error::unsupported_format(format!( - "Not a TAR variant: {}", - archive_type - ))), - } - } -} - -/// Convenience functions for creating compressed TAR builders -impl TarArchiveBuilder { - /// Create a compressed TAR builder - pub fn compressed( - writer: W, - archive_type: ArchiveType, - ) -> Result>> { - match archive_type { - ArchiveType::Tar => { - let writer: Box = Box::new(writer); - Ok(TarArchiveBuilder { - builder: Builder::new(writer), - archive_type, - }) - } - ArchiveType::TarGz => { - use flate2::Compression; - use flate2::write::GzEncoder; - let encoder = GzEncoder::new(writer, Compression::default()); - let writer: Box = Box::new(encoder); - Ok(TarArchiveBuilder { - builder: Builder::new(writer), - archive_type, - }) - } - ArchiveType::TarBz2 => { - use bzip2::Compression; - use bzip2::write::BzEncoder; - let encoder = BzEncoder::new(writer, Compression::default()); - let writer: Box = Box::new(encoder); - Ok(TarArchiveBuilder { - builder: Builder::new(writer), - archive_type, - }) - } - ArchiveType::TarXz => { - // For XZ compression, we need to buffer the data and compress it at the end - // This is a limitation of liblzma-rs compared to xz2's streaming interface - let buffer = Vec::new(); - let xz_writer = XzBufferedWriter::new(writer, buffer); - let writer: Box = Box::new(xz_writer); - Ok(TarArchiveBuilder { - builder: Builder::new(writer), - archive_type, - }) - } - _ => Err(Error::unsupported_format(format!( - "Not a TAR variant: {}", - archive_type - ))), - } - } -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use super::*; - - #[tokio::test] - async fn test_tar_handler_creation() { - let data = Vec::new(); - let cursor = Cursor::new(data); - let handler = TarArchiveHandler::new(cursor, ArchiveType::Tar); - assert!(handler.is_ok()); - } - - #[test] - fn test_tar_handler_invalid_type() { - let data = Vec::new(); - let cursor = Cursor::new(data); - let handler = TarArchiveHandler::new(cursor, ArchiveType::Zip); - assert!(handler.is_err()); - } - - #[test] - fn test_tar_builder_creation() { - let writer = Vec::new(); - let builder = TarArchiveBuilder::new(writer, ArchiveType::Tar); - assert!(builder.is_ok()); - } - - #[test] - fn test_compressed_builder_creation() { - let writer = Vec::new(); - let builder = TarArchiveBuilder::compressed(writer, ArchiveType::TarGz); - assert!(builder.is_ok()); - } - - #[test] - fn test_entry_info() { - let info = TarEntryInfo { - path: PathBuf::from("test.txt"), - size: 100, - entry_type: EntryType::Regular, - mode: 0o644, - uid: 1000, - gid: 1000, - mtime: 1234567890, - }; - - assert_eq!(info.path, PathBuf::from("test.txt")); - assert_eq!(info.size, 100); - assert_eq!(info.mode, 0o644); - } -} diff --git a/crates/nvisy-archive/src/handler/zip.rs b/crates/nvisy-archive/src/handler/zip.rs new file mode 100644 index 0000000..96fa6c6 --- /dev/null +++ b/crates/nvisy-archive/src/handler/zip.rs @@ -0,0 +1,126 @@ +//! 
ZIP archive handler implementation
+
+use std::io::{Read, Seek, Write};
+use std::path::{Path, PathBuf};
+
+use zip::write::SimpleFileOptions;
+use zip::{CompressionMethod, ZipArchive, ZipWriter};
+
+use crate::{ArchiveErrorExt, Result, ZipResultExt};
+
+/// Extracts a ZIP archive to the target directory.
+///
+/// Returns a list of extracted file paths.
+pub fn unpack<R: Read + Seek>(reader: R, target_dir: &Path) -> Result<Vec<PathBuf>> {
+    let mut archive = ZipArchive::new(reader).map_zip_err()?;
+    let mut files = Vec::new();
+
+    for i in 0..archive.len() {
+        let mut entry = archive.by_index(i).map_zip_err()?;
+        let name = entry.name().to_string();
+        let file_path = target_dir.join(&name);
+
+        if let Some(parent) = file_path.parent() {
+            std::fs::create_dir_all(parent)?;
+        }
+
+        if entry.is_dir() {
+            std::fs::create_dir_all(&file_path)?;
+        } else {
+            let mut content = Vec::with_capacity(entry.size() as usize);
+            entry.read_to_end(&mut content)?;
+
+            let mut output = std::fs::File::create(&file_path)?;
+            output.write_all(&content)?;
+
+            #[cfg(unix)]
+            if let Some(mode) = entry.unix_mode() {
+                use std::os::unix::fs::PermissionsExt;
+                std::fs::set_permissions(&file_path, std::fs::Permissions::from_mode(mode))?;
+            }
+
+            files.push(file_path);
+        }
+    }
+
+    Ok(files)
+}
+
+/// Packs a directory into a ZIP archive.
+pub fn pack(source_dir: &Path, target_path: &Path) -> Result<()> {
+    let files = collect_files(source_dir)?;
+    let file = std::fs::File::create(target_path)?;
+    let mut zip = ZipWriter::new(file);
+
+    let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
+
+    for file_path in files {
+        let relative_path = file_path
+            .strip_prefix(source_dir)
+            .map_err(|e| crate::Error::invalid_archive(format!("Invalid file path: {}", e)))?;
+
+        let content = std::fs::read(&file_path)?;
+        zip.start_file(relative_path.to_string_lossy().as_ref(), options)
+            .map_zip_err()?;
+        zip.write_all(&content)?;
+    }
+
+    zip.finish().map_zip_err()?;
+    Ok(())
+}
+
+fn collect_files(dir: &Path) -> Result<Vec<PathBuf>> {
+    let mut files = Vec::new();
+
+    for entry in std::fs::read_dir(dir)? {
+        let path = entry?.path();
+        if path.is_file() {
+            files.push(path);
+        } else if path.is_dir() {
+            files.extend(collect_files(&path)?);
+        }
+    }
+
+    files.sort();
+    Ok(files)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    #[test]
+    fn test_pack_and_unpack() {
+        let source_dir = TempDir::new().unwrap();
+        let target_dir = TempDir::new().unwrap();
+
+        // Create test files
+        std::fs::write(source_dir.path().join("test.txt"), b"Hello").unwrap();
+        std::fs::create_dir(source_dir.path().join("subdir")).unwrap();
+        std::fs::write(source_dir.path().join("subdir/nested.txt"), b"World").unwrap();
+
+        // Pack
+        let archive_path = target_dir.path().join("test.zip");
+        pack(source_dir.path(), &archive_path).unwrap();
+        assert!(archive_path.exists());
+
+        // Unpack
+        let extract_dir = TempDir::new().unwrap();
+        let files = unpack(
+            std::fs::File::open(&archive_path).unwrap(),
+            extract_dir.path(),
+        )
+        .unwrap();
+
+        assert_eq!(files.len(), 2);
+        assert_eq!(
+            std::fs::read_to_string(extract_dir.path().join("test.txt")).unwrap(),
+            "Hello"
+        );
+        assert_eq!(
+            std::fs::read_to_string(extract_dir.path().join("subdir/nested.txt")).unwrap(),
+            "World"
+        );
+    }
+}
diff --git a/crates/nvisy-archive/src/handler/zip_handler.rs b/crates/nvisy-archive/src/handler/zip_handler.rs
deleted file mode 100644
index 50469b5..0000000
--- a/crates/nvisy-archive/src/handler/zip_handler.rs
+++ /dev/null
@@ -1,575 +0,0 @@
-//!
ZIP archive handler implementation -//! -//! This module provides specialized handling for ZIP archives using the zip crate, -//! with support for various compression methods and ZIP-specific features. - -use std::io::{Cursor, Read, Seek, Write}; -use std::path::{Path, PathBuf}; - -use tokio::fs; -use tokio::io::AsyncWriteExt; -use zip::read::ZipFile; -use zip::write::{ExtendedFileOptions, SimpleFileOptions}; -use zip::{CompressionMethod, DateTime, ZipArchive, ZipWriter}; - -use crate::{ArchiveErrorExt, ArchiveType, Error, Result, ZipResultExt}; - -/// Specialized handler for ZIP archive operations -/// -/// This handler provides efficient ZIP-specific operations using the zip crate, -/// with support for various compression methods and ZIP features. -#[derive(Debug)] -pub struct ZipArchiveHandler { - /// The underlying ZIP archive - archive: ZipArchive, - /// Archive type (should always be ZIP) - archive_type: ArchiveType, -} - -impl ZipArchiveHandler { - /// Create a new ZIP handler from a reader - pub fn new(reader: R, archive_type: ArchiveType) -> Result { - if archive_type != ArchiveType::Zip { - return Err(Error::unsupported_format(format!( - "Expected ZIP, got: {}", - archive_type - ))); - } - - let archive = ZipArchive::new(reader).map_zip_err()?; - - Ok(Self { - archive, - archive_type, - }) - } - - /// Get the archive type - pub fn archive_type(&self) -> ArchiveType { - self.archive_type - } - - /// Get the number of files in the archive - pub fn len(&self) -> usize { - self.archive.len() - } - - /// Check if the archive is empty - pub fn is_empty(&self) -> bool { - self.archive.len() == 0 - } - - /// Extract all entries to the specified directory - pub async fn extract_to(&mut self, target_dir: impl AsRef) -> Result> { - let target_dir = target_dir.as_ref(); - fs::create_dir_all(target_dir).await?; - - let mut extracted_files = Vec::new(); - - for i in 0..self.archive.len() { - let mut file = self.archive.by_index(i).map_zip_err()?; - let file_path = target_dir.join(file.name()); - - // Create parent directories - if let Some(parent) = file_path.parent() { - fs::create_dir_all(parent).await?; - } - - if file.is_dir() { - fs::create_dir_all(&file_path).await?; - } else { - let mut content = Vec::with_capacity(file.size() as usize); - std::io::Read::read_to_end(&mut file, &mut content)?; - - let mut output_file = fs::File::create(&file_path).await?; - output_file.write_all(&content).await?; - - // Set file permissions on Unix systems - #[cfg(unix)] - { - if let Some(mode) = file.unix_mode() { - use std::os::unix::fs::PermissionsExt; - let permissions = std::fs::Permissions::from_mode(mode); - std::fs::set_permissions(&file_path, permissions)?; - } - } - - extracted_files.push(file_path); - } - } - - Ok(extracted_files) - } - - /// Extract a specific file by name - pub async fn extract_file(&mut self, name: &str, target_path: impl AsRef) -> Result<()> { - let mut file = self.archive.by_name(name).map_zip_err()?; - let target_path = target_path.as_ref(); - - if let Some(parent) = target_path.parent() { - fs::create_dir_all(parent).await?; - } - - let mut content = Vec::with_capacity(file.size() as usize); - std::io::Read::read_to_end(&mut file, &mut content)?; - - let mut output_file = fs::File::create(target_path).await?; - output_file.write_all(&content).await?; - - Ok(()) - } - - /// Read a file's content directly into memory - pub fn read_file(&mut self, name: &str) -> Result> { - let mut file = self.archive.by_name(name).map_zip_err()?; - let mut content = 
Vec::with_capacity(file.size() as usize); - std::io::Read::read_to_end(&mut file, &mut content)?; - Ok(content) - } - - /// Get file by index - pub fn by_index(&mut self, index: usize) -> Result> { - self.archive.by_index(index).map_zip_err() - } - - /// Get file by name - pub fn by_name(&mut self, name: &str) -> Result> { - self.archive.by_name(name).map_zip_err() - } - - /// List all entries without extracting - pub fn list_entries(&mut self) -> Result> { - let mut entries = Vec::new(); - - for i in 0..self.archive.len() { - let file = self.archive.by_index(i).map_zip_err()?; - - let info = ZipEntryInfo { - name: file.name().to_string(), - size: file.size(), - compressed_size: file.compressed_size(), - compression_method: file.compression(), - is_dir: file.is_dir(), - is_file: file.is_file(), - unix_mode: file.unix_mode(), - last_modified: file.last_modified().unwrap_or_default(), - crc32: file.crc32(), - extra_data: file.extra_data().unwrap_or(&[]).to_vec(), - comment: file.comment().to_string(), - }; - - entries.push(info); - } - - Ok(entries) - } - - /// Get file names - pub fn file_names(&self) -> Vec { - self.archive.file_names().map(|s| s.to_string()).collect() - } - - /// Check if a file exists in the archive - pub fn contains_file(&mut self, name: &str) -> bool { - self.archive.by_name(name).is_ok() - } - - /// Get the comment of the archive - pub fn comment(&self) -> String { - String::from_utf8_lossy(self.archive.comment()).to_string() - } -} - -/// Information about a ZIP entry -#[derive(Debug, Clone)] -pub struct ZipEntryInfo { - /// Name of the file within the archive - pub name: String, - /// Uncompressed size in bytes - pub size: u64, - /// Compressed size in bytes - pub compressed_size: u64, - /// Compression method used - pub compression_method: CompressionMethod, - /// Whether this entry is a directory - pub is_dir: bool, - /// Whether this entry is a file - pub is_file: bool, - /// Unix file permissions (if available) - pub unix_mode: Option, - /// Last modification time - pub last_modified: DateTime, - /// CRC32 checksum - pub crc32: u32, - /// Extra data field - pub extra_data: Vec, - /// File comment - pub comment: String, -} - -/// Builder for creating ZIP archives -pub struct ZipArchiveBuilder { - writer: ZipWriter, - archive_type: ArchiveType, -} - -impl ZipArchiveBuilder { - /// Create a new ZIP archive builder - pub fn new(writer: W) -> Self { - Self { - writer: ZipWriter::new(writer), - archive_type: ArchiveType::Zip, - } - } - - /// Get the archive type - pub fn archive_type(&self) -> ArchiveType { - self.archive_type - } - - /// Set the comment for the archive - pub fn set_comment(&mut self, comment: String) { - self.writer.set_comment(comment); - } - - /// Start a new file in the archive with default options - pub fn start_file(&mut self, name: &str) -> Result<()> { - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); - self.writer.start_file(name, options).map_zip_err()?; - Ok(()) - } - - /// Start a new file with custom options - pub fn start_file_with_options( - &mut self, - name: &str, - options: SimpleFileOptions, - ) -> Result<()> { - self.writer.start_file(name, options).map_zip_err()?; - Ok(()) - } - - /// Start a new file with extended options - pub fn start_file_with_extra_data( - &mut self, - name: &str, - _options: ExtendedFileOptions, - ) -> Result<()> { - // Note: ExtendedFileOptions may not be supported in this version - // Convert to SimpleFileOptions for compatibility - let simple_options = - 
SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); - self.writer.start_file(name, simple_options).map_zip_err()?; - Ok(()) - } - - /// Write data to the current file - pub fn write(&mut self, data: &[u8]) -> Result { - Ok(self.writer.write(data)?) - } - - /// Write all data to the current file - pub fn write_all(&mut self, data: &[u8]) -> Result<()> { - self.writer.write_all(data)?; - Ok(()) - } - - /// Add a file from a path with default compression - pub async fn add_file_from_path( - &mut self, - archive_path: &str, - file_path: impl AsRef, - ) -> Result<()> { - let file_path = file_path.as_ref(); - let content = fs::read(file_path).await?; - - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); - - self.writer - .start_file(archive_path, options) - .map_zip_err()?; - self.writer.write_all(&content)?; - - Ok(()) - } - - /// Add a file from memory - pub fn add_file_from_memory(&mut self, name: &str, data: &[u8]) -> Result<()> { - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); - - self.writer.start_file(name, options).map_zip_err()?; - self.writer.write_all(data)?; - - Ok(()) - } - - /// Add a directory entry - pub fn add_directory(&mut self, name: &str) -> Result<()> { - let dir_name = if name.ends_with('/') { - name.to_string() - } else { - format!("{}/", name) - }; - - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); - - self.writer.start_file(&dir_name, options).map_zip_err()?; - Ok(()) - } - - /// Add an entire directory recursively - pub async fn add_directory_recursively( - &mut self, - archive_prefix: &str, - dir_path: impl AsRef, - ) -> Result<()> { - let dir_path = dir_path.as_ref(); - let mut entries = fs::read_dir(dir_path).await?; - - while let Some(entry) = entries.next_entry().await? { - let entry_path = entry.path(); - let file_name = entry.file_name(); - let file_name_str = file_name.to_string_lossy(); - - let archive_path = if archive_prefix.is_empty() { - file_name_str.to_string() - } else { - format!("{}/{}", archive_prefix, file_name_str) - }; - - if entry_path.is_dir() { - self.add_directory(&archive_path)?; - self.add_directory_recursively(&archive_path, &entry_path) - .await?; - } else { - self.add_file_from_path(&archive_path, &entry_path).await?; - } - } - - Ok(()) - } - - /// Create options for storing files without compression - pub fn stored_options() -> SimpleFileOptions { - SimpleFileOptions::default().compression_method(CompressionMethod::Stored) - } - - /// Create options for maximum compression - pub fn max_compression_options() -> SimpleFileOptions { - SimpleFileOptions::default() - .compression_method(CompressionMethod::Deflated) - .compression_level(Some(9)) - } - - /// Create options with custom compression level - pub fn compression_options(level: i32) -> SimpleFileOptions { - SimpleFileOptions::default() - .compression_method(CompressionMethod::Deflated) - .compression_level(Some(level.into())) - } - - /// Finish writing the archive and return the underlying writer - pub fn finish(self) -> Result { - self.writer.finish().map_zip_err() - } -} - -/// Builder for creating ZIP archives from directories -pub struct ZipDirectoryBuilder; - -impl ZipDirectoryBuilder { - /// Create a ZIP archive from a directory - /// - /// This method collects all files in the source directory and creates - /// a ZIP archive at the target path. 
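The removed `create` below is the plain `zip` crate write loop, and the new `pack` in `zip.rs` keeps the same shape. A compact sketch of that loop with a single in-memory entry (name and contents are illustrative):

```rust
use std::io::{Cursor, Write};

use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions};

fn zip_one_entry() -> zip::result::ZipResult<Vec<u8>> {
    let mut zip = ZipWriter::new(Cursor::new(Vec::new()));
    let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);

    zip.start_file("hello.txt", options)?;
    zip.write_all(b"Hello, world!")?;

    // `finish` writes the central directory and returns the underlying writer.
    Ok(zip.finish()?.into_inner())
}
```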
- pub async fn create(source_dir: &Path, target_path: &Path) -> Result<()> { - use std::fs; - use std::io::Write; - - use zip::write::SimpleFileOptions; - use zip::{CompressionMethod, ZipWriter}; - - // Collect all files in the directory - fn collect_files(dir: &Path) -> Result> { - let mut files = Vec::new(); - let entries = fs::read_dir(dir)?; - - for entry in entries { - let entry = entry?; - let path = entry.path(); - - if path.is_file() { - files.push(path); - } else if path.is_dir() { - let mut sub_files = collect_files(&path)?; - files.append(&mut sub_files); - } - } - - files.sort(); - Ok(files) - } - - let files = collect_files(source_dir)?; - let file = std::fs::File::create(target_path)?; - let mut zip = ZipWriter::new(file); - - let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated); - - for file_path in files { - let relative_path = file_path - .strip_prefix(source_dir) - .map_err(|e| Error::invalid_archive(format!("Invalid file path: {}", e)))?; - - let file_content = tokio::fs::read(&file_path).await?; - - zip.start_file(relative_path.to_string_lossy().as_ref(), options) - .map_zip_err()?; - zip.write_all(&file_content)?; - } - - zip.finish().map_zip_err()?; - Ok(()) - } -} - -/// Convenience constructor for ZIP handlers from memory -impl ZipArchiveHandler>> { - /// Create a ZIP handler from in-memory data - pub fn from_memory(data: Vec) -> Result { - let cursor = Cursor::new(data); - Self::new(cursor, ArchiveType::Zip) - } -} - -/// Convenience constructor for ZIP builders with memory backing -impl ZipArchiveBuilder>> { - /// Create a ZIP builder that writes to memory - pub fn new_in_memory() -> Self { - let cursor = Cursor::new(Vec::new()); - Self::new(cursor) - } -} - -#[cfg(test)] -mod tests { - use std::io::Cursor; - - use tempfile::TempDir; - - use super::*; - - #[test] - fn test_zip_handler_from_memory() { - // Create a minimal ZIP file in memory - let cursor = Cursor::new(Vec::new()); - let mut builder = ZipArchiveBuilder::new(cursor); - - builder - .add_file_from_memory("test.txt", b"Hello, World!") - .unwrap(); - let cursor = builder.finish().unwrap(); - - // Test the handler - let data = cursor.into_inner(); - let handler = ZipArchiveHandler::from_memory(data); - assert!(handler.is_ok()); - - let mut handler = handler.unwrap(); - assert_eq!(handler.len(), 1); - assert!(!handler.is_empty()); - assert!(handler.contains_file("test.txt")); - } - - #[test] - fn test_zip_handler_invalid_type() { - let data = Vec::new(); - let cursor = Cursor::new(data); - let handler = ZipArchiveHandler::new(cursor, ArchiveType::Tar); - assert!(handler.is_err()); - } - - #[test] - fn test_zip_builder_creation() { - let cursor = Cursor::new(Vec::new()); - let builder = ZipArchiveBuilder::new(cursor); - assert_eq!(builder.archive_type(), ArchiveType::Zip); - } - - #[test] - fn test_zip_builder_in_memory() { - let mut builder = ZipArchiveBuilder::new_in_memory(); - builder - .add_file_from_memory("test.txt", b"Hello, World!") - .unwrap(); - builder.add_directory("subdir").unwrap(); - - let cursor = builder.finish().unwrap(); - let data = cursor.into_inner(); - assert!(!data.is_empty()); - } - - #[test] - fn test_compression_options() { - // Test that options can be created without panicking - let _stored = ZipArchiveBuilder::>>::stored_options(); - let _max_compression = ZipArchiveBuilder::>>::max_compression_options(); - let _custom = ZipArchiveBuilder::>>::compression_options(5); - - // Note: compression_method field is private, so we can't test it 
directly - // but we can verify the options are created successfully - } - - #[tokio::test] - async fn test_zip_extract_operations() { - // Create a ZIP file with test data - let mut builder = ZipArchiveBuilder::new_in_memory(); - builder - .add_file_from_memory("file1.txt", b"Content 1") - .unwrap(); - builder - .add_file_from_memory("file2.txt", b"Content 2") - .unwrap(); - builder.add_directory("subdir").unwrap(); - builder - .add_file_from_memory("subdir/file3.txt", b"Content 3") - .unwrap(); - - let cursor = builder.finish().unwrap(); - let data = cursor.into_inner(); - - // Test extraction - let mut handler = ZipArchiveHandler::from_memory(data).unwrap(); - let temp_dir = TempDir::new().unwrap(); - - let extracted_files = handler.extract_to(temp_dir.path()).await.unwrap(); - assert_eq!(extracted_files.len(), 3); // 3 files (directories don't count) - - // Test reading specific file - let content = handler.read_file("file1.txt").unwrap(); - assert_eq!(content, b"Content 1"); - } - - #[test] - fn test_entry_info() { - let info = ZipEntryInfo { - name: "test.txt".to_string(), - size: 100, - compressed_size: 80, - compression_method: CompressionMethod::Deflated, - is_dir: false, - is_file: true, - unix_mode: Some(0o644), - last_modified: DateTime::default(), - crc32: 12345, - extra_data: Vec::new(), - comment: String::new(), - }; - - assert_eq!(info.name, "test.txt"); - assert_eq!(info.size, 100); - assert_eq!(info.compressed_size, 80); - assert!(!info.is_dir); - assert!(info.is_file); - } -} diff --git a/crates/nvisy-archive/src/lib.rs b/crates/nvisy-archive/src/lib.rs index 17e3c45..ba65c3f 100644 --- a/crates/nvisy-archive/src/lib.rs +++ b/crates/nvisy-archive/src/lib.rs @@ -2,139 +2,16 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] +mod error; pub mod file; -pub mod handler; +pub(crate) mod handler; pub mod prelude; +mod registry; -// Re-exports for convenience -pub use file::{ArchiveFile, ArchiveType}; -pub use handler::ArchiveHandler; -// Re-export core types used in archive operations -pub use nvisy_rt_core::error::{Error, ErrorResource, ErrorType, Result}; -pub use nvisy_rt_core::fs::{ContentKind, ContentMetadata}; -pub use nvisy_rt_core::io::ContentData; -pub use nvisy_rt_core::path::ContentSource; - -/// Extension trait for creating archive-specific errors -pub trait ArchiveErrorExt { - /// Create an unsupported format error - fn unsupported_format(format: impl Into) -> Error; - - /// Create an invalid archive error - fn invalid_archive(message: impl Into) -> Error; - - /// Create an entry not found error - fn entry_not_found(name: impl Into) -> Error; - - /// Create a permission denied error - fn archive_permission_denied(message: impl Into) -> Error; - - /// Create a corrupted archive error - fn corrupted(message: impl Into) -> Error; - - /// Create a resource limit error - fn archive_resource_limit(message: impl Into) -> Error; -} - -impl ArchiveErrorExt for Error { - fn unsupported_format(format: impl Into) -> Error { - Error::new(format!("Unsupported archive format: {}", format.into())) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Archive) - } - - fn invalid_archive(message: impl Into) -> Error { - Error::new(format!("Invalid archive: {}", message.into())) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Archive) - } - - fn entry_not_found(name: impl Into) -> Error { - Error::new(format!("Entry not found: {}", name.into())) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Archive) - } - - 
fn archive_permission_denied(message: impl Into) -> Error { - Error::new(format!("Permission denied: {}", message.into())) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Archive) - } - - fn corrupted(message: impl Into) -> Error { - Error::new(format!("Corrupted archive: {}", message.into())) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Archive) - } - - fn archive_resource_limit(message: impl Into) -> Error { - Error::new(format!("Resource limit exceeded: {}", message.into())) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Archive) - } -} - -/// Extension trait for converting ZIP errors to our Error type -#[cfg(feature = "zip")] -pub trait ZipErrorExt { - /// Convert a ZIP error to an archive Error - fn into_archive_error(self) -> Error; -} +pub use error::{ArchiveErrorExt, Error, ErrorResource, ErrorType, Result}; +pub use registry::ArchiveRegistry; +#[cfg(feature = "tar")] +pub(crate) use error::TarResultExt; #[cfg(feature = "zip")] -impl ZipErrorExt for zip::result::ZipError { - fn into_archive_error(self) -> Error { - Error::from_source("ZIP operation failed", self) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Archive) - } -} - -/// Extension to convert zip::Result to our Result type -#[cfg(feature = "zip")] -pub trait ZipResultExt { - /// Convert a ZIP result to an archive Result - fn map_zip_err(self) -> Result; -} - -#[cfg(feature = "zip")] -impl ZipResultExt for std::result::Result { - fn map_zip_err(self) -> Result { - self.map_err(|e| e.into_archive_error()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_error_creation() { - let error = ::unsupported_format("custom"); - assert_eq!(error.resource, ErrorResource::Archive); - - let error = ::invalid_archive("test message"); - assert_eq!(error.resource, ErrorResource::Archive); - - let error = ::entry_not_found("missing.txt"); - assert_eq!(error.resource, ErrorResource::Archive); - - let error = ::archive_permission_denied("access denied"); - assert_eq!(error.resource, ErrorResource::Archive); - - let error = ::corrupted("bad data"); - assert_eq!(error.resource, ErrorResource::Archive); - - let error = ::archive_resource_limit("too big"); - assert_eq!(error.resource, ErrorResource::Archive); - } - - #[test] - fn test_error_display() { - let error = ::unsupported_format("test"); - assert!(error.to_string().contains("Unsupported archive format")); - - let error = ::invalid_archive("bad archive"); - assert!(error.to_string().contains("Invalid archive")); - } -} +pub(crate) use error::ZipResultExt; diff --git a/crates/nvisy-archive/src/prelude.rs b/crates/nvisy-archive/src/prelude.rs index 35ecb8c..72221d4 100644 --- a/crates/nvisy-archive/src/prelude.rs +++ b/crates/nvisy-archive/src/prelude.rs @@ -3,10 +3,8 @@ //! This module re-exports the most commonly used types from this crate. //! It is intended to be glob-imported for convenience. 
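For example, a consumer crate can rely solely on the glob import. A minimal sketch (the `open` helper is hypothetical; `ArchiveFile::from_path` is the constructor used by the registry code below):

```rust
use std::path::Path;

use nvisy_rt_archive::prelude::*;

// Hypothetical helper: open an archive by path using only prelude types.
fn open(path: &Path) -> Result<ArchiveFile> {
    ArchiveFile::from_path(path)
}
```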
-// Archive types -pub use crate::file::{ArchiveFile, ArchiveType}; -pub use crate::handler::ArchiveHandler; -// Error handling +pub use crate::file::{ + ArchiveFile, ArchiveHandler, ArchiveType, ContentData, ContentKind, ContentSource, +}; +pub use crate::registry::ArchiveRegistry; pub use crate::{ArchiveErrorExt, Error, ErrorResource, ErrorType, Result}; -// Core types re-exported for convenience -pub use crate::{ContentData, ContentKind, ContentMetadata, ContentSource}; diff --git a/crates/nvisy-archive/src/registry/mod.rs b/crates/nvisy-archive/src/registry/mod.rs new file mode 100644 index 0000000..1d6bbe4 --- /dev/null +++ b/crates/nvisy-archive/src/registry/mod.rs @@ -0,0 +1,408 @@ +//! Archive extraction registry for managing temporary directories +//! +//! This module provides [`ArchiveRegistry`] for managing extracted archive +//! contents in a centralized location with automatic cleanup. + +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, SystemTime}; + +use crate::file::{ArchiveFile, ArchiveHandler}; +use crate::{ArchiveErrorExt, Error, Result}; + +/// Prefix used for registry-managed extraction directories +const EXTRACTION_DIR_PREFIX: &str = "nvisy-extract-"; + +/// Default maximum age for stale directories (24 hours) +const DEFAULT_STALE_THRESHOLD_SECS: u64 = 24 * 60 * 60; + +/// Registry for managing extracted archive contents +/// +/// The registry provides a centralized way to extract archives into +/// managed directories with automatic cleanup. Each extraction creates +/// a uniquely named subdirectory that is cleaned up when the returned +/// handle is dropped. +/// +/// # Example +/// +/// ```rust,ignore +/// use nvisy_rt_archive::ArchiveRegistry; +/// +/// # async fn example() -> nvisy_rt_archive::Result<()> { +/// let registry = ArchiveRegistry::new("/tmp/archives")?; +/// +/// // Extract an archive - returns a handle to the extracted contents +/// let handler = registry.extract("my-archive.zip").await?; +/// +/// // Access extracted files +/// println!("Extracted to: {:?}", handler.path()); +/// +/// // Handler is dropped here, cleaning up the extracted directory +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug, Clone)] +pub struct ArchiveRegistry { + inner: Arc, +} + +#[derive(Debug)] +struct RegistryInner { + /// Base directory where all extractions are stored + base_dir: PathBuf, + /// Counter for generating unique directory names + counter: AtomicU64, +} + +impl ArchiveRegistry { + /// Create a new archive registry with the specified base directory + /// + /// The base directory will be created if it does not exist. + /// + /// # Errors + /// + /// Returns an error if the base directory cannot be created. + pub fn new(base_dir: impl AsRef) -> Result { + let base_dir = base_dir.as_ref().to_path_buf(); + + // Create the base directory if it doesn't exist + fs::create_dir_all(&base_dir).map_err(|e| { + Error::invalid_archive(format!( + "Failed to create registry base directory '{}': {}", + base_dir.display(), + e + )) + })?; + + Ok(Self { + inner: Arc::new(RegistryInner { + base_dir, + counter: AtomicU64::new(0), + }), + }) + } + + /// Get the base directory path + pub fn base_dir(&self) -> &Path { + &self.inner.base_dir + } + + /// Extract an archive and return a handle to the extracted contents + /// + /// The extraction directory will be automatically cleaned up when + /// the returned [`ArchiveHandler`] is dropped. 
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The archive file cannot be opened
+    /// - The archive cannot be extracted
+    /// - The extraction directory cannot be created
+    pub async fn extract(&self, archive_path: impl AsRef<Path>) -> Result<ArchiveHandler> {
+        self.extract_with_name(archive_path, None).await
+    }
+
+    /// Extract an archive with a custom directory name
+    ///
+    /// If `name` is `None`, a unique name will be generated automatically.
+    /// The name will be prefixed with the registry prefix to ensure it can
+    /// be identified during cleanup.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The archive file cannot be opened
+    /// - The archive cannot be extracted
+    /// - The extraction directory cannot be created
+    /// - A directory with the specified name already exists
+    pub async fn extract_with_name(
+        &self,
+        archive_path: impl AsRef<Path>,
+        name: Option<&str>,
+    ) -> Result<ArchiveHandler> {
+        let archive_path = archive_path.as_ref();
+
+        // Generate directory name
+        let dir_name = match name {
+            Some(n) => format!("{}{}", EXTRACTION_DIR_PREFIX, n),
+            None => self.generate_unique_name(),
+        };
+
+        let extract_dir = self.inner.base_dir.join(&dir_name);
+
+        // Check if directory already exists
+        if extract_dir.exists() {
+            return Err(Error::invalid_archive(format!(
+                "Extraction directory already exists: {}",
+                extract_dir.display()
+            )));
+        }
+
+        // Create the extraction directory
+        fs::create_dir_all(&extract_dir).map_err(|e| {
+            Error::invalid_archive(format!(
+                "Failed to create extraction directory '{}': {}",
+                extract_dir.display(),
+                e
+            ))
+        })?;
+
+        // Open and extract the archive
+        let archive = ArchiveFile::from_path(archive_path)?;
+        archive.unpack_to(&extract_dir).await
+    }
+
+    /// Generate a unique directory name
+    fn generate_unique_name(&self) -> String {
+        let count = self.inner.counter.fetch_add(1, Ordering::SeqCst);
+        let timestamp = SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_millis();
+
+        format!("{}{}-{}", EXTRACTION_DIR_PREFIX, timestamp, count)
+    }
+
+    /// Clean up stale extraction directories
+    ///
+    /// This function removes extraction directories that are older than
+    /// the specified threshold. Use this to clean up directories left
+    /// behind by crashed processes or I/O failures.
+    ///
+    /// # Arguments
+    ///
+    /// * `max_age` - Maximum age of directories to keep. Directories older
+    ///   than this will be removed. If `None`, uses a default of 24 hours.
+    ///
+    /// # Returns
+    ///
+    /// Returns the number of directories that were cleaned up.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the base directory cannot be read. Individual
+    /// directory removal failures are logged but do not cause an error.
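A short usage sketch, assuming the crate-root re-exports shown in `lib.rs` (the one-hour threshold is illustrative):

```rust
use std::time::Duration;

use nvisy_rt_archive::{ArchiveRegistry, Result};

fn sweep(registry: &ArchiveRegistry) -> Result<()> {
    // Remove extraction directories that have been idle for over an hour.
    let cleaned = registry.cleanup_stale(Some(Duration::from_secs(3600)))?;
    println!("removed {cleaned} stale extraction directories");
    Ok(())
}
```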
+    pub fn cleanup_stale(&self, max_age: Option<Duration>) -> Result<usize> {
+        let threshold = max_age.unwrap_or(Duration::from_secs(DEFAULT_STALE_THRESHOLD_SECS));
+        let now = SystemTime::now();
+        let mut cleaned = 0;
+
+        let entries = fs::read_dir(&self.inner.base_dir).map_err(|e| {
+            Error::invalid_archive(format!(
+                "Failed to read registry directory '{}': {}",
+                self.inner.base_dir.display(),
+                e
+            ))
+        })?;
+
+        for entry in entries.flatten() {
+            let path = entry.path();
+
+            // Only process directories with our prefix
+            if !path.is_dir() {
+                continue;
+            }
+
+            let file_name = match path.file_name().and_then(|n| n.to_str()) {
+                Some(name) if name.starts_with(EXTRACTION_DIR_PREFIX) => name,
+                _ => continue,
+            };
+
+            // Check if the directory is older than the threshold
+            let metadata = match fs::metadata(&path) {
+                Ok(m) => m,
+                Err(_) => continue,
+            };
+
+            let modified = match metadata.modified() {
+                Ok(t) => t,
+                Err(_) => continue,
+            };
+
+            let age = match now.duration_since(modified) {
+                Ok(d) => d,
+                Err(_) => continue,
+            };
+
+            if age > threshold {
+                // Attempt to remove the stale directory
+                if fs::remove_dir_all(&path).is_ok() {
+                    cleaned += 1;
+                    tracing::info!(
+                        target: "nvisy_rt_archive",
+                        directory = %file_name,
+                        age_secs = age.as_secs(),
+                        "Cleaned up stale extraction directory"
+                    );
+                } else {
+                    tracing::warn!(
+                        target: "nvisy_rt_archive",
+                        directory = %file_name,
+                        "Failed to clean up stale extraction directory"
+                    );
+                }
+            }
+        }
+
+        Ok(cleaned)
+    }
+
+    /// List all current extraction directories
+    ///
+    /// Returns the paths of all directories managed by this registry.
+    pub fn list_extractions(&self) -> Result<Vec<PathBuf>> {
+        let entries = fs::read_dir(&self.inner.base_dir).map_err(|e| {
+            Error::invalid_archive(format!(
+                "Failed to read registry directory '{}': {}",
+                self.inner.base_dir.display(),
+                e
+            ))
+        })?;
+
+        let mut extractions = Vec::new();
+
+        for entry in entries.flatten() {
+            let path = entry.path();
+
+            let is_managed = path.is_dir()
+                && path
+                    .file_name()
+                    .and_then(|n| n.to_str())
+                    .is_some_and(|name| name.starts_with(EXTRACTION_DIR_PREFIX));
+
+            if is_managed {
+                extractions.push(path);
+            }
+        }
+
+        extractions.sort();
+        Ok(extractions)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_registry_creation() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let registry = ArchiveRegistry::new(temp_dir.path()).unwrap();
+
+        assert!(registry.base_dir().exists());
+    }
+
+    #[test]
+    fn test_registry_creation_creates_directory() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let nested_path = temp_dir.path().join("nested").join("registry");
+
+        let registry = ArchiveRegistry::new(&nested_path).unwrap();
+        assert!(registry.base_dir().exists());
+        assert_eq!(registry.base_dir(), nested_path);
+    }
+
+    #[test]
+    fn test_unique_name_generation() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let registry = ArchiveRegistry::new(temp_dir.path()).unwrap();
+
+        let name1 = registry.generate_unique_name();
+        let name2 = registry.generate_unique_name();
+
+        assert!(name1.starts_with(EXTRACTION_DIR_PREFIX));
+        assert!(name2.starts_with(EXTRACTION_DIR_PREFIX));
+        assert_ne!(name1, name2);
+    }
+
+    #[test]
+    fn test_list_extractions_empty() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let registry = ArchiveRegistry::new(temp_dir.path()).unwrap();
+
+        let extractions = registry.list_extractions().unwrap();
+        assert!(extractions.is_empty());
+    }
+
+    #[test]
+    fn test_list_extractions_with_directories() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let
registry = ArchiveRegistry::new(temp_dir.path()).unwrap(); + + // Create some extraction directories + let dir1 = temp_dir + .path() + .join(format!("{}test1", EXTRACTION_DIR_PREFIX)); + let dir2 = temp_dir + .path() + .join(format!("{}test2", EXTRACTION_DIR_PREFIX)); + let other_dir = temp_dir.path().join("other-directory"); + + fs::create_dir_all(&dir1).unwrap(); + fs::create_dir_all(&dir2).unwrap(); + fs::create_dir_all(&other_dir).unwrap(); + + let extractions = registry.list_extractions().unwrap(); + + // Should only find directories with our prefix + assert_eq!(extractions.len(), 2); + assert!(extractions.contains(&dir1)); + assert!(extractions.contains(&dir2)); + assert!(!extractions.contains(&other_dir)); + } + + #[test] + fn test_cleanup_stale_no_stale_directories() { + let temp_dir = tempfile::tempdir().unwrap(); + let registry = ArchiveRegistry::new(temp_dir.path()).unwrap(); + + // Create a fresh directory + let dir = temp_dir + .path() + .join(format!("{}fresh", EXTRACTION_DIR_PREFIX)); + fs::create_dir_all(&dir).unwrap(); + + // Should not clean up fresh directories + let cleaned = registry + .cleanup_stale(Some(Duration::from_secs(3600))) + .unwrap(); + assert_eq!(cleaned, 0); + assert!(dir.exists()); + } + + #[test] + fn test_cleanup_stale_removes_old_directories() { + let temp_dir = tempfile::tempdir().unwrap(); + let registry = ArchiveRegistry::new(temp_dir.path()).unwrap(); + + // Create a directory + let dir = temp_dir + .path() + .join(format!("{}stale", EXTRACTION_DIR_PREFIX)); + fs::create_dir_all(&dir).unwrap(); + + // Use a zero threshold to clean up immediately + let cleaned = registry.cleanup_stale(Some(Duration::ZERO)).unwrap(); + + assert_eq!(cleaned, 1); + assert!(!dir.exists()); + } + + #[test] + fn test_cleanup_stale_ignores_non_prefixed_directories() { + let temp_dir = tempfile::tempdir().unwrap(); + let registry = ArchiveRegistry::new(temp_dir.path()).unwrap(); + + // Create directories without the prefix + let other_dir = temp_dir.path().join("other-old-directory"); + fs::create_dir_all(&other_dir).unwrap(); + + // Should not clean up directories without our prefix + let cleaned = registry.cleanup_stale(Some(Duration::ZERO)).unwrap(); + + assert_eq!(cleaned, 0); + assert!(other_dir.exists()); + } +} From d8ad7a588472fac8a60fb895bc70eb45027394d9 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sat, 17 Jan 2026 23:15:34 +0100 Subject: [PATCH 3/4] fix: restore public exports and fix clippy warnings - Re-export ArchiveFile, ArchiveHandler, ArchiveType from nvisy-archive root - Remove needless Ok wrapper in Engine::extract_archive_to_handler --- Cargo.lock | 3 + crates/nvisy-archive/src/lib.rs | 1 + crates/nvisy-document/src/lib.rs | 1 + crates/nvisy-engine/Cargo.toml | 6 +- crates/nvisy-engine/src/engine/mod.rs | 459 ++++++++++++++++++++++--- crates/nvisy-engine/src/lib.rs | 13 +- crates/nvisy-engine/src/session/mod.rs | 63 +++- 7 files changed, 490 insertions(+), 56 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c06b57b..658646b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -646,6 +646,7 @@ version = "0.1.0" dependencies = [ "bytes", "bzip2", + "derive_more", "flate2", "nvisy-rt-core", "sevenz-rust", @@ -654,6 +655,7 @@ dependencies = [ "tempfile", "tokio", "tokio-test", + "tracing", "xz2", "zip", ] @@ -719,6 +721,7 @@ dependencies = [ "nvisy-rt-text", "serde", "serde_json", + "tempfile", "tokio", "tracing", "uuid", diff --git a/crates/nvisy-archive/src/lib.rs b/crates/nvisy-archive/src/lib.rs index ba65c3f..b27013a 100644 --- 
a/crates/nvisy-archive/src/lib.rs +++ b/crates/nvisy-archive/src/lib.rs @@ -9,6 +9,7 @@ pub mod prelude; mod registry; pub use error::{ArchiveErrorExt, Error, ErrorResource, ErrorType, Result}; +pub use file::{ArchiveFile, ArchiveHandler, ArchiveType}; pub use registry::ArchiveRegistry; #[cfg(feature = "tar")] diff --git a/crates/nvisy-document/src/lib.rs b/crates/nvisy-document/src/lib.rs index b27f9b5..fea7be7 100644 --- a/crates/nvisy-document/src/lib.rs +++ b/crates/nvisy-document/src/lib.rs @@ -30,6 +30,7 @@ pub use metadata::{ PropertyValue, }; pub use nvisy_rt_core::error::{BoxError, Error, ErrorResource, ErrorType, Result}; +pub use nvisy_rt_core::fs::ContentKind; pub use table::{CellDataType, NormalizedCell, NormalizedRow, NormalizedTable, TableExtractor}; pub use text::{ExtractedText, TextExtractor}; pub use thumbnail::{ImageFormat, Thumbnail, ThumbnailGenerator, ThumbnailOptions, ThumbnailSize}; diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml index 0dda308..7882004 100644 --- a/crates/nvisy-engine/Cargo.toml +++ b/crates/nvisy-engine/Cargo.toml @@ -21,7 +21,8 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [features] -default = ["pdf", "docx", "text", "image"] +default = ["archive", "pdf", "docx", "text", "image"] +archive = ["dep:nvisy-rt-archive"] pdf = ["dep:nvisy-rt-pdf"] docx = ["dep:nvisy-rt-docx"] text = ["dep:nvisy-rt-text"] @@ -29,7 +30,7 @@ image = ["dep:nvisy-rt-image"] [dependencies] # Internal crates -nvisy-rt-archive = { workspace = true } +nvisy-rt-archive = { workspace = true, optional = true } nvisy-rt-document = { workspace = true } nvisy-rt-docx = { workspace = true, optional = true } nvisy-rt-image = { workspace = true, optional = true } @@ -49,4 +50,5 @@ tracing = { workspace = true } [dev-dependencies] serde_json = { workspace = true, features = ["std"] } +tempfile = { workspace = true } tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/nvisy-engine/src/engine/mod.rs b/crates/nvisy-engine/src/engine/mod.rs index eb8c258..1a99baf 100644 --- a/crates/nvisy-engine/src/engine/mod.rs +++ b/crates/nvisy-engine/src/engine/mod.rs @@ -6,13 +6,37 @@ mod config; use std::path::Path; +use std::sync::Arc; pub use config::EngineConfig; use nvisy_rt_document::{ContentData, Result}; +#[cfg(feature = "archive")] +use tracing::warn; use tracing::{debug, info}; +#[cfg(feature = "archive")] +use nvisy_rt_archive::{ArchiveHandler, ArchiveRegistry}; +#[cfg(feature = "archive")] +use nvisy_rt_document::ContentKind; + use crate::TRACING_TARGET_ENGINE; use crate::registry::{FormatRegistry, LoadedDocument}; +#[cfg(feature = "archive")] +use crate::{TRACING_TARGET_ARCHIVE, TRACING_TARGET_LOAD}; + +/// Shared state for the engine (cheap to clone via Arc). +#[derive(Debug)] +struct EngineInner { + /// Configuration for the engine. + config: EngineConfig, + + /// Format registry for dynamic loading. + registry: FormatRegistry, + + /// Archive registry for extraction management. + #[cfg(feature = "archive")] + archive_registry: ArchiveRegistry, +} /// The central document processing engine. /// @@ -20,6 +44,9 @@ use crate::registry::{FormatRegistry, LoadedDocument}; /// - Loading documents from various formats (PDF, DOCX, plain text, etc.) /// - Managing format handlers via a dynamic registry /// - Auto-detecting formats from file extensions or MIME types +/// - Extracting and loading documents from archives (with `archive` feature) +/// +/// `Engine` is cheap to clone - all clones share the same underlying state. 
/// /// # Example /// @@ -34,73 +61,109 @@ use crate::registry::{FormatRegistry, LoadedDocument}; /// // Load by extension /// let doc = engine.load_by_extension("json", data).await?; /// -/// // Load with specific format (when you need the concrete type) -/// let doc = engine.pdf().load(data).await?; +/// // Load from archive (requires `archive` feature) +/// let docs = engine.load_from_archive("documents.zip").await?; /// ``` -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Engine { - /// Configuration for the engine. - config: EngineConfig, - - /// Format registry for dynamic loading. - registry: FormatRegistry, + inner: Arc, } impl Engine { /// Creates a new engine with default configuration and all default formats. + /// + /// # Panics + /// + /// Panics if the default archive registry cannot be created (when `archive` feature is enabled). #[must_use] pub fn new() -> Self { info!(target: TRACING_TARGET_ENGINE, "Creating engine with default configuration"); Self { - config: EngineConfig::default(), - registry: FormatRegistry::with_defaults(), + inner: Arc::new(EngineInner { + config: EngineConfig::default(), + registry: FormatRegistry::with_defaults(), + #[cfg(feature = "archive")] + archive_registry: ArchiveRegistry::new(std::env::temp_dir().join("nvisy-engine")) + .expect("failed to create default archive registry"), + }), } } /// Creates a new engine with the specified configuration. + /// + /// # Panics + /// + /// Panics if the default archive registry cannot be created (when `archive` feature is enabled). #[must_use] pub fn with_config(config: EngineConfig) -> Self { debug!(target: TRACING_TARGET_ENGINE, ?config, "Creating engine with custom configuration"); Self { - config, - registry: FormatRegistry::with_defaults(), + inner: Arc::new(EngineInner { + config, + registry: FormatRegistry::with_defaults(), + #[cfg(feature = "archive")] + archive_registry: ArchiveRegistry::new(std::env::temp_dir().join("nvisy-engine")) + .expect("failed to create default archive registry"), + }), } } - /// Creates a new engine with a custom registry. + /// Creates a new engine with a custom format registry. + /// + /// # Panics + /// + /// Panics if the default archive registry cannot be created (when `archive` feature is enabled). #[must_use] - pub fn with_registry(registry: FormatRegistry) -> Self { - debug!(target: TRACING_TARGET_ENGINE, "Creating engine with custom registry"); + pub fn with_format_registry(registry: FormatRegistry) -> Self { + debug!(target: TRACING_TARGET_ENGINE, "Creating engine with custom format registry"); Self { - config: EngineConfig::default(), - registry, + inner: Arc::new(EngineInner { + config: EngineConfig::default(), + registry, + #[cfg(feature = "archive")] + archive_registry: ArchiveRegistry::new(std::env::temp_dir().join("nvisy-engine")) + .expect("failed to create default archive registry"), + }), } } - /// Creates a new engine with custom configuration and registry. + /// Creates a new engine with a custom archive registry. 
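A sketch of wiring in a custom registry (the directory path is illustrative):

```rust
use nvisy_rt_archive::ArchiveRegistry;
use nvisy_rt_engine::Engine;

fn engine_with_custom_extraction_dir() -> nvisy_rt_archive::Result<Engine> {
    // Keep extracted archives under an app-specific directory rather than
    // the default `std::env::temp_dir().join("nvisy-engine")`.
    let registry = ArchiveRegistry::new("/var/tmp/my-app/archives")?;
    Ok(Engine::with_archive_registry(registry))
}
```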
+ #[cfg(feature = "archive")] + #[cfg_attr(docsrs, doc(cfg(feature = "archive")))] #[must_use] - pub fn with_config_and_registry(config: EngineConfig, registry: FormatRegistry) -> Self { - debug!(target: TRACING_TARGET_ENGINE, ?config, "Creating engine with custom configuration and registry"); - Self { config, registry } + pub fn with_archive_registry(archive_registry: ArchiveRegistry) -> Self { + debug!( + target: TRACING_TARGET_ENGINE, + base_dir = %archive_registry.base_dir().display(), + "Creating engine with custom archive registry" + ); + Self { + inner: Arc::new(EngineInner { + config: EngineConfig::default(), + registry: FormatRegistry::with_defaults(), + archive_registry, + }), + } } /// Returns a reference to the engine configuration. #[must_use] pub fn config(&self) -> &EngineConfig { - &self.config + &self.inner.config } /// Returns a reference to the format registry. #[must_use] pub fn registry(&self) -> &FormatRegistry { - &self.registry + &self.inner.registry } - /// Returns a mutable reference to the format registry. - /// - /// Use this to register custom formats. - pub fn registry_mut(&mut self) -> &mut FormatRegistry { - &mut self.registry + /// Returns a reference to the archive registry. + #[cfg(feature = "archive")] + #[cfg_attr(docsrs, doc(cfg(feature = "archive")))] + #[must_use] + pub fn archive_registry(&self) -> &ArchiveRegistry { + &self.inner.archive_registry } /// Loads a document from a file path. @@ -115,7 +178,7 @@ impl Engine { /// - The extension is not supported /// - The document fails to load pub async fn load_file>(&self, path: P) -> Result { - self.registry.load_file(path).await + self.inner.registry.load_file(path).await } /// Loads a document by file extension. @@ -124,7 +187,7 @@ impl Engine { /// /// Returns an error if the extension is not supported or loading fails. pub async fn load_by_extension(&self, ext: &str, data: ContentData) -> Result { - self.registry.load_by_extension(ext, data).await + self.inner.registry.load_by_extension(ext, data).await } /// Loads a document by MIME type. @@ -133,31 +196,315 @@ impl Engine { /// /// Returns an error if the MIME type is not supported or loading fails. pub async fn load_by_mime(&self, mime: &str, data: ContentData) -> Result { - self.registry.load_by_mime(mime, data).await + self.inner.registry.load_by_mime(mime, data).await } /// Checks if a file extension is supported. #[must_use] pub fn supports_extension(&self, ext: &str) -> bool { - self.registry.supports_extension(ext) + self.inner.registry.supports_extension(ext) } /// Checks if a MIME type is supported. #[must_use] pub fn supports_mime(&self, mime: &str) -> bool { - self.registry.supports_mime(mime) + self.inner.registry.supports_mime(mime) } /// Returns all supported file extensions. #[must_use] pub fn supported_extensions(&self) -> Vec<&'static str> { - self.registry.supported_extensions() + self.inner.registry.supported_extensions() } /// Returns all supported MIME types. #[must_use] pub fn supported_mime_types(&self) -> Vec<&'static str> { - self.registry.supported_mime_types() + self.inner.registry.supported_mime_types() + } + + /// Loads all supported documents from an archive file. + /// + /// This method: + /// 1. Extracts the archive (using the registry if configured) + /// 2. Iterates over all extracted files + /// 3. Attempts to load each file that has a supported extension + /// 4. Returns a collection of successfully loaded documents + /// + /// Files that cannot be loaded (unsupported format, parse errors, etc.) 
+ /// are logged and skipped. + #[cfg(feature = "archive")] + #[cfg_attr(docsrs, doc(cfg(feature = "archive")))] + pub async fn load_from_archive( + &self, + archive_path: impl AsRef, + ) -> Result> { + let archive_path = archive_path.as_ref(); + info!( + target: TRACING_TARGET_ARCHIVE, + path = %archive_path.display(), + "Loading documents from archive" + ); + + let handler = self.extract_archive(archive_path).await?; + self.load_from_archive_handler(&handler).await + } + + /// Loads documents from an archive filtered by content kind. + /// + /// Only files matching the specified [`ContentKind`] will be loaded. + #[cfg(feature = "archive")] + #[cfg_attr(docsrs, doc(cfg(feature = "archive")))] + pub async fn load_from_archive_by_kind( + &self, + archive_path: impl AsRef, + kind: ContentKind, + ) -> Result> { + let archive_path = archive_path.as_ref(); + info!( + target: TRACING_TARGET_ARCHIVE, + path = %archive_path.display(), + kind = ?kind, + "Loading documents from archive by kind" + ); + + let handler = self.extract_archive(archive_path).await?; + self.load_from_handler_by_kind(&handler, kind).await + } + + /// Loads documents from an archive filtered by file extension. + #[cfg(feature = "archive")] + #[cfg_attr(docsrs, doc(cfg(feature = "archive")))] + pub async fn load_from_archive_by_extension( + &self, + archive_path: impl AsRef, + extension: &str, + ) -> Result> { + let archive_path = archive_path.as_ref(); + info!( + target: TRACING_TARGET_ARCHIVE, + path = %archive_path.display(), + extension = %extension, + "Loading documents from archive by extension" + ); + + let handler = self.extract_archive(archive_path).await?; + self.load_from_handler_by_extension(&handler, extension) + .await + } + + /// Returns an iterator over file paths in an archive that the engine supports. + /// + /// This is useful when you want to inspect or selectively load files from an archive + /// without loading all of them at once. + /// + /// # Example + /// + /// ```ignore + /// let handler = engine.extract_archive_to_handler("documents.zip").await?; + /// for path in engine.supported_files(&handler) { + /// println!("Can load: {}", path.display()); + /// } + /// ``` + #[cfg(feature = "archive")] + #[cfg_attr(docsrs, doc(cfg(feature = "archive")))] + pub fn supported_files<'a>( + &'a self, + handler: &'a ArchiveHandler, + ) -> impl Iterator { + handler.file_paths().iter().filter(|path| { + path.extension() + .and_then(|e| e.to_str()) + .is_some_and(|ext| self.supports_extension(ext)) + }) + } + + /// Extracts an archive and returns the handler for manual inspection. + /// + /// Use this when you want to inspect archive contents before loading, + /// or when you want to selectively load files using [`Self::supported_files`]. + /// + /// # Example + /// + /// ```ignore + /// let handler = engine.extract_archive_to_handler("documents.zip").await?; + /// + /// // Inspect supported files + /// for path in engine.supported_files(&handler) { + /// println!("Found: {}", path.display()); + /// } + /// + /// // Load specific file + /// let doc = engine.load_file(handler.path().join("report.pdf")).await?; + /// ``` + #[cfg(feature = "archive")] + #[cfg_attr(docsrs, doc(cfg(feature = "archive")))] + pub async fn extract_archive_to_handler( + &self, + archive_path: impl AsRef, + ) -> Result { + self.extract_archive(archive_path.as_ref()).await + } + + /// Loads documents from an already extracted archive handler. 
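Together with `extract_archive_to_handler` and `supported_files` above, this enables an inspect-then-load flow; a sketch (the archive name is illustrative, and the error type is assumed to be the re-exported document `Result`):

```rust
use nvisy_rt_engine::Engine;

async fn inspect_then_load(engine: &Engine) -> nvisy_rt_document::Result<()> {
    let handler = engine.extract_archive_to_handler("documents.zip").await?;

    // See what the engine can actually parse before committing to a full load.
    for path in engine.supported_files(&handler) {
        println!("loadable: {}", path.display());
    }

    let docs = engine.load_from_archive_handler(&handler).await?;
    println!("loaded {} documents", docs.len());
    Ok(())
}
```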
+    #[cfg(feature = "archive")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "archive")))]
+    pub async fn load_from_archive_handler(
+        &self,
+        handler: &ArchiveHandler,
+    ) -> Result<Vec<LoadedDocument>> {
+        let mut documents = Vec::new();
+
+        for file_path in self.supported_files(handler) {
+            match self.load_file(file_path).await {
+                Ok(doc) => {
+                    debug!(
+                        target: TRACING_TARGET_LOAD,
+                        path = %file_path.display(),
+                        mime_type = %doc.info().mime_type,
+                        regions = doc.regions().len(),
+                        "Loaded document from archive"
+                    );
+                    documents.push(doc);
+                }
+                Err(e) => {
+                    warn!(
+                        target: TRACING_TARGET_ARCHIVE,
+                        path = %file_path.display(),
+                        error = %e,
+                        "Failed to load document from archive"
+                    );
+                }
+            }
+        }
+
+        info!(
+            target: TRACING_TARGET_ARCHIVE,
+            count = documents.len(),
+            "Loaded documents from archive"
+        );
+
+        Ok(documents)
+    }
+
+    /// Extracts an archive using the registry.
+    #[cfg(feature = "archive")]
+    async fn extract_archive(&self, archive_path: &Path) -> Result<ArchiveHandler> {
+        debug!(
+            target: TRACING_TARGET_ARCHIVE,
+            path = %archive_path.display(),
+            base_dir = %self.inner.archive_registry.base_dir().display(),
+            "Extracting archive"
+        );
+        self.inner.archive_registry.extract(archive_path).await
+    }
+
+    /// Loads documents from a handler filtered by content kind.
+    #[cfg(feature = "archive")]
+    async fn load_from_handler_by_kind(
+        &self,
+        handler: &ArchiveHandler,
+        kind: ContentKind,
+    ) -> Result<Vec<LoadedDocument>> {
+        let mut documents = Vec::new();
+
+        let matching_files = handler.find_files_by_kind(kind);
+        debug!(
+            target: TRACING_TARGET_ARCHIVE,
+            kind = ?kind,
+            count = matching_files.len(),
+            "Found files matching content kind"
+        );
+
+        for file_path in matching_files {
+            let ext = match file_path.extension().and_then(|e| e.to_str()) {
+                Some(ext) => ext,
+                None => continue,
+            };
+
+            if !self.supports_extension(ext) {
+                debug!(
+                    target: TRACING_TARGET_ARCHIVE,
+                    path = %file_path.display(),
+                    extension = %ext,
+                    "Skipping unsupported file type"
+                );
+                continue;
+            }
+
+            match self.load_file(file_path).await {
+                Ok(doc) => {
+                    debug!(
+                        target: TRACING_TARGET_LOAD,
+                        path = %file_path.display(),
+                        mime_type = %doc.info().mime_type,
+                        "Loaded document from archive"
+                    );
+                    documents.push(doc);
+                }
+                Err(e) => {
+                    warn!(
+                        target: TRACING_TARGET_ARCHIVE,
+                        path = %file_path.display(),
+                        error = %e,
+                        "Failed to load document from archive"
+                    );
+                }
+            }
+        }
+
+        Ok(documents)
+    }
+
+    /// Loads documents from a handler filtered by file extension.
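+    ///
+    /// Backs [`Self::load_from_archive_by_extension`]; a hypothetical call path:
+    ///
+    /// ```ignore
+    /// // Extract once, then filter by extension (archive name is illustrative).
+    /// let docs = engine
+    ///     .load_from_archive_by_extension("bundle.zip", "json")
+    ///     .await?;
+    /// ```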
+    #[cfg(feature = "archive")]
+    async fn load_from_handler_by_extension(
+        &self,
+        handler: &ArchiveHandler,
+        extension: &str,
+    ) -> Result<Vec<LoadedDocument>> {
+        let mut documents = Vec::new();
+
+        let matching_files = handler.find_files_by_extension(extension);
+        debug!(
+            target: TRACING_TARGET_ARCHIVE,
+            extension = %extension,
+            count = matching_files.len(),
+            "Found files with extension"
+        );
+
+        if !self.supports_extension(extension) {
+            warn!(
+                target: TRACING_TARGET_ARCHIVE,
+                extension = %extension,
+                "Extension not supported by engine"
+            );
+            return Ok(documents);
+        }
+
+        for file_path in matching_files {
+            match self.load_file(file_path).await {
+                Ok(doc) => {
+                    debug!(
+                        target: TRACING_TARGET_LOAD,
+                        path = %file_path.display(),
+                        mime_type = %doc.info().mime_type,
+                        "Loaded document from archive"
+                    );
+                    documents.push(doc);
+                }
+                Err(e) => {
+                    warn!(
+                        target: TRACING_TARGET_ARCHIVE,
+                        path = %file_path.display(),
+                        error = %e,
+                        "Failed to load document from archive"
+                    );
+                }
+            }
+        }
+
+        Ok(documents)
+    }
 }

@@ -167,16 +514,6 @@ impl Default for Engine {
     }
 }

-impl Clone for Engine {
-    fn clone(&self) -> Self {
-        debug!(target: TRACING_TARGET_ENGINE, "Cloning engine");
-        Self {
-            config: self.config.clone(),
-            registry: FormatRegistry::with_defaults(),
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -197,6 +534,15 @@ mod tests {
         assert_eq!(engine.config().max_file_size, Some(50 * 1024 * 1024));
     }

+    #[test]
+    fn test_engine_clone_is_cheap() {
+        let engine = Engine::new();
+        let cloned = engine.clone();
+
+        // Both should point to the same Arc
+        assert!(Arc::ptr_eq(&engine.inner, &cloned.inner));
+    }
+
     #[test]
     fn test_supported_extensions() {
         let engine = Engine::new();
@@ -286,13 +632,13 @@ mod tests {
     }

     #[test]
-    fn test_custom_registry() {
+    fn test_custom_format_registry() {
         let mut registry = FormatRegistry::new();

         #[cfg(feature = "text")]
         registry.register(nvisy_rt_text::JsonFormat::new());

-        let engine = Engine::with_registry(registry);
+        let engine = Engine::with_format_registry(registry);

         #[cfg(feature = "text")]
         {
             assert!(engine.supports_extension("json"));
             assert!(!engine.supports_extension("xml")); // Not registered
         }
     }
+
+    #[cfg(feature = "archive")]
+    #[test]
+    fn test_engine_with_archive_registry() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let registry = ArchiveRegistry::new(temp_dir.path()).unwrap();
+
+        let engine = Engine::with_archive_registry(registry);
+
+        assert_eq!(engine.archive_registry().base_dir(), temp_dir.path());
+    }
+
+    #[cfg(feature = "archive")]
+    #[test]
+    fn test_default_archive_registry() {
+        let engine = Engine::new();
+        let expected_path = std::env::temp_dir().join("nvisy-engine");
+        assert_eq!(engine.archive_registry().base_dir(), expected_path);
+    }
 }
diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs
index 768f829..f9308d8 100644
--- a/crates/nvisy-engine/src/lib.rs
+++ b/crates/nvisy-engine/src/lib.rs
@@ -7,13 +7,22 @@ pub mod registry;
 pub mod session;

 pub use engine::{Engine, EngineConfig};
+#[cfg(feature = "archive")]
+#[cfg_attr(docsrs, doc(cfg(feature = "archive")))]
+pub use nvisy_rt_archive::{
+    self as arc, ArchiveFile, ArchiveHandler, ArchiveRegistry, ArchiveType,
+};
 pub use nvisy_rt_document::{
-    self as doc, BoundingBox, Capabilities, Document, DocumentFormat, Point, Region, RegionId,
-    RegionKind,
+    self as doc, BoundingBox, Capabilities, ContentKind, Document, DocumentFormat, Point, Region,
+    RegionId, RegionKind,
 };
 pub use registry::{FormatRef, FormatRegistry, LoadedDocument};
 pub use session::{AccessEntry, AccessHistory, ReadSession, SessionConfig, SessionId};

+/// Tracing target for archive operations.
+#[cfg(feature = "archive")]
+pub const TRACING_TARGET_ARCHIVE: &str = "nvisy_rt_engine::archive";
+
 /// Tracing target for engine operations.
 pub const TRACING_TARGET_ENGINE: &str = "nvisy_rt_engine::engine";
diff --git a/crates/nvisy-engine/src/session/mod.rs b/crates/nvisy-engine/src/session/mod.rs
index 072831c..9bdd5c5 100644
--- a/crates/nvisy-engine/src/session/mod.rs
+++ b/crates/nvisy-engine/src/session/mod.rs
@@ -14,8 +14,11 @@ use bytes::Bytes;
 pub use history::{AccessEntry, AccessHistory};
 use jiff::Timestamp;
 use nvisy_rt_document::{Capabilities, Document, PageOptions, Region, RegionId, Result};
+use tracing::{debug, info, trace};
 use uuid::Uuid;

+use crate::TRACING_TARGET_SESSION;
+
 /// Unique identifier for a read session.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub struct SessionId(Uuid);
@@ -122,8 +125,19 @@ impl ReadSession {
             vec![]
         };

+        let id = SessionId::new();
+
+        info!(
+            target: TRACING_TARGET_SESSION,
+            session_id = %id,
+            total_pages = ?total_pages,
+            regions = region_cache.len(),
+            track_history = config.track_history,
+            "Created new read session"
+        );
+
         Self {
-            id: SessionId::new(),
+            id,
             document,
             capabilities,
             history,
@@ -168,7 +182,14 @@ impl ReadSession {
     /// Records an access event.
     pub fn record_access(&mut self, description: impl Into<String>) {
         if self.config.track_history {
-            self.history.record(description);
+            let desc = description.into();
+            debug!(
+                target: TRACING_TARGET_SESSION,
+                session_id = %self.id,
+                description = %desc,
+                "Recording access"
+            );
+            self.history.record(desc);
         }
     }

@@ -181,16 +202,37 @@ impl ReadSession {
     /// Returns regions for a specific page.
     #[must_use]
     pub fn regions_for_page(&self, page: NonZeroU32) -> Vec<&Region> {
-        self.region_cache
+        let regions: Vec<_> = self
+            .region_cache
             .values()
             .filter(|r| r.page == Some(page))
-            .collect()
+            .collect();
+
+        trace!(
+            target: TRACING_TARGET_SESSION,
+            session_id = %self.id,
+            page = page.get(),
+            count = regions.len(),
+            "Retrieved regions for page"
+        );
+
+        regions
     }

     /// Finds a region by ID.
     #[must_use]
     pub fn find_region(&self, id: RegionId) -> Option<&Region> {
-        self.region_cache.get(&id)
+        let region = self.region_cache.get(&id);
+
+        trace!(
+            target: TRACING_TARGET_SESSION,
+            session_id = %self.id,
+            region_id = %id,
+            found = region.is_some(),
+            "Looking up region"
+        );
+
+        region
     }

     /// Returns the total number of pages.
@@ -223,11 +265,22 @@ impl ReadSession {
     /// Serializes the document to bytes.
     pub async fn to_bytes(&self) -> Result<Bytes> {
+        debug!(
+            target: TRACING_TARGET_SESSION,
+            session_id = %self.id,
+            "Serializing document to bytes"
+        );
         self.document.to_bytes().await
     }

     /// Consumes the session and returns the underlying document.
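+    ///
+    /// A minimal sketch (assumes an existing `session` over a loaded document):
+    ///
+    /// ```ignore
+    /// let document = session.into_document();
+    /// // The session, and any tracked access history, is consumed here.
+    /// ```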
pub fn into_document(self) -> D { + info!( + target: TRACING_TARGET_SESSION, + session_id = %self.id, + history_entries = self.history.len(), + "Closing session" + ); self.document } } From 1962b90589342d4e9becb14402ef1ee4f95a19cc Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sun, 18 Jan 2026 00:48:08 +0100 Subject: [PATCH 4/4] feat(archive): add archive creation support with registry tracking - Add ArchiveHandler::from_directory for creating archives from existing dirs - Add ArchiveRegistry::create_archive_dir for managed temp directories - Add ArchiveRegistry::list_creations and list_all methods - Update cleanup_stale to handle both extraction and creation directories - Add Engine::create_archive_handler for managed archive creation - Add Engine::create_archive for packing existing directories - Auto-append extension in pack() if path lacks valid archive extension --- README.md | 28 ++-- .../nvisy-archive/src/file/archive_handler.rs | 88 ++++++++++- crates/nvisy-archive/src/registry/mod.rs | 145 ++++++++++++++++-- crates/nvisy-engine/src/engine/mod.rs | 101 +++++++++++- 4 files changed, 317 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index c166704..a208997 100644 --- a/README.md +++ b/README.md @@ -32,27 +32,17 @@ let doc = engine.load_by_extension("txt", data).await?; let text = doc.extract_text().await?; ``` -## Development +## Changelog -```bash -# Build -cargo build --workspace +See [CHANGELOG.md](CHANGELOG.md) for release notes and version history. -# Test -cargo test --workspace --all-features - -# Lint -cargo clippy --workspace --all-targets --all-features -- -D warnings - -# Format -cargo +nightly fmt --all -``` - -## Requirements +## License -- Rust 1.92+ -- Cargo with workspace support +MIT License - see [LICENSE.txt](LICENSE.txt) -## License +## Support -MIT +- **Documentation**: [docs.nvisy.com](https://docs.nvisy.com) +- **Issues**: [GitHub Issues](https://github.com/nvisycom/server/issues) +- **Email**: [support@nvisy.com](mailto:support@nvisy.com) +- **API Status**: [nvisy.openstatus.dev](https://nvisy.openstatus.dev) diff --git a/crates/nvisy-archive/src/file/archive_handler.rs b/crates/nvisy-archive/src/file/archive_handler.rs index 8a30990..fb870d6 100644 --- a/crates/nvisy-archive/src/file/archive_handler.rs +++ b/crates/nvisy-archive/src/file/archive_handler.rs @@ -86,6 +86,49 @@ impl ArchiveHandler { } } + /// Create an archive handler from an existing directory + /// + /// This is useful for creating archives from a directory of files. + /// The handler will scan the directory for files and prepare them for packing. + /// + /// # Arguments + /// + /// * `path` - Path to the directory containing files to pack + /// * `archive_type` - The target archive type for packing + /// + /// # Errors + /// + /// Returns an error if the path doesn't exist, is not a directory, + /// or cannot be scanned. 
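+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// // Paths are illustrative.
+    /// let handler = ArchiveHandler::from_directory("reports/", ArchiveType::Zip)?;
+    /// let archive = handler.pack("reports.zip").await?;
+    /// ```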
+    pub fn from_directory(path: impl AsRef<Path>, archive_type: ArchiveType) -> Result<Self> {
+        let path = path.as_ref().to_path_buf();
+
+        if !path.exists() {
+            return Err(Error::invalid_archive(format!(
+                "Directory does not exist: {}",
+                path.display()
+            )));
+        }
+
+        if !path.is_dir() {
+            return Err(Error::invalid_archive(format!(
+                "Path is not a directory: {}",
+                path.display()
+            )));
+        }
+
+        let files = scan_files(&path)?;
+
+        Ok(Self {
+            content_source: ContentSource::new(),
+            archive_type,
+            original_path: None,
+            path,
+            files,
+            should_cleanup: false, // Don't clean up directories we didn't create
+        })
+    }
+
     /// Get the path to the extraction directory
     pub fn path(&self) -> &Path {
         &self.path
@@ -240,7 +283,17 @@ impl ArchiveHandler {
     /// Create a new archive from the current directory contents
     ///
     /// This method packages all files in the extraction directory back into
-    /// an archive file at the specified location.
+    /// an archive file at the specified location. If the target path doesn't
+    /// have the correct extension for the archive type, the extension is
+    /// appended automatically.
+    ///
+    /// # Examples
+    ///
+    /// ```ignore
+    /// // With ArchiveType::Zip:
+    /// handler.pack("output.zip")  // -> output.zip
+    /// handler.pack("output")      // -> output.zip
+    /// handler.pack("output.tar")  // -> output.tar.zip
+    /// ```
     ///
     /// # Errors
     ///
@@ -251,6 +304,9 @@ impl ArchiveHandler {
     pub async fn pack(&self, target_path: impl AsRef<Path>) -> Result<ArchiveFile> {
         let target_path = target_path.as_ref();

+        // Append extension if the path doesn't already have a valid one for this archive type
+        let target_path = ensure_archive_extension(target_path, self.archive_type);
+
         // Ensure parent directory exists
         if let Some(parent) = target_path.parent() {
             tokio::fs::create_dir_all(parent).await.map_err(|e| {
@@ -258,14 +314,9 @@ impl ArchiveHandler {
             })?;
         }

-        // Determine archive type from target path extension or use original type
-        let archive_type = target_path
-            .extension()
-            .and_then(ArchiveType::from_file_extension)
-            .unwrap_or(self.archive_type);
-
+        let archive_type = self.archive_type;
         let source_dir = self.path.clone();
-        let target = target_path.to_path_buf();
+        let target = target_path.clone();

         tokio::task::spawn_blocking(move || match archive_type {
             #[cfg(feature = "zip")]
@@ -343,6 +394,27 @@ fn content_kind_from_extension(extension: &str) -> ContentKind {
     }
 }

+/// Ensure the path has the correct extension for the archive type.
+/// If the path already ends with a valid extension, returns it unchanged.
+/// Otherwise, appends the primary extension.
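+///
+/// For example, with `ArchiveType::Zip` this mirrors the `pack` examples above:
+/// `"output"` becomes `"output.zip"`, while `"output.zip"` is returned unchanged.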
+fn ensure_archive_extension(path: &Path, archive_type: ArchiveType) -> PathBuf {
+    let file_name = match path.file_name().and_then(|n| n.to_str()) {
+        Some(name) => name,
+        None => return path.with_extension(archive_type.primary_extension()),
+    };
+
+    // Check if filename already ends with a valid extension
+    for ext in archive_type.file_extensions() {
+        if file_name.ends_with(&format!(".{}", ext)) {
+            return path.to_path_buf();
+        }
+    }
+
+    // Append primary extension to existing filename
+    let new_name = format!("{}.{}", file_name, archive_type.primary_extension());
+    path.with_file_name(new_name)
+}
+
 /// Scan the directory for files recursively
 pub(crate) fn scan_files(dir: &Path) -> Result<Vec<ArchiveFile>> {
     let mut files = Vec::new();
diff --git a/crates/nvisy-archive/src/registry/mod.rs b/crates/nvisy-archive/src/registry/mod.rs
index 1d6bbe4..79f41f8 100644
--- a/crates/nvisy-archive/src/registry/mod.rs
+++ b/crates/nvisy-archive/src/registry/mod.rs
@@ -9,12 +9,15 @@ use std::sync::Arc;
 use std::sync::atomic::{AtomicU64, Ordering};
 use std::time::{Duration, SystemTime};

-use crate::file::{ArchiveFile, ArchiveHandler};
+use crate::file::{ArchiveFile, ArchiveHandler, ArchiveType};
 use crate::{ArchiveErrorExt, Error, Result};

 /// Prefix used for registry-managed extraction directories
 const EXTRACTION_DIR_PREFIX: &str = "nvisy-extract-";

+/// Prefix used for registry-managed archive creation directories
+const CREATION_DIR_PREFIX: &str = "nvisy-create-";
+
 /// Default maximum age for stale directories (24 hours)
 const DEFAULT_STALE_THRESHOLD_SECS: u64 = 24 * 60 * 60;
@@ -127,7 +130,7 @@ impl ArchiveRegistry {
         // Generate directory name
         let dir_name = match name {
             Some(n) => format!("{}{}", EXTRACTION_DIR_PREFIX, n),
-            None => self.generate_unique_name(),
+            None => self.generate_unique_name(EXTRACTION_DIR_PREFIX),
         };

         let extract_dir = self.inner.base_dir.join(&dir_name);
@@ -154,20 +157,98 @@ impl ArchiveRegistry {
         archive.unpack_to(&extract_dir).await
     }

-    /// Generate a unique directory name
-    fn generate_unique_name(&self) -> String {
+    /// Create a managed directory for assembling files before packing into an archive.
+    ///
+    /// Returns an [`ArchiveHandler`] pointing to an empty directory where you can
+    /// add files. When ready, call [`ArchiveHandler::pack`] to create the archive.
+    /// The directory will be automatically cleaned up when the handler is dropped.
+    ///
+    /// # Arguments
+    ///
+    /// * `archive_type` - The target archive format for packing
+    ///
+    /// # Example
+    ///
+    /// ```rust,ignore
+    /// use nvisy_rt_archive::{ArchiveRegistry, ArchiveType};
+    ///
+    /// # async fn example() -> nvisy_rt_archive::Result<()> {
+    /// let registry = ArchiveRegistry::new("/tmp/archives")?;
+    ///
+    /// // Create a directory for assembling files
+    /// let mut handler = registry.create_archive_dir(ArchiveType::Zip)?;
+    ///
+    /// // Add files to the directory
+    /// handler.write_file("doc.txt", b"Hello, world!").await?;
+    ///
+    /// // Pack into an archive
+    /// let archive = handler.pack("output.zip").await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn create_archive_dir(&self, archive_type: ArchiveType) -> Result<ArchiveHandler> {
+        self.create_archive_dir_with_name(archive_type, None)
+    }
+
+    /// Create a managed directory with a custom name for assembling files.
+    ///
+    /// If `name` is `None`, a unique name will be generated automatically.
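+    ///
+    /// A small sketch (the name is illustrative):
+    ///
+    /// ```rust,ignore
+    /// let handler = registry.create_archive_dir_with_name(ArchiveType::Zip, Some("staging"))?;
+    /// // The directory is created under the registry base dir as `nvisy-create-staging`.
+    /// assert!(handler.path().ends_with("nvisy-create-staging"));
+    /// ```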
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The directory cannot be created
+    /// - A directory with the specified name already exists
+    pub fn create_archive_dir_with_name(
+        &self,
+        archive_type: ArchiveType,
+        name: Option<&str>,
+    ) -> Result<ArchiveHandler> {
+        let dir_name = match name {
+            Some(n) => format!("{}{}", CREATION_DIR_PREFIX, n),
+            None => self.generate_unique_name(CREATION_DIR_PREFIX),
+        };
+
+        let create_dir = self.inner.base_dir.join(&dir_name);
+
+        if create_dir.exists() {
+            return Err(Error::invalid_archive(format!(
+                "Creation directory already exists: {}",
+                create_dir.display()
+            )));
+        }
+
+        fs::create_dir_all(&create_dir).map_err(|e| {
+            Error::invalid_archive(format!(
+                "Failed to create archive directory '{}': {}",
+                create_dir.display(),
+                e
+            ))
+        })?;
+
+        Ok(ArchiveHandler::new(
+            nvisy_rt_core::path::ContentSource::new(),
+            archive_type,
+            None,
+            create_dir,
+            Vec::new(), // Empty directory, no files yet
+        ))
+    }
+
+    /// Generate a unique directory name with the given prefix
+    fn generate_unique_name(&self, prefix: &str) -> String {
         let count = self.inner.counter.fetch_add(1, Ordering::SeqCst);
         let timestamp = SystemTime::now()
             .duration_since(SystemTime::UNIX_EPOCH)
             .unwrap_or_default()
             .as_millis();
-        format!("{}{}-{}", EXTRACTION_DIR_PREFIX, timestamp, count)
+        format!("{}{}-{}", prefix, timestamp, count)
     }

-    /// Clean up stale extraction directories
+    /// Clean up stale extraction and creation directories
     ///
-    /// This function removes extraction directories that are older than
+    /// This function removes managed directories that are older than
     /// the specified threshold. Use this to clean up directories left
     /// behind by crashed processes or I/O failures.
     ///
@@ -200,13 +281,18 @@ impl ArchiveRegistry {
         for entry in entries.flatten() {
             let path = entry.path();

-            // Only process directories with our prefix
+            // Only process directories with our prefixes
             if !path.is_dir() {
                 continue;
             }

             let file_name = match path.file_name().and_then(|n| n.to_str()) {
-                Some(name) if name.starts_with(EXTRACTION_DIR_PREFIX) => name,
+                Some(name)
+                    if name.starts_with(EXTRACTION_DIR_PREFIX)
+                        || name.starts_with(CREATION_DIR_PREFIX) =>
+                {
+                    name
+                }
                 _ => continue,
             };

@@ -251,8 +337,30 @@ impl ArchiveRegistry {

     /// List all current extraction directories
     ///
-    /// Returns the paths of all directories managed by this registry.
+    /// Returns the paths of all extraction directories managed by this registry.
     pub fn list_extractions(&self) -> Result<Vec<PathBuf>> {
+        self.list_dirs_with_prefix(EXTRACTION_DIR_PREFIX)
+    }
+
+    /// List all current archive creation directories
+    ///
+    /// Returns the paths of all creation directories managed by this registry.
+    pub fn list_creations(&self) -> Result<Vec<PathBuf>> {
+        self.list_dirs_with_prefix(CREATION_DIR_PREFIX)
+    }
+
+    /// List all managed directories (both extractions and creations)
+    ///
+    /// Returns the paths of all directories managed by this registry.
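+    ///
+    /// A small sketch of inspecting managed directories:
+    ///
+    /// ```rust,ignore
+    /// for dir in registry.list_all()? {
+    ///     println!("managed: {}", dir.display());
+    /// }
+    /// ```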
+    pub fn list_all(&self) -> Result<Vec<PathBuf>> {
+        let mut dirs = self.list_extractions()?;
+        dirs.extend(self.list_creations()?);
+        dirs.sort();
+        Ok(dirs)
+    }
+
+    /// List directories with a specific prefix
+    fn list_dirs_with_prefix(&self, prefix: &str) -> Result<Vec<PathBuf>> {
         let entries = fs::read_dir(&self.inner.base_dir).map_err(|e| {
             Error::invalid_archive(format!(
                 "Failed to read registry directory '{}': {}",
@@ -261,7 +369,7 @@
             ))
         })?;

-        let mut extractions = Vec::new();
+        let mut dirs = Vec::new();

         for entry in entries.flatten() {
             let path = entry.path();
@@ -270,15 +378,15 @@
                 && path
                     .file_name()
                     .and_then(|n| n.to_str())
-                    .is_some_and(|name| name.starts_with(EXTRACTION_DIR_PREFIX));
+                    .is_some_and(|name| name.starts_with(prefix));

             if is_managed {
-                extractions.push(path);
+                dirs.push(path);
             }
         }

-        extractions.sort();
-        Ok(extractions)
+        dirs.sort();
+        Ok(dirs)
     }
 }

@@ -309,12 +417,15 @@ mod tests {
         let temp_dir = tempfile::tempdir().unwrap();
         let registry = ArchiveRegistry::new(temp_dir.path()).unwrap();

-        let name1 = registry.generate_unique_name();
-        let name2 = registry.generate_unique_name();
+        let name1 = registry.generate_unique_name(EXTRACTION_DIR_PREFIX);
+        let name2 = registry.generate_unique_name(EXTRACTION_DIR_PREFIX);

         assert!(name1.starts_with(EXTRACTION_DIR_PREFIX));
         assert!(name2.starts_with(EXTRACTION_DIR_PREFIX));
         assert_ne!(name1, name2);
+
+        let create_name = registry.generate_unique_name(CREATION_DIR_PREFIX);
+        assert!(create_name.starts_with(CREATION_DIR_PREFIX));
     }

     #[test]
diff --git a/crates/nvisy-engine/src/engine/mod.rs b/crates/nvisy-engine/src/engine/mod.rs
index 1a99baf..db2c153 100644
--- a/crates/nvisy-engine/src/engine/mod.rs
+++ b/crates/nvisy-engine/src/engine/mod.rs
@@ -15,7 +15,7 @@ use tracing::warn;
 use tracing::{debug, info};

 #[cfg(feature = "archive")]
-use nvisy_rt_archive::{ArchiveHandler, ArchiveRegistry};
+use nvisy_rt_archive::{ArchiveFile, ArchiveHandler, ArchiveRegistry, ArchiveType};

 #[cfg(feature = "archive")]
 use nvisy_rt_document::ContentKind;
@@ -346,6 +346,105 @@ impl Engine {
         self.extract_archive(archive_path.as_ref()).await
     }

+    /// Creates a managed directory for assembling files before packing into an archive.
+    ///
+    /// Returns an [`ArchiveHandler`] pointing to an empty directory managed by the
+    /// registry. Add files to the directory, then call [`ArchiveHandler::pack`] to
+    /// create the archive. The directory will be automatically cleaned up when the
+    /// handler is dropped.
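+    ///
+    /// By default the directory lives under the registry's base directory
+    /// (`std::env::temp_dir().join("nvisy-engine")` for the default engine registry).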
+    ///
+    /// # Arguments
+    ///
+    /// * `archive_type` - The target archive format for packing
+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// use nvisy_rt_archive::ArchiveType;
+    ///
+    /// // Create a managed directory for assembling files
+    /// let mut handler = engine.create_archive_handler(ArchiveType::Zip)?;
+    ///
+    /// // Add files to the directory
+    /// handler.write_file("doc.txt", b"Hello, world!").await?;
+    /// handler.write_file("data.json", b"{\"key\": \"value\"}").await?;
+    ///
+    /// // Pack into an archive
+    /// let archive = handler.pack("/path/to/output.zip").await?;
+    /// ```
+    #[cfg(feature = "archive")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "archive")))]
+    pub fn create_archive_handler(&self, archive_type: ArchiveType) -> Result<ArchiveHandler> {
+        debug!(
+            target: TRACING_TARGET_ARCHIVE,
+            archive_type = ?archive_type,
+            base_dir = %self.inner.archive_registry.base_dir().display(),
+            "Creating archive handler"
+        );
+
+        self.inner.archive_registry.create_archive_dir(archive_type)
+    }
+
+    /// Creates an archive from an existing directory of files.
+    ///
+    /// This method takes a source directory and packs its contents into an archive
+    /// at the specified target path. The archive format is determined by the
+    /// `archive_type` parameter.
+    ///
+    /// For assembling files from scratch, consider using [`Self::create_archive_handler`]
+    /// which provides a managed directory with automatic cleanup.
+    ///
+    /// # Arguments
+    ///
+    /// * `source_dir` - Directory containing files to pack
+    /// * `target_path` - Path where the archive will be created
+    /// * `archive_type` - The format of the archive to create
+    ///
+    /// # Example
+    ///
+    /// ```ignore
+    /// use nvisy_rt_archive::ArchiveType;
+    ///
+    /// // Create a ZIP archive from a directory
+    /// let archive = engine.create_archive(
+    ///     "/path/to/files",
+    ///     "/path/to/output.zip",
+    ///     ArchiveType::Zip
+    /// ).await?;
+    /// ```
+    #[cfg(feature = "archive")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "archive")))]
+    pub async fn create_archive(
+        &self,
+        source_dir: impl AsRef<Path>,
+        target_path: impl AsRef<Path>,
+        archive_type: ArchiveType,
+    ) -> Result<ArchiveFile> {
+        let source_dir = source_dir.as_ref();
+        let target_path = target_path.as_ref();
+
+        debug!(
+            target: TRACING_TARGET_ARCHIVE,
+            source = %source_dir.display(),
+            target = %target_path.display(),
+            archive_type = ?archive_type,
+            "Creating archive from directory"
+        );
+
+        let handler = ArchiveHandler::from_directory(source_dir, archive_type)?;
+        let archive = handler.pack(target_path).await?;
+
+        info!(
+            target: TRACING_TARGET_ARCHIVE,
+            source = %source_dir.display(),
+            target = %target_path.display(),
+            file_count = handler.file_count(),
+            "Created archive"
+        );
+
+        Ok(archive)
+    }
+
     /// Loads documents from an already extracted archive handler.
     #[cfg(feature = "archive")]
     #[cfg_attr(docsrs, doc(cfg(feature = "archive")))]