diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..6e02f93
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,45 @@
+version: 2
+updates:
+  # Enable version updates for cargo
+  - package-ecosystem: "cargo"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      timezone: "Europe/Berlin"
+      day: "monday"
+      time: "04:00"
+    open-pull-requests-limit: 5
+    labels:
+      - "chore"
+    commit-message:
+      prefix: "chore(deps)"
+      prefix-development: "chore(deps-dev)"
+    rebase-strategy: "auto"
+    versioning-strategy: "auto"
+    # Group patch and minor updates together to reduce PR noise
+    groups:
+      rust-dependencies:
+        patterns:
+          - "*"
+        update-types:
+          - "minor"
+          - "patch"
+
+  # Version updates for GitHub Actions
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+      timezone: "Europe/Berlin"
+      day: "monday"
+      time: "04:00"
+    open-pull-requests-limit: 5
+    labels:
+      - "chore"
+    commit-message:
+      prefix: "chore(actions)"
+    # Group all GitHub Actions updates together to reduce PR noise
+    groups:
+      github-actions:
+        patterns:
+          - "*"
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..a61fb12
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,101 @@
+name: Build
+
+on:
+  push:
+    branches: [main, release]
+    paths:
+      - "crates/**"
+      - "Cargo.toml"
+      - "Cargo.lock"
+      - "rustfmt.toml"
+      - ".github/workflows/build.yml"
+  pull_request:
+    branches: [main, release]
+    paths:
+      - "crates/**"
+      - "Cargo.toml"
+      - "Cargo.lock"
+      - "rustfmt.toml"
+      - ".github/workflows/build.yml"
+  workflow_dispatch:
+
+env:
+  CARGO_TERM_COLOR: always
+  RUST_BACKTRACE: 1
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  fmt:
+    name: Format
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Rust nightly
+        uses: dtolnay/rust-toolchain@nightly
+        with:
+          components: rustfmt
+
+      - name: Check formatting
+        run: cargo +nightly fmt --all -- --check
+
+  check:
+    name: Check & Clippy
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          components: clippy
+
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2
+
+      - name: Check
+        run: cargo check --all-features --workspace
+
+      - name: Clippy
+        run: cargo clippy --all-targets --all-features --workspace -- -D warnings
+
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    needs: check
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2
+
+      - name: Run tests
+        run: cargo test --all-features --workspace
+
+  docs:
+    name: Docs
+    runs-on: ubuntu-latest
+    needs: check
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2
+
+      - name: Build docs
+        run: cargo doc --no-deps --all-features --workspace
+        env:
+          RUSTDOCFLAGS: "-D warnings"
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..1fe24f7
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,61 @@
+name: Release
+
+on:
+  push:
+    tags:
+      - "v*"
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  create-release:
+    name: Create Release
+    runs-on: ubuntu-latest
+    outputs:
+      upload_url: ${{ steps.create_release.outputs.upload_url }}
+    steps:
+      - name: Create Release
+        id: create_release
+        uses: actions/create-release@v1
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          tag_name: ${{ github.ref }}
+          release_name: Release ${{ github.ref }}
+          draft: false
+          prerelease: false
+
+  publish-crates:
+    name: Publish to crates.io
+    needs: create-release
+    runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags/v')
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          toolchain: 1.89
+
+      - name: Cache Rust dependencies
+        uses: Swatinem/rust-cache@v2
+
+      - name: Publish nvisy-core
+        run: cargo publish -p nvisy-core --token ${{ secrets.CARGO_TOKEN }}
+        continue-on-error: true
+
+      - name: Wait for nvisy-core
+        run: sleep 30
+
+      - name: Publish nvisy-archive
+        run: cargo publish -p nvisy-archive --token ${{ secrets.CARGO_TOKEN }}
+        continue-on-error: true
+
+      - name: Wait for nvisy-archive
+        run: sleep 30
+
+      - name: Publish nvisy-engine
+        run: cargo publish -p nvisy-engine --token ${{ secrets.CARGO_TOKEN }}
+        continue-on-error: true
diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml
new file mode 100644
index 0000000..835a818
--- /dev/null
+++ b/.github/workflows/security.yml
@@ -0,0 +1,46 @@
+name: Security
+
+on:
+  push:
+    branches: [main, release]
+    paths:
+      - "crates/**"
+      - "Cargo.toml"
+      - "Cargo.lock"
+      - "deny.toml"
+      - ".github/workflows/security.yml"
+  pull_request:
+    branches: [main, release]
+    paths:
+      - "crates/**"
+      - "Cargo.toml"
+      - "Cargo.lock"
+      - "deny.toml"
+      - ".github/workflows/security.yml"
+  schedule:
+    - cron: "0 6 * * 1" # Weekly on Monday at 6 AM
+  workflow_dispatch:
+
+env:
+  CARGO_TERM_COLOR: always
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  deny:
+    name: Deny
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install cargo-binstall
+        uses: cargo-bins/cargo-binstall@main
+
+      - name: Install cargo-deny
+        run: cargo binstall cargo-deny --no-confirm --no-symlinks
+
+      - name: Run deny
+        run: cargo deny check all
diff --git a/.gitignore b/.gitignore
index ad67955..ccd680f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,21 +1,46 @@
-# Generated by Cargo
-# will have compiled files and executables
-debug
-target
+# OS
+Thumbs.db
+.DS_Store
 
-# These are backup files generated by rustfmt
-**/*.rs.bk
+# IDE and Editors
+.vs/
+.vscode/
+.idea/
+.zed/
 
-# MSVC Windows builds of rustc generate these, which store debugging information
+# Rust
+debug/
+target/
+**/*.rs.bk
 *.pdb
 
-# Generated by cargo mutants
-# Contains mutation testing data
-**/mutants.out*/
+# Generated files
+coverage/
+*.lcov
+
+# Build output
+dist/
+build/
+output/
+
+# Environment files
+.env*
+!.env.example
+
+# Logs
+logs/
+*.log
+*.log*
+
+# Backup and temporary files
+*.bak
+*.backup
+*.tmp
+tmp/
+temp/
 
-# RustRover
-# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-# and can be added to the global gitignore or merged into this file. For a more nuclear
-# option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+# Other
+.ignore*/
+LLM.md
+.claude
+CLAUDE.md
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..7870311
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,44 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to
+[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+
+### Changed
+
+### Fixed
+
+### Removed
+
+## [0.1.0] - 2025-10-21
+
+### Added
+
+- Initial release of the Nvisy Runtime
+- Full Tokio async runtime integration
+- Memory-mapped file processing for large datasets
+- Parallel processing capabilities
+
+### Features
+
+- High-performance async I/O with Tokio
+- Modular crate architecture for optimal compilation
+- Comprehensive error handling with structured diagnostics
+- Zero-copy operations for improved performance
+
+### Architecture
+
+- Workspace-based multi-crate organization
+- Shared dependency management across crates
+- Clean separation of concerns (core, engine, archive)
+- Rust 2021 edition with modern language features
+- Strict type safety with no unsafe code by default
+
+[Unreleased]: https://github.com/nvisycom/runtime/compare/v0.1.0...HEAD
+[0.1.0]: https://github.com/nvisycom/runtime/releases/tag/v0.1.0
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..c3af59c
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,224 @@
+# Contributing
+
+Thank you for your interest in contributing to the Nvisy Runtime.
+
+## Requirements
+
+- Rust 1.89.0 or higher
+
+## Development Setup
+
+```bash
+git clone https://github.com/nvisycom/runtime.git
+cd runtime
+cargo build
+```
+
+## Development
+
+### Building
+
+```bash
+# Build all workspace crates
+cargo build
+
+# Build with release optimizations
+cargo build --release
+
+# Build specific crate
+cargo build -p nvisy-core
+
+# Build with all features enabled
+cargo build --all-features
+```
+
+### Testing
+
+```bash
+# Run all tests
+cargo test
+
+# Run tests for specific crate
+cargo test -p nvisy-core
+
+# Run tests with all features
+cargo test --all-features
+
+# Run tests with output
+cargo test -- --nocapture
+
+# Run specific test
+cargo test test_name
+```
+
+### Code Quality
+
+Before submitting changes:
+
+```bash
+# Format code
+cargo fmt
+
+# Check formatting without modifying files
+cargo fmt --check
+
+# Run clippy for linting
+cargo clippy --all-targets --all-features
+
+# Run clippy with strict warnings
+cargo clippy --all-targets --all-features -- -D warnings
+
+# Check for outdated dependencies
+cargo outdated
+
+# Audit dependencies for security issues
+cargo audit
+```
+
+### Documentation
+
+```bash
+# Build documentation
+cargo doc
+
+# Build and open documentation in browser
+cargo doc --open
+
+# Build documentation for all crates
+cargo doc --workspace --no-deps
+
+# Check for broken documentation links
+RUSTDOCFLAGS="-D warnings" cargo doc --workspace --no-deps
+```
+
+### Benchmarking
+
+```bash
+# Run benchmarks (if available)
+cargo bench
+
+# Run benchmarks for specific crate
+cargo bench -p nvisy-core
+```
+
+## Project Structure
+
+```
+runtime/
+├── crates/
+│   ├── nvisy-archive/    # Archive handling and compression
+│   ├── nvisy-core/       # Core types and runtime
+│   ├── nvisy-document/   # Shared document model and traits
+│   ├── nvisy-docx/       # DOCX format support
+│   ├── nvisy-engine/     # Processing engine
+│   ├── nvisy-pdf/        # PDF format support
+│   └── nvisy-text/       # Plain text format support
+├── Cargo.toml            # Workspace configuration
+└── README.md
+```
+
+## Pull Request Process
+
+1. Fork the repository
+2. Create a feature branch (`git checkout -b feature/amazing-feature`)
+3. Make your changes
+4. Add tests for new functionality
+5. Ensure all tests pass: `cargo test`
+6. Format your code: `cargo fmt`
+7. Run clippy: `cargo clippy --all-targets --all-features`
+8. Commit your changes (`git commit -m 'Add amazing feature'`)
+9. Push to the branch (`git push origin feature/amazing-feature`)
+10. Open a Pull Request
+
+### Pull Request Checklist
+
+- [ ] Tests pass (`cargo test`)
+- [ ] Code is formatted (`cargo fmt --check`)
+- [ ] Clippy shows no warnings (`cargo clippy`)
+- [ ] Documentation is updated if needed
+- [ ] CHANGELOG.md is updated for notable changes
+- [ ] No breaking changes (or documented with migration guide)
+- [ ] Commit messages are clear and descriptive
+
+## Code Standards
+
+- Follow Rust's official style guide and conventions
+- Use `rustfmt` for consistent formatting
+- Address all `clippy` warnings
+- Write tests for new functionality
+- Include documentation comments (`///`) for public APIs
+- Use descriptive variable and function names
+- Prefer explicit types in public APIs
+- Minimize use of `unsafe` code (document when necessary)
+- Follow semantic versioning for changes
+
+## Workspace Guidelines
+
+- Keep crates focused and single-purpose
+- Use workspace dependencies for consistency
+- Document cross-crate dependencies clearly
+- Avoid circular dependencies between crates
+- Use feature flags for optional functionality
+
+## Error Handling
+
+- Use the error types from the `nvisy-core` crate
+- Provide context with errors using `thiserror` (see the sketch below)
+- Document error conditions in function docs
+- Use `Result` for fallible operations
+- Avoid panics in library code
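+
+For illustration, here is a minimal sketch of an error type following these
+guidelines; the `ArchiveError` name, its variants, and `validate` are
+hypothetical examples, not part of the current API:
+
+```rust
+use thiserror::Error;
+
+/// Errors an archive operation may surface.
+#[derive(Debug, Error)]
+pub enum ArchiveError {
+    /// Underlying I/O failure, with the source error attached as context.
+    #[error("failed to read archive entry: {0}")]
+    Io(#[from] std::io::Error),
+
+    /// Input that does not parse as a supported archive format.
+    #[error("unsupported or corrupt archive header")]
+    InvalidHeader,
+}
+
+/// Fallible operations return `Result` instead of panicking.
+pub fn validate(header_len: usize) -> Result<(), ArchiveError> {
+    if header_len == 0 { Err(ArchiveError::InvalidHeader) } else { Ok(()) }
+}
+```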
+
+## Async Code
+
+- Use `tokio` runtime for async operations
+- Mark async functions clearly
+- Use `async-trait` for trait methods when needed
+- Avoid blocking operations in async contexts
+- Document async requirements in API docs
+
+## Testing Guidelines
+
+- Write unit tests in the same file as the code
+- Write integration tests in `tests/` directory
+- Use `#[cfg(test)]` for test modules
+- Mock external dependencies
+- Test both success and error paths
+- Use property-based testing where appropriate
+
+## Performance Considerations
+
+- Profile before optimizing
+- Document performance characteristics in comments
+- Prefer zero-copy operations when possible
+- Use benchmarks for performance-critical code
+- Consider memory usage for large data processing
+
+## License
+
+By contributing, you agree your contributions will be licensed under the MIT
+License.
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..56eaaa2
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,1860 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4 + +[[package]] +name = "addr2line" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "backtrace" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-link 0.2.0", +] + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "bit-set" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0481a0e032742109b1133a095184ee93d88f3dc9e0d28a5d033dc77a073f44f" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22" + +[[package]] +name = "bitflags" +version = "2.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = 
[ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +dependencies = [ + "serde", +] + +[[package]] +name = "bzip2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] + +[[package]] +name = "bzip2" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" +dependencies = [ + "libbz2-rs-sys", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.13+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +dependencies = [ + "cc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.2.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" + +[[package]] +name = "chrono" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" +dependencies = [ + "num-traits", +] + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "convert_case" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "deflate64" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204" + +[[package]] +name = "deranged" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_more" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", + "unicode-xid", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys 0.61.1", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "filetime" +version = "0.2.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc0505cd1b6fa6580283f6bdf70a73fcf4aba1184038c90902b92b3dd0df63ed" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.60.2", +] + +[[package]] +name = "filetime_creation" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c25b5d475550e559de5b0c0084761c65325444e3b6c9e298af9cefe7a9ef3a5f" +dependencies = [ + "cfg-if", + "filetime", + "windows-sys 0.52.0", +] + +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + +[[package]] +name = "flate2" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" +dependencies = [ + "crc32fast", + "libz-rs-sys", + 
"miniz_oxide", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "generator" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows", +] + +[[package]] +name = "generic-array" +version = "0.14.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi 0.14.7+wasi-0.2.4", +] + +[[package]] +name = "gimli" +version = "0.32.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" + +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hipstr" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07a5072958d04f9147e517881d929d3f4706612712f8f4cfcd247f2b716d5262" +dependencies = [ + "loom", + "serde", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "indexmap" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + +[[package]] +name = "io-uring" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +dependencies = [ + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name 
= "jiff-static" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libbz2-rs-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" + +[[package]] +name = "libc" +version = "0.2.176" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" + +[[package]] +name = "libredox" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +dependencies = [ + "bitflags", + "libc", + "redox_syscall", +] + +[[package]] +name = "libz-rs-sys" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd" +dependencies = [ + "zlib-rs", +] + +[[package]] +name = "linux-raw-sys" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lzma-rust" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baab2bbbd7d75a144d671e9ff79270e903957d92fb7386fd39034c709bd2661" +dependencies = [ + "byteorder", +] + +[[package]] +name = "lzma-rust2" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c60a23ffb90d527e23192f1246b14746e2f7f071cb84476dd879071696c18a4a" +dependencies = [ + "crc", + "sha2", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + +[[package]] +name = "mio" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +dependencies = [ + "libc", + "wasi 0.11.1+wasi-snapshot-preview1", + "windows-sys 0.59.0", +] + +[[package]] +name = "nt-time" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2de419e64947cd8830e66beb584acc3fb42ed411d103e3c794dda355d1b374b5" +dependencies = [ + "chrono", + "time", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "nvisy-archive" +version = "0.1.0" +dependencies = [ + "bzip2 0.5.2", + "flate2", + "nvisy-core", + "sevenz-rust", + "tar", + "tempfile", + "tokio", + "tokio-test", + "xz2", + "zip", +] + +[[package]] +name = "nvisy-core" +version = "0.1.0" +dependencies = [ + "bytes", + "hex", + "hipstr", + "jiff", + "serde", + "serde_json", + "sha2", + "strum", + "tempfile", + "thiserror", + "tokio", + "uuid", +] + +[[package]] +name = "nvisy-document" +version = "0.1.0" +dependencies = [ + "async-trait", + "base64", + "bytes", + "derive_more", + "jiff", + "nvisy-core", + "serde", + "serde_json", + "thiserror", + "tokio", + "uuid", +] + +[[package]] +name = "nvisy-docx" +version = "0.1.0" +dependencies = [ + "async-trait", + "bytes", + "nvisy-document", + "thiserror", +] + +[[package]] +name = "nvisy-engine" +version = "0.1.0" +dependencies = [ + "bytes", + "jiff", + "nvisy-archive", + "nvisy-document", + "nvisy-docx", + "nvisy-pdf", + "nvisy-text", + "serde", + "serde_json", + "uuid", +] + +[[package]] +name = "nvisy-pdf" +version = "0.1.0" +dependencies = [ + "async-trait", + "bytes", + "nvisy-document", + "thiserror", +] + +[[package]] +name = "nvisy-text" +version = "0.1.0" +dependencies = [ + "async-trait", + "bytes", + "nvisy-document", + "thiserror", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "pbkdf2" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2" +dependencies = [ + "digest", + "hmac", 
+] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppmd-rust" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c834641d8ad1b348c9ee86dec3b9840d805acd5f24daa5f90c788951a52ff59b" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex-automata" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" + +[[package]] +name = "rustc-demangle" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.61.1", +] + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + +[[package]] +name = "semver" +version = "1.0.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + +[[package]] +name = "sevenz-rust" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26482cf1ecce4540dc782fc70019eba89ffc4d87b3717eb5ec524b5db6fdefef" +dependencies = [ + "bit-set", + "byteorder", + "crc", + "filetime_creation", + "js-sys", + "lzma-rust", + "nt-time", + "sha2", + "wasm-bindgen", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + +[[package]] +name = "slab" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "strum" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.61.1", +] + +[[package]] +name = "thiserror" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "time" +version = "0.3.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +dependencies = [ + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" + +[[package]] +name = "time-macros" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +dependencies = [ + "num-conv", + "time-core", +] + +[[package]] +name = "tokio" +version = "1.47.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +dependencies = [ + "backtrace", + "bytes", + "io-uring", + "libc", + "mio", + "pin-project-lite", + "slab", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tokio-test" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7" +dependencies = [ + "async-stream", + "bytes", + "futures-core", + "tokio", + "tokio-stream", +] + +[[package]] +name = "tracing" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +dependencies = [ + "pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "uuid" +version = "1.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +dependencies = [ + "getrandom", + "js-sys", + "serde", + "wasm-bindgen", +] + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasi" +version = "0.14.7+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" +dependencies = [ + "wasip2", +] + +[[package]] +name = "wasip2" +version = "1.0.1+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "windows" +version = "0.61.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core", + "windows-future", + "windows-link 0.1.3", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core", + "windows-link 0.1.3", + "windows-threading", +] + +[[package]] +name = "windows-implement" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" 
+version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-link" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" + +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core", + "windows-link 0.1.3", +] + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.4", +] + +[[package]] +name = "windows-sys" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +dependencies = [ + "windows-link 0.2.0", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d42b7b7f66d2a06854650af09cfdf8713e427a439c97ad65a6375318033ac4b" +dependencies = [ + "windows-link 0.2.0", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows-threading" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + +[[package]] +name = "wit-bindgen" +version = "0.46.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" + +[[package]] +name = "xattr" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zip" +version = "5.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f852905151ac8d4d06fdca66520a661c09730a74c6d4e2b0f27b436b382e532" +dependencies = [ + "aes", + "arbitrary", + "bzip2 0.6.1", + "constant_time_eq", + "crc32fast", + "deflate64", + "flate2", + "getrandom", + "hmac", + "indexmap", + "lzma-rust2", + "memchr", + "pbkdf2", + "ppmd-rust", + "sha1", + "time", + "zeroize", + "zopfli", + "zstd", +] + +[[package]] +name = "zlib-rs" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2" + +[[package]] +name = "zopfli" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edfc5ee405f504cd4984ecc6f14d02d55cfda60fa4b689434ef4102aae150cd7" +dependencies = [ + "bumpalo", + "crc32fast", + "log", + "simd-adler32", +] + +[[package]] +name = "zstd" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.16+zstd.1.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..dfc23f7 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,101 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[workspace] +resolver = "2" +members = [ + "./crates/nvisy-archive", + "./crates/nvisy-core", + "./crates/nvisy-docx", + "./crates/nvisy-document", + "./crates/nvisy-engine", + "./crates/nvisy-pdf", + "./crates/nvisy-text", +] + +[workspace.package] +version = "0.1.0" +rust-version = "1.89" +edition = "2021" +license = "MIT" +publish = false + +authors = ["Nvisy Team "] +repository = "https://github.com/nvisycom/core" +homepage = "https://github.com/nvisycom/core" +documentation = "https://docs.rs/nvisy" + +[workspace.dependencies] +# Default features are disabled for certain dependencies to allow 
+# downstream workspaces/crates to selectively enable them as needed. +# +# See for more details: https://github.com/rust-lang/cargo/issues/11329 + +# Internal crates +nvisy-archive = { path = "./crates/nvisy-archive", version = "0.1.0", features = [] } +nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0", features = [] } +nvisy-docx = { path = "./crates/nvisy-docx", version = "0.1.0", features = [] } +nvisy-document = { path = "./crates/nvisy-document", version = "0.1.0", features = [] } +nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0", features = [] } +nvisy-pdf = { path = "./crates/nvisy-pdf", version = "0.1.0", features = [] } +nvisy-text = { path = "./crates/nvisy-text", version = "0.1.0", features = [] } + +# Multithreading +rayon = { version = "1.11", default-features = false, features = [] } + +# Async I/O and file handling +tokio = { version = "1.47", default-features = false, features = [] } +tokio-stream = { version = "0.1", default-features = false, features = [] } +tokio-util = { version = "0.7", default-features = false, features = [] } +futures = { version = "0.3", default-features = false, features = [] } +async-trait = { version = "0.1", default-features = false, features = [] } +walkdir = { version = "2.5", default-features = false, features = [] } +memmap2 = { version = "0.9", default-features = false, features = [] } +tempfile = { version = "3.22", default-features = false, features = [] } + +# Tracing and observability +tracing = { version = "0.1", features = [] } +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } + +# Error handling +thiserror = { version = "2.0", features = [] } +anyhow = { version = "1.0", features = ["backtrace"] } + +# Serialization +serde = { version = "1.0", features = [] } +serde_json = { version = "1.0", features = [] } + +# Data types and utilities +uuid = { version = "1.6", features = [] } +jiff = { version = "0.2", default-features = false, features = [] } +size = { version = "0.5", default-features = false, features = [] } +bytes = { version = "1.10", default-features = false, features = [] } + +rust_decimal = { version = "1.36", default-features = false, features = [] } +semver = { version = "1.0", default-features = false, features = [] } +isolang = { version = "2.4", default-features = false, features = ["english_names"] } + +# Text processing and pattern matching +regex = { version = "1.11", default-features = false, features = [] } +fancy-regex = { version = "0.16", default-features = false, features = [] } +aho-corasick = { version = "1.1", default-features = false, features = [] } +unicode-segmentation = { version = "1.10", default-features = false, features = [] } +hipstr = { version = "0.8", default-features = false, features = [] } + +# Crypto and hashing +sha2 = { version = "0.10", default-features = false, features = [] } +blake3 = { version = "1.8", default-features = false, features = [] } +base64 = { version = "0.22", default-features = false, features = [] } +hex = { version = "0.4", features = [] } +zeroize = { version = "1.7", default-features = false, features = [] } +rand = { version = "0.9", default-features = false, features = [] } + +# Macros +derive_more = { version = "2.0", default-features = false, features = [] } +strum = { version = "0.27", default-features = false, features = [] } +const_format = { version = "0.2", default-features = false, features = [] } + +# Testing utilities +tokio-test = { version = "0.4", default-features = false, features = [] } 
+proptest = { version = "1.4", default-features = false, features = [] }
+criterion = { version = "0.7", default-features = false, features = [] }
+rstest = { version = "0.26", default-features = false, features = [] }
diff --git a/LICENSE b/LICENSE.txt
similarity index 95%
rename from LICENSE
rename to LICENSE.txt
index 93dd471..8015683 100644
--- a/LICENSE
+++ b/LICENSE.txt
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2025 Nvisy Redaction Software
+Copyright (c) 2025 Nvisy Software
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index e997f26..1187bf8 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,136 @@
-# engine
\ No newline at end of file
+# Nvisy Runtime for Rust
+
+[![build](https://img.shields.io/github/actions/workflow/status/nvisycom/runtime/build.yml?branch=main&color=000000&style=flat-square)](https://github.com/nvisycom/runtime/actions/workflows/build.yml)
+[![crates.io](https://img.shields.io/crates/v/nvisy-core?color=000000&style=flat-square)](https://crates.io/crates/nvisy-core)
+[![docs.rs](https://img.shields.io/docsrs/nvisy-core?color=000000&style=flat-square)](https://docs.rs/nvisy-core)
+[![rust version](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/)
+
+High-performance runtime library for data redaction and sensitive information
+processing.
+
+## Features
+
+- Modern Rust (2021 edition) with strict type safety
+- High-performance async runtime powered by Tokio
+- Flexible pattern matching and data detection
+- Built-in archive and compression support
+- Comprehensive error handling with structured diagnostics
+- Modular architecture with optimized crate separation
+
+## Installation
+
+Add the core library to your `Cargo.toml`:
+
+```toml
+[dependencies]
+nvisy-core = "0.1"
+```
+
+Or install additional crates as needed:
+
+```toml
+[dependencies]
+nvisy-core = "0.1"
+nvisy-engine = "0.1"
+nvisy-archive = "0.1"
+```
+
+## Quick Start
+
+### Using the Core Library
+
+```rust
+use nvisy_core::prelude::*;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Initialize the runtime
+    let runtime = Runtime::new().await?;
+
+    // Process sensitive data
+    let result = runtime.process("example data").await?;
+
+    Ok(())
+}
+```
+
+## Architecture
+
+The runtime is organized into specialized crates:
+
+- **nvisy-core** - Core types, traits, and runtime primitives
+- **nvisy-engine** - Processing engine and orchestration
+- **nvisy-archive** - Archive handling and compression
+
+## Requirements
+
+- Rust 1.89 or higher
+- Cargo with workspace support
+
+## Development
+
+### Building
+
+```bash
+# Build all crates
+cargo build
+
+# Build with release optimizations
+cargo build --release
+
+# Build specific crate
+cargo build -p nvisy-core
+```
+
+### Testing
+
+```bash
+# Run all tests
+cargo test
+
+# Run tests for specific crate
+cargo test -p nvisy-core
+
+# Run tests with all features enabled
+cargo test --all-features
+```
+
+### Linting and Formatting
+
+```bash
+# Check formatting
+cargo fmt --check
+
+# Format code
+cargo fmt
+
+# Run clippy
+cargo clippy --all-targets --all-features
+```
+
+## Performance
+
+The runtime is designed for high-throughput scenarios:
+
+- Async I/O with Tokio for concurrent request handling
+- Memory-mapped file processing for large datasets
+- Parallel pattern matching with Rayon
+- Zero-copy operations where possible
+
+## Changelog
+
+See
[CHANGELOG.md](CHANGELOG.md) for release notes and version history. + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for development guidelines. + +## License + +MIT License - see [LICENSE.txt](LICENSE.txt) for details. + +## Support + +- Documentation: [docs.nvisy.com](https://docs.nvisy.com) +- Issues: [GitHub Issues](https://github.com/nvisycom/runtime/issues) +- Email: [support@nvisy.com](mailto:support@nvisy.com) diff --git a/crates/nvisy-archive/Cargo.toml b/crates/nvisy-archive/Cargo.toml new file mode 100644 index 0000000..0c055b3 --- /dev/null +++ b/crates/nvisy-archive/Cargo.toml @@ -0,0 +1,49 @@ +[package] +name = "nvisy-archive" +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } +readme = "./README.md" + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +description = "Archive handling library for Nvisy, supports ZIP, TAR, 7z, and other archive formats" +keywords = ["archive", "zip", "tar", "7z", "compression", "extraction"] +categories = ["compression", "filesystem"] + +[features] +default = ["zip", "tar", "gzip", "bzip2", "xz"] +zip = ["dep:zip"] +tar = ["dep:tar"] +sevenz = ["dep:sevenz-rust"] +gzip = ["dep:flate2"] +bzip2 = ["dep:bzip2"] +xz = ["dep:xz2"] + +[dependencies] +# Core dependencies +nvisy-core = { workspace = true } + +# Async and I/O +tokio = { workspace = true, features = ["fs", "io-util", "rt"] } +tempfile = { workspace = true, features = [] } + +# Archive formats +tar = { version = "0.4", optional = true, features = [] } +zip = { version = "5.1", optional = true, features = [] } +sevenz-rust = { version = "0.6", optional = true, features = [] } + +# Compression formats (all optional) +flate2 = { version = "1.0", optional = true, features = [] } +bzip2 = { version = "0.5", optional = true, features = [] } +xz2 = { version = "0.1", optional = true, features = [] } + +[dev-dependencies] +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } +tokio-test = { workspace = true } +tempfile = { workspace = true } diff --git a/crates/nvisy-archive/README.md b/crates/nvisy-archive/README.md new file mode 100644 index 0000000..05cdbf7 --- /dev/null +++ b/crates/nvisy-archive/README.md @@ -0,0 +1,23 @@ +# nvisy-archive + +Archive handling and compression library for the Nvisy runtime. + +[![rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/) + +## Features + +- **Multiple Formats** - ZIP, TAR, TAR.GZ, TAR.BZ2, TAR.XZ, GZIP, BZIP2, and XZ +- **Async Operations** - Full async/await support with Tokio +- **Flexible Loading** - Load from file paths, memory, or byte streams +- **Type Safety** - Strong typing with `ArchiveType` enum +- **Memory Efficient** - Stream-based processing for large archives +- **Cross-Platform** - Works on Windows, macOS, and Linux + +## Key Dependencies + +- `tokio` - Async runtime for I/O operations +- `tar` - TAR archive format support +- `zip` - ZIP archive format support +- `flate2` - GZIP compression +- `bzip2` - BZIP2 compression +- `xz2` - XZ compression diff --git a/crates/nvisy-archive/src/file/archive_type.rs b/crates/nvisy-archive/src/file/archive_type.rs new file mode 100644 index 0000000..2ccda40 --- /dev/null +++ b/crates/nvisy-archive/src/file/archive_type.rs @@ -0,0 +1,225 @@ +//! Archive type definitions and utilities +//! +//! 
This module defines the different archive formats supported by the library
+//! and provides utilities for working with archive types.
+
+use std::ffi::OsStr;
+use std::fmt;
+
+/// Supported archive types
+///
+/// This enum represents the different archive formats that can be processed.
+/// It provides methods to determine the archive type from file extensions
+/// and to get the supported extensions for each type.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum ArchiveType {
+    /// ZIP archive format
+    Zip,
+    /// TAR archive format (uncompressed)
+    Tar,
+    /// GZIP compressed TAR archive
+    TarGz,
+    /// BZIP2 compressed TAR archive
+    TarBz2,
+    /// XZ compressed TAR archive
+    TarXz,
+    /// GZIP compression (single file)
+    Gz,
+    /// BZIP2 compression (single file)
+    Bz2,
+    /// XZ compression (single file)
+    Xz,
+    /// 7-Zip archive format
+    SevenZ,
+}
+
+impl ArchiveType {
+    /// Determine archive type from file extension
+    ///
+    /// # Arguments
+    ///
+    /// * `extension` - File extension string (without the dot)
+    ///
+    /// # Returns
+    ///
+    /// `Some(ArchiveType)` if the extension is recognized, `None` otherwise.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use std::ffi::OsStr;
+    /// use nvisy_archive::ArchiveType;
+    ///
+    /// assert_eq!(ArchiveType::from_file_extension(OsStr::new("zip")), Some(ArchiveType::Zip));
+    /// assert_eq!(ArchiveType::from_file_extension(OsStr::new("tar.gz")), Some(ArchiveType::TarGz));
+    /// assert_eq!(ArchiveType::from_file_extension(OsStr::new("unknown")), None);
+    /// ```
+    pub fn from_file_extension(extension: &OsStr) -> Option<Self> {
+        let extension_str = extension.to_str()?.to_lowercase();
+        match extension_str.as_str() {
+            "zip" => Some(Self::Zip),
+            "tar" => Some(Self::Tar),
+            "tar.gz" | "tgz" => Some(Self::TarGz),
+            "tar.bz2" | "tbz2" | "tb2" => Some(Self::TarBz2),
+            "tar.xz" | "txz" => Some(Self::TarXz),
+            "gz" | "gzip" => Some(Self::Gz),
+            "bz2" | "bzip2" => Some(Self::Bz2),
+            "xz" => Some(Self::Xz),
+            "7z" => Some(Self::SevenZ),
+            _ => None,
+        }
+    }
+
+    /// Get the file extensions associated with this archive type
+    ///
+    /// Returns a slice of static string references representing all
+    /// the file extensions that correspond to this archive type.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use nvisy_archive::ArchiveType;
+    ///
+    /// assert_eq!(ArchiveType::Zip.file_extensions(), &["zip"]);
+    /// assert_eq!(ArchiveType::TarGz.file_extensions(), &["tar.gz", "tgz"]);
+    /// ```
+    pub fn file_extensions(&self) -> &'static [&'static str] {
+        match self {
+            Self::Zip => &["zip"],
+            Self::Tar => &["tar"],
+            Self::TarGz => &["tar.gz", "tgz"],
+            Self::TarBz2 => &["tar.bz2", "tbz2", "tb2"],
+            Self::TarXz => &["tar.xz", "txz"],
+            Self::Gz => &["gz", "gzip"],
+            Self::Bz2 => &["bz2", "bzip2"],
+            Self::Xz => &["xz"],
+            Self::SevenZ => &["7z"],
+        }
+    }
+
+    /// Get the primary file extension for this archive type
+    ///
+    /// Returns the most common/preferred file extension for this archive type.
+ /// + /// # Examples + /// + /// ``` + /// use nvisy_archive::ArchiveType; + /// + /// assert_eq!(ArchiveType::Zip.primary_extension(), "zip"); + /// assert_eq!(ArchiveType::TarGz.primary_extension(), "tar.gz"); + /// ``` + pub fn primary_extension(&self) -> &'static str { + self.file_extensions()[0] + } + + /// Check if this archive type is a compressed TAR variant + pub fn is_tar_variant(&self) -> bool { + matches!(self, Self::Tar | Self::TarGz | Self::TarBz2 | Self::TarXz) + } + + /// Check if this archive type supports multiple files + pub fn supports_multiple_files(&self) -> bool { + matches!( + self, + Self::Zip | Self::Tar | Self::TarGz | Self::TarBz2 | Self::TarXz | Self::SevenZ + ) + } +} + +impl fmt::Display for ArchiveType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Zip => write!(f, "ZIP"), + Self::Tar => write!(f, "TAR"), + Self::TarGz => write!(f, "TAR.GZ"), + Self::TarBz2 => write!(f, "TAR.BZ2"), + Self::TarXz => write!(f, "TAR.XZ"), + Self::Gz => write!(f, "GZIP"), + Self::Bz2 => write!(f, "BZIP2"), + Self::Xz => write!(f, "XZ"), + Self::SevenZ => write!(f, "7Z"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_archive_type_from_extension() { + assert_eq!( + ArchiveType::from_file_extension(OsStr::new("zip")), + Some(ArchiveType::Zip) + ); + assert_eq!( + ArchiveType::from_file_extension(OsStr::new("ZIP")), + Some(ArchiveType::Zip) + ); + assert_eq!( + ArchiveType::from_file_extension(OsStr::new("tar")), + Some(ArchiveType::Tar) + ); + assert_eq!( + ArchiveType::from_file_extension(OsStr::new("tar.gz")), + Some(ArchiveType::TarGz) + ); + assert_eq!( + ArchiveType::from_file_extension(OsStr::new("tgz")), + Some(ArchiveType::TarGz) + ); + assert_eq!( + ArchiveType::from_file_extension(OsStr::new("unknown")), + None + ); + } + + #[test] + fn test_archive_type_extensions() { + assert_eq!(ArchiveType::Zip.file_extensions(), &["zip"]); + assert_eq!(ArchiveType::TarGz.file_extensions(), &["tar.gz", "tgz"]); + assert!(ArchiveType::TarBz2.file_extensions().contains(&"tar.bz2")); + } + + #[test] + fn test_archive_type_primary_extension() { + assert_eq!(ArchiveType::Zip.primary_extension(), "zip"); + assert_eq!(ArchiveType::TarGz.primary_extension(), "tar.gz"); + } + + #[test] + fn test_archive_type_variants() { + assert!(ArchiveType::Tar.is_tar_variant()); + assert!(ArchiveType::TarGz.is_tar_variant()); + assert!(!ArchiveType::Zip.is_tar_variant()); + assert!(!ArchiveType::Gz.is_tar_variant()); + } + + #[test] + fn test_archive_type_multiple_files() { + assert!(ArchiveType::Zip.supports_multiple_files()); + assert!(ArchiveType::Tar.supports_multiple_files()); + assert!(ArchiveType::SevenZ.supports_multiple_files()); + assert!(!ArchiveType::Gz.supports_multiple_files()); + assert!(!ArchiveType::Bz2.supports_multiple_files()); + } + + #[test] + fn test_archive_type_display() { + assert_eq!(ArchiveType::Zip.to_string(), "ZIP"); + assert_eq!(ArchiveType::TarGz.to_string(), "TAR.GZ"); + assert_eq!(ArchiveType::SevenZ.to_string(), "7Z"); + } + + #[test] + fn test_archive_type_7z() { + assert_eq!( + ArchiveType::from_file_extension(OsStr::new("7z")), + Some(ArchiveType::SevenZ) + ); + assert_eq!(ArchiveType::SevenZ.file_extensions(), &["7z"]); + assert_eq!(ArchiveType::SevenZ.primary_extension(), "7z"); + assert!(!ArchiveType::SevenZ.is_tar_variant()); + } +} diff --git a/crates/nvisy-archive/src/file/mod.rs b/crates/nvisy-archive/src/file/mod.rs new file mode 100644 index 0000000..a1abe1c --- /dev/null +++ 
b/crates/nvisy-archive/src/file/mod.rs
@@ -0,0 +1,641 @@
+//! Archive file handling for content processing
+//!
+//! This module provides functionality for working with archive files,
+//! including extraction to temporary directories and repacking from various sources.
+
+pub mod archive_type;
+
+use std::ffi::OsStr;
+use std::io::Cursor;
+use std::path::{Path, PathBuf};
+
+pub use archive_type::ArchiveType;
+use tempfile::TempDir;
+use tokio::fs;
+
+use crate::handler::ArchiveHandler;
+#[cfg(feature = "zip")]
+use crate::ZipResultExt;
+use crate::{ArchiveErrorExt, Error, Result};
+
+/// Represents an archive file that can be loaded from various sources
+///
+/// This struct encapsulates an archive and provides methods for
+/// extracting its contents to a temporary directory for processing.
+#[derive(Debug)]
+pub struct ArchiveFile {
+    /// Type of archive
+    pub archive_type: ArchiveType,
+    /// Source data for the archive
+    source: ArchiveSource,
+}
+
+/// Internal representation of archive data sources
+#[derive(Debug)]
+enum ArchiveSource {
+    /// Archive loaded from a file path
+    Path(PathBuf),
+    /// Archive loaded from memory
+    Memory(Vec<u8>),
+    /// Archive loaded from an iterator
+    Iterator(Vec<u8>),
+}
+
+impl ArchiveFile {
+    /// Create a new archive file from a file path
+    ///
+    /// The archive type is automatically detected from the file extension.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_archive::ArchiveFile;
+    /// use std::path::PathBuf;
+    ///
+    /// let archive = ArchiveFile::from_path("archive.zip")?;
+    /// # Ok::<(), nvisy_archive::Error>(())
+    /// ```
+    pub fn from_path(path: impl AsRef<Path>) -> Result<Self> {
+        let path = path.as_ref();
+        let extension = path
+            .extension()
+            .ok_or_else(|| Error::invalid_archive("No file extension found"))?;
+
+        // Handle compound extensions like .tar.gz
+        let full_name = path
+            .file_name()
+            .and_then(|name| name.to_str())
+            .unwrap_or("");
+
+        let archive_type = if full_name.contains(".tar.") {
+            // Try to match compound extensions first
+            if let Some(pos) = full_name.find(".tar.") {
+                let compound_ext = &full_name[pos + 1..]; // Skip the dot
+                ArchiveType::from_file_extension(OsStr::new(compound_ext))
+            } else {
+                None
+            }
+        } else {
+            None
+        }
+        .or_else(|| ArchiveType::from_file_extension(extension))
+        .ok_or_else(|| Error::unsupported_format(extension.to_string_lossy().to_string()))?;
+
+        Ok(Self {
+            archive_type,
+            source: ArchiveSource::Path(path.to_path_buf()),
+        })
+    }
+
+    /// Create a new archive file from memory with explicit archive type
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_archive::{ArchiveFile, ArchiveType};
+    ///
+    /// let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature
+    /// let archive = ArchiveFile::from_memory(ArchiveType::Zip, data);
+    /// ```
+    pub fn from_memory(archive_type: ArchiveType, data: Vec<u8>) -> Self {
+        Self {
+            archive_type,
+            source: ArchiveSource::Memory(data),
+        }
+    }
+
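+    // Editorial note (added): the `0x50 0x4B 0x03 0x04` bytes used in these examples
+    // are the ZIP local-file-header signature ("PK\x03\x04"). `from_memory` performs
+    // no validation of the data; it trusts the caller-supplied `archive_type`.
+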
+    /// Create a new archive file from an iterator of bytes
+    ///
+    /// The iterator will be consumed immediately and stored in memory.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_archive::{ArchiveFile, ArchiveType};
+    ///
+    /// let data = [0x50, 0x4B, 0x03, 0x04]; // ZIP signature
+    /// let archive = ArchiveFile::from_iterator(ArchiveType::Zip, data.into_iter());
+    /// ```
+    pub fn from_iterator(archive_type: ArchiveType, data: impl Iterator<Item = u8>) -> Self {
+        let data: Vec<u8> = data.collect();
+        Self {
+            archive_type,
+            source: ArchiveSource::Iterator(data),
+        }
+    }
+
+    /// Create an archive with explicit type (useful for ambiguous extensions)
+    pub fn with_archive_type(mut self, archive_type: ArchiveType) -> Self {
+        self.archive_type = archive_type;
+        self
+    }
+
+    /// Get the archive type
+    pub fn archive_type(&self) -> ArchiveType {
+        self.archive_type
+    }
+
+    /// Check if the archive source exists (only meaningful for file-based sources)
+    pub async fn exists(&self) -> bool {
+        match &self.source {
+            ArchiveSource::Path(path) => fs::try_exists(path).await.unwrap_or(false),
+            ArchiveSource::Memory(_) | ArchiveSource::Iterator(_) => true,
+        }
+    }
+
+    /// Get the file path (if loaded from a file)
+    pub fn path(&self) -> Option<&Path> {
+        match &self.source {
+            ArchiveSource::Path(path) => Some(path),
+            _ => None,
+        }
+    }
+
+    /// Get the size of the archive data
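+    ///
+    /// # Example
+    ///
+    /// Added illustration: for in-memory sources the size is simply the
+    /// length of the buffer.
+    ///
+    /// ```
+    /// use nvisy_archive::{ArchiveFile, ArchiveType};
+    ///
+    /// # async fn example() -> nvisy_archive::Result<()> {
+    /// let archive = ArchiveFile::from_memory(ArchiveType::Zip, vec![0; 16]);
+    /// assert_eq!(archive.size().await?, 16);
+    /// # Ok(())
+    /// # }
+    /// ```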
+    pub async fn size(&self) -> Result<u64> {
+        match &self.source {
+            ArchiveSource::Path(path) => {
+                let metadata = fs::metadata(path).await?;
+                Ok(metadata.len())
+            }
+            ArchiveSource::Memory(data) | ArchiveSource::Iterator(data) => Ok(data.len() as u64),
+        }
+    }
+
+    /// Extract the archive to a temporary directory
+    ///
+    /// This method extracts all contents of the archive to a temporary
+    /// directory and returns an `ArchiveHandler` for managing the
+    /// extracted contents.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The archive file cannot be read
+    /// - The archive format is not supported
+    /// - Extraction fails
+    /// - Temporary directory creation fails
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_archive::ArchiveFile;
+    ///
+    /// # async fn example() -> nvisy_archive::Result<()> {
+    /// let archive = ArchiveFile::from_path("archive.zip")?;
+    /// let handler = archive.unpack().await?;
+    ///
+    /// // Work with extracted files
+    /// for file_path in handler.file_paths() {
+    ///     println!("Found file: {:?}", file_path);
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub async fn unpack(self) -> Result<ArchiveHandler> {
+        // Create temporary directory
+        let temp_dir = TempDir::new().map_err(|e| {
+            Error::invalid_archive(format!("Failed to create temporary directory: {}", e))
+        })?;
+
+        // Get archive data as bytes
+        let data = self.get_data().await?;
+        let cursor = Cursor::new(data);
+
+        // Extract based on archive type
+        let files = self.extract_archive(cursor, temp_dir.path()).await?;
+
+        Ok(ArchiveHandler::new(
+            self.archive_type,
+            self.path().map(|p| p.to_path_buf()),
+            temp_dir,
+            files,
+        ))
+    }
+
+    /// Get the archive data as bytes
+    async fn get_data(&self) -> Result<Vec<u8>> {
+        match &self.source {
+            ArchiveSource::Path(path) => fs::read(path).await.map_err(Into::into),
+            ArchiveSource::Memory(data) | ArchiveSource::Iterator(data) => Ok(data.clone()),
+        }
+    }
+
+    /// Extract archive contents to the specified directory
+    async fn extract_archive(
+        &self,
+        data: Cursor<Vec<u8>>,
+        target_dir: &Path,
+    ) -> Result<Vec<PathBuf>> {
+        match self.archive_type {
+            #[cfg(feature = "zip")]
+            ArchiveType::Zip => self.extract_zip(data, target_dir).await,
+            #[cfg(not(feature = "zip"))]
+            ArchiveType::Zip => Err(Error::unsupported_format("ZIP support not enabled")),
+
+            #[cfg(feature = "tar")]
+            ArchiveType::Tar => self.extract_tar(data, target_dir).await,
+            #[cfg(not(feature = "tar"))]
+            ArchiveType::Tar => Err(Error::unsupported_format("TAR support not enabled")),
+
+            #[cfg(all(feature = "tar", feature = "gzip"))]
+            ArchiveType::TarGz => self.extract_tar_gz(data, target_dir).await,
+            #[cfg(not(all(feature = "tar", feature = "gzip")))]
+            ArchiveType::TarGz => Err(Error::unsupported_format("TAR.GZ support not enabled")),
+
+            #[cfg(all(feature = "tar", feature = "bzip2"))]
+            ArchiveType::TarBz2 => self.extract_tar_bz2(data, target_dir).await,
+            #[cfg(not(all(feature = "tar", feature = "bzip2")))]
+            ArchiveType::TarBz2 => Err(Error::unsupported_format("TAR.BZ2 support not enabled")),
+
+            #[cfg(all(feature = "tar", feature = "xz"))]
+            ArchiveType::TarXz => self.extract_tar_xz(data, target_dir).await,
+            #[cfg(not(all(feature = "tar", feature = "xz")))]
+            ArchiveType::TarXz => Err(Error::unsupported_format("TAR.XZ support not enabled")),
+
+            #[cfg(feature = "gzip")]
+            ArchiveType::Gz => self.extract_gz(data, target_dir).await,
+            #[cfg(not(feature = "gzip"))]
+            ArchiveType::Gz => Err(Error::unsupported_format("GZIP support not enabled")),
+
+            #[cfg(feature = "bzip2")]
+            ArchiveType::Bz2 => self.extract_bz2(data, target_dir).await,
+            #[cfg(not(feature = "bzip2"))]
+            ArchiveType::Bz2 => Err(Error::unsupported_format("BZIP2 support not enabled")),
+
+            #[cfg(feature = "xz")]
+            ArchiveType::Xz => self.extract_xz(data, target_dir).await,
+            #[cfg(not(feature = "xz"))]
+            ArchiveType::Xz => Err(Error::unsupported_format("XZ support not enabled")),
+
+            #[cfg(feature = "sevenz")]
+            ArchiveType::SevenZ => self.extract_7z(data, target_dir).await,
+            #[cfg(not(feature = "sevenz"))]
+            ArchiveType::SevenZ => Err(Error::unsupported_format("7z support not enabled")),
+        }
+    }
+
+    /// Extract ZIP archive
+    #[cfg(feature = "zip")]
+    async fn extract_zip(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
+        use tokio::io::AsyncWriteExt;
+        use zip::ZipArchive;
+
+        // Use spawn_blocking for CPU-bound decompression
+        let target_dir = target_dir.to_path_buf();
+        let (files, entries_data) = tokio::task::spawn_blocking(move || {
+            let mut archive = ZipArchive::new(data).map_zip_err()?;
+            let mut entries_data = Vec::new();
+
+            for i in 0..archive.len() {
+                let mut file = archive.by_index(i).map_zip_err()?;
+                let name = file.name().to_string();
+                let is_dir = file.is_dir();
+
+                if !is_dir {
+                    let mut content = Vec::new();
+                    std::io::Read::read_to_end(&mut file, &mut content)?;
+                    entries_data.push((name, content));
+                } else {
+                    entries_data.push((name, Vec::new()));
+                }
+            }
+
+            Ok::<_, Error>((Vec::new(), entries_data))
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        let mut files = files;
+        for (name, content) in entries_data {
+            let file_path = target_dir.join(&name);
+
+            // Create parent directories if they don't exist
+            if let Some(parent) = file_path.parent() {
+                fs::create_dir_all(parent).await?;
+            }
+
+            if name.ends_with('/') {
+                fs::create_dir_all(&file_path).await?;
+            } else {
+                let mut output_file = fs::File::create(&file_path).await?;
+                output_file.write_all(&content).await?;
+                files.push(file_path);
+            }
+        }
+
+        Ok(files)
+    }
+
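+    // Added commentary: the format-specific extractors below all follow the same
+    // two-phase shape as `extract_zip`: decode entries on a blocking thread via
+    // `tokio::task::spawn_blocking`, then write the results out with async I/O.
+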
+    /// Extract TAR archive
+    #[cfg(feature = "tar")]
+    async fn extract_tar(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
+        use tar::Archive;
+        use tokio::io::AsyncWriteExt;
+
+        let target_dir = target_dir.to_path_buf();
+
+        // Use spawn_blocking for CPU-bound decompression
+        let entries_data = tokio::task::spawn_blocking(move || {
+            let mut archive = Archive::new(data);
+            let mut entries_data = Vec::new();
+
+            for entry in archive.entries()? {
+                let mut entry = entry?;
+                let path = entry.path()?.to_path_buf();
+                let is_dir = entry.header().entry_type().is_dir();
+
+                if !is_dir {
+                    let mut content = Vec::new();
+                    std::io::Read::read_to_end(&mut entry, &mut content)?;
+                    entries_data.push((path, content, false));
+                } else {
+                    entries_data.push((path, Vec::new(), true));
+                }
+            }
+
+            Ok::<_, Error>(entries_data)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        let mut files = Vec::new();
+        for (path, content, is_dir) in entries_data {
+            let file_path = target_dir.join(&path);
+
+            // Create parent directories if they don't exist
+            if let Some(parent) = file_path.parent() {
+                fs::create_dir_all(parent).await?;
+            }
+
+            if is_dir {
+                fs::create_dir_all(&file_path).await?;
+            } else {
+                let mut output_file = fs::File::create(&file_path).await?;
+                output_file.write_all(&content).await?;
+                files.push(file_path);
+            }
+        }
+
+        Ok(files)
+    }
+
+    /// Extract GZIP-compressed TAR archive
+    #[cfg(all(feature = "tar", feature = "gzip"))]
+    async fn extract_tar_gz(
+        &self,
+        data: Cursor<Vec<u8>>,
+        target_dir: &Path,
+    ) -> Result<Vec<PathBuf>> {
+        use flate2::read::GzDecoder;
+
+        let decompressed = tokio::task::spawn_blocking(move || {
+            let mut decoder = GzDecoder::new(data);
+            let mut buf = Vec::new();
+            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
+            Ok::<_, Error>(buf)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        let cursor = Cursor::new(decompressed);
+        self.extract_tar(cursor, target_dir).await
+    }
+
+    /// Extract BZIP2-compressed TAR archive
+    #[cfg(all(feature = "tar", feature = "bzip2"))]
+    async fn extract_tar_bz2(
+        &self,
+        data: Cursor<Vec<u8>>,
+        target_dir: &Path,
+    ) -> Result<Vec<PathBuf>> {
+        use bzip2::read::BzDecoder;
+
+        let decompressed = tokio::task::spawn_blocking(move || {
+            let mut decoder = BzDecoder::new(data);
+            let mut buf = Vec::new();
+            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
+            Ok::<_, Error>(buf)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        let cursor = Cursor::new(decompressed);
+        self.extract_tar(cursor, target_dir).await
+    }
+
+    /// Extract XZ-compressed TAR archive
+    #[cfg(all(feature = "tar", feature = "xz"))]
+    async fn extract_tar_xz(
+        &self,
+        data: Cursor<Vec<u8>>,
+        target_dir: &Path,
+    ) -> Result<Vec<PathBuf>> {
+        use xz2::read::XzDecoder;
+
+        let decompressed = tokio::task::spawn_blocking(move || {
+            let mut decoder = XzDecoder::new(data);
+            let mut buf = Vec::new();
+            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
+            Ok::<_, Error>(buf)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        let cursor = Cursor::new(decompressed);
+        self.extract_tar(cursor, target_dir).await
+    }
+
+    /// Extract single GZIP file
+    #[cfg(feature = "gzip")]
+    async fn extract_gz(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
+        use flate2::read::GzDecoder;
+        use tokio::io::AsyncWriteExt;
+
+        let path_clone = self.path().map(|p| p.to_path_buf());
+
+        let content = tokio::task::spawn_blocking(move || {
+            let mut decoder = GzDecoder::new(data);
+            let mut buf = Vec::new();
+            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
+            Ok::<_, Error>(buf)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        // For single files, we need to determine the output filename
+        let output_path = if let Some(path) = path_clone {
+            let stem = path
+                .file_stem()
+                .and_then(|s| s.to_str())
+                .unwrap_or("extracted");
+            target_dir.join(stem)
+        } else {
+            target_dir.join("extracted")
+        };
+
+        let mut output_file = fs::File::create(&output_path).await?;
+        output_file.write_all(&content).await?;
+
+        Ok(vec![output_path])
+    }
+
+    /// Extract single BZIP2 file
+    #[cfg(feature = "bzip2")]
+    async fn extract_bz2(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
+        use bzip2::read::BzDecoder;
+        use tokio::io::AsyncWriteExt;
+
+        let path_clone = self.path().map(|p| p.to_path_buf());
+
+        let content = tokio::task::spawn_blocking(move || {
+            let mut decoder = BzDecoder::new(data);
+            let mut buf = Vec::new();
+            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
+            Ok::<_, Error>(buf)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        let output_path = if let Some(path) = path_clone {
+            let stem = path
+                .file_stem()
+                .and_then(|s| s.to_str())
+                .unwrap_or("extracted");
+            target_dir.join(stem)
+        } else {
+            target_dir.join("extracted")
+        };
+
+        let mut output_file = fs::File::create(&output_path).await?;
+        output_file.write_all(&content).await?;
+
+        Ok(vec![output_path])
+    }
+
+    /// Extract single XZ file
+    #[cfg(feature = "xz")]
+    async fn extract_xz(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
+        use tokio::io::AsyncWriteExt;
+        use xz2::read::XzDecoder;
+
+        let path_clone = self.path().map(|p| p.to_path_buf());
+
+        let content = tokio::task::spawn_blocking(move || {
+            let mut decoder = XzDecoder::new(data);
+            let mut buf = Vec::new();
+            std::io::Read::read_to_end(&mut decoder, &mut buf)?;
+            Ok::<_, Error>(buf)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        let output_path = if let Some(path) = path_clone {
+            let stem = path
+                .file_stem()
+                .and_then(|s| s.to_str())
+                .unwrap_or("extracted");
+            target_dir.join(stem)
+        } else {
+            target_dir.join("extracted")
+        };
+
+        let mut output_file = fs::File::create(&output_path).await?;
+        output_file.write_all(&content).await?;
+
+        Ok(vec![output_path])
+    }
+
+    /// Extract 7z archive
+    #[cfg(feature = "sevenz")]
+    async fn extract_7z(&self, data: Cursor<Vec<u8>>, target_dir: &Path) -> Result<Vec<PathBuf>> {
+        let target_dir = target_dir.to_path_buf();
+        let data_vec = data.into_inner();
+
+        // Use spawn_blocking for CPU-bound decompression
+        let files = tokio::task::spawn_blocking(move || {
+            // Write data to a temp file since sevenz-rust works better with files
+            let temp_file = tempfile::NamedTempFile::new().map_err(|e| {
+                Error::invalid_archive(format!("Failed to create temp file: {}", e))
+            })?;
+            std::fs::write(temp_file.path(), &data_vec)?;
+
+            // Decompress to target directory
+            sevenz_rust::decompress_file(temp_file.path(), &target_dir).map_err(|e| {
+                Error::invalid_archive(format!("Failed to extract 7z archive: {}", e))
+            })?;
+
+            // Collect extracted files
+            fn collect_files(dir: &Path) -> std::io::Result<Vec<PathBuf>> {
+                let mut files = Vec::new();
+                if dir.is_dir() {
+                    for entry in std::fs::read_dir(dir)? {
+                        let entry = entry?;
+                        let path = entry.path();
+                        if path.is_file() {
+                            files.push(path);
+                        } else if path.is_dir() {
+                            files.extend(collect_files(&path)?);
+                        }
+                    }
+                }
+                Ok(files)
+            }
+
+            let files = collect_files(&target_dir)?;
+            Ok::<_, Error>(files)
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        Ok(files)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_archive_file_from_memory() {
+        let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature
+        let archive = ArchiveFile::from_memory(ArchiveType::Zip, data);
+        assert_eq!(archive.archive_type(), ArchiveType::Zip);
+        assert!(archive.path().is_none());
+    }
+
+    #[test]
+    fn test_archive_file_from_iterator() {
+        let data = [0x50, 0x4B, 0x03, 0x04]; // ZIP signature
+        let archive = ArchiveFile::from_iterator(ArchiveType::Zip, data.into_iter());
+        assert_eq!(archive.archive_type(), ArchiveType::Zip);
+    }
+
+    #[test]
+    fn test_archive_file_from_path() -> Result<()> {
+        let archive = ArchiveFile::from_path("test.zip")?;
+        assert_eq!(archive.archive_type(), ArchiveType::Zip);
+        assert!(archive.path().is_some());
+        Ok(())
+    }
+
+    #[test]
+    fn test_compound_extension() -> Result<()> {
+        let archive = ArchiveFile::from_path("test.tar.gz")?;
+        assert_eq!(archive.archive_type(), ArchiveType::TarGz);
+        Ok(())
+    }
+
+    #[test]
+    fn test_unsupported_extension() {
+        let result = ArchiveFile::from_path("test.unknown");
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_memory_size() {
+        let data = vec![1, 2, 3, 4, 5];
+        let archive = ArchiveFile::from_memory(ArchiveType::Zip, data);
+        assert_eq!(archive.size().await.unwrap(), 5);
+    }
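+
+    // Illustrative test added during editing; it exercises only the API defined
+    // above: overriding the auto-detected archive type.
+    #[test]
+    fn test_with_archive_type_override() -> Result<()> {
+        let archive = ArchiveFile::from_path("test.tar")?.with_archive_type(ArchiveType::TarGz);
+        assert_eq!(archive.archive_type(), ArchiveType::TarGz);
+        Ok(())
+    }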
+}
diff --git a/crates/nvisy-archive/src/handler/mod.rs b/crates/nvisy-archive/src/handler/mod.rs
new file mode 100644
index 0000000..40a8398
--- /dev/null
+++ b/crates/nvisy-archive/src/handler/mod.rs
@@ -0,0 +1,349 @@
+//! Archive file handler for managing extracted archive contents
+//!
+//! This module provides the [`ArchiveHandler`] struct for managing
+//! temporary directories containing extracted archive contents and
+//! repacking them back into archives.
+
+pub mod tar_handler;
+pub mod zip_handler;
+
+use std::fs;
+use std::path::{Path, PathBuf};
+
+// Re-exports for convenience
+pub use tar_handler::{TarArchiveBuilder, TarArchiveHandler, TarDirectoryBuilder, TarEntryInfo};
+use tempfile::TempDir;
+pub use zip_handler::{ZipArchiveBuilder, ZipArchiveHandler, ZipDirectoryBuilder, ZipEntryInfo};
+
+use crate::{ArchiveErrorExt, ArchiveType, Error, Result};
+
+/// Handler for unpacked archive contents
+///
+/// This struct manages the temporary directory containing extracted
+/// archive contents and provides methods for iterating over files
+/// and repacking the archive.
+#[derive(Debug)]
+pub struct ArchiveHandler {
+    /// Type of the original archive
+    pub archive_type: ArchiveType,
+    /// Original archive file path (if loaded from file)
+    pub original_path: Option<PathBuf>,
+    /// Temporary directory containing extracted files
+    temp_dir: TempDir,
+    /// Files found in the archive
+    files: Vec<PathBuf>,
+}
+
+impl ArchiveHandler {
+    /// Create a new archive file handler
+    ///
+    /// This is typically called internally by `ArchiveFile::unpack()`.
+    pub fn new(
+        archive_type: ArchiveType,
+        original_path: Option<PathBuf>,
+        temp_dir: TempDir,
+        files: Vec<PathBuf>,
+    ) -> Self {
+        Self {
+            archive_type,
+            original_path,
+            temp_dir,
+            files,
+        }
+    }
+
+    /// Get the path to the temporary directory containing extracted files
+    pub fn temp_path(&self) -> &Path {
+        self.temp_dir.path()
+    }
+
+    /// Get the number of files in the archive
+    pub fn file_count(&self) -> usize {
+        self.files.len()
+    }
+
+    /// Check if the archive is empty
+    pub fn is_empty(&self) -> bool {
+        self.files.is_empty()
+    }
+
+    /// Get a list of all file paths in the archive
+    pub fn file_paths(&self) -> &[PathBuf] {
+        &self.files
+    }
+
+    /// Find files matching a specific predicate
+    pub fn find_files(&self, predicate: impl Fn(&PathBuf) -> bool) -> Vec<&PathBuf> {
+        self.files.iter().filter(|path| predicate(path)).collect()
+    }
+
+    /// Find files with specific extension
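+    ///
+    /// # Example
+    ///
+    /// Added illustration: matching is case-insensitive and considers only the
+    /// final extension, so `report.PDF` matches `"pdf"`.
+    ///
+    /// ```no_run
+    /// # async fn example() -> nvisy_archive::Result<()> {
+    /// let archive = nvisy_archive::ArchiveFile::from_path("docs.zip")?;
+    /// let handler = archive.unpack().await?;
+    /// for path in handler.find_files_by_extension("pdf") {
+    ///     println!("PDF in archive: {}", path.display());
+    /// }
+    /// # Ok(())
+    /// # }
+    /// ```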
+    pub fn find_files_by_extension(&self, extension: &str) -> Vec<&PathBuf> {
+        self.find_files(|path| {
+            path.extension()
+                .and_then(|ext| ext.to_str())
+                .map(|ext| ext.eq_ignore_ascii_case(extension))
+                .unwrap_or(false)
+        })
+    }
+
+    /// Get all files recursively in the temporary directory
+    pub fn refresh_file_list(&mut self) -> Result<()> {
+        self.files = Self::scan_files(self.temp_path())?;
+        Ok(())
+    }
+
+    /// Create a new archive from the current temporary directory contents
+    ///
+    /// This method packages all files in the temporary directory back into
+    /// an archive file at the specified location.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The target directory cannot be created
+    /// - Archive creation fails
+    /// - File I/O operations fail
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_archive::{ArchiveFile, ArchiveType};
+    ///
+    /// # async fn example() -> nvisy_archive::Result<()> {
+    /// let archive = ArchiveFile::from_path("original.zip")?;
+    /// let handler = archive.unpack().await?;
+    ///
+    /// // Modify files in handler.temp_path()...
+    ///
+    /// let new_archive = handler.pack("modified.zip").await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub async fn pack(self, target_path: impl AsRef<Path>) -> Result<crate::ArchiveFile> {
+        let target_path = target_path.as_ref();
+
+        // Ensure parent directory exists
+        if let Some(parent) = target_path.parent() {
+            tokio::fs::create_dir_all(parent).await.map_err(|e| {
+                Error::invalid_archive(format!("Failed to create parent directory: {}", e))
+            })?;
+        }
+
+        // Determine archive type from target path extension or use original type
+        let archive_type = target_path
+            .extension()
+            .and_then(ArchiveType::from_file_extension)
+            .unwrap_or(self.archive_type);
+
+        match archive_type {
+            ArchiveType::Zip => {
+                #[cfg(feature = "zip")]
+                {
+                    zip_handler::ZipDirectoryBuilder::create(self.temp_path(), target_path).await?;
+                }
+                #[cfg(not(feature = "zip"))]
+                {
+                    return Err(Error::unsupported_format("ZIP support not enabled"));
+                }
+            }
+            ArchiveType::Tar | ArchiveType::TarGz | ArchiveType::TarBz2 | ArchiveType::TarXz => {
+                #[cfg(feature = "tar")]
+                {
+                    tar_handler::TarDirectoryBuilder::create(
+                        self.temp_path(),
+                        target_path,
+                        archive_type,
+                    )
+                    .await?;
+                }
+                #[cfg(not(feature = "tar"))]
+                {
+                    return Err(Error::unsupported_format("TAR support not enabled"));
+                }
+            }
+            _ => {
+                return Err(Error::unsupported_format(format!(
+                    "Packing format not supported: {}",
+                    archive_type
+                )));
+            }
+        }
+
+        crate::ArchiveFile::from_path(target_path)
+    }
+
+    /// Scan the directory for files recursively
+    pub fn scan_files(dir: &Path) -> Result<Vec<PathBuf>> {
+        let mut files = Vec::new();
+        let entries = fs::read_dir(dir)?;
+
+        for entry in entries {
+            let entry = entry?;
+            let path = entry.path();
+
+            if path.is_file() {
+                files.push(path);
+            } else if path.is_dir() {
+                // Recursively scan subdirectories
+                let mut sub_files = Self::scan_files(&path)?;
+                files.append(&mut sub_files);
+            }
+        }
+
+        files.sort();
+        Ok(files)
+    }
+
+    /// Get relative paths of all files (relative to temp directory)
+    pub fn relative_file_paths(&self) -> Result<Vec<PathBuf>> {
+        let temp_path = self.temp_path();
+        self.files
+            .iter()
+            .map(|path| {
+                path.strip_prefix(temp_path)
+                    .map(|p| p.to_path_buf())
+                    .map_err(|e| Error::invalid_archive(format!("Invalid file path: {}", e)))
+            })
+            .collect()
+    }
+
+    /// Check if a specific file exists in the archive
+    pub fn contains_file(&self, relative_path: impl AsRef<Path>) -> bool {
+        let target_path = self.temp_path().join(relative_path);
+        self.files.contains(&target_path)
+    }
+
+    /// Get the content of a specific file as bytes
+    pub async fn read_file(&self, relative_path: impl AsRef<Path>) -> Result<Vec<u8>> {
+        let target_path = self.temp_path().join(relative_path);
+        if !self.files.contains(&target_path) {
+            return Err(Error::entry_not_found(
+                target_path.to_string_lossy().to_string(),
+            ));
+        }
+        tokio::fs::read(&target_path).await.map_err(Into::into)
+    }
+
+    /// Write content to a file in the archive
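+    ///
+    /// # Example
+    ///
+    /// Added illustration of the write/read round-trip (mirrors the unit test
+    /// at the bottom of this file):
+    ///
+    /// ```no_run
+    /// # async fn example() -> nvisy_archive::Result<()> {
+    /// let archive = nvisy_archive::ArchiveFile::from_path("docs.zip")?;
+    /// let mut handler = archive.unpack().await?;
+    /// handler.write_file("notes/readme.txt", b"redacted").await?;
+    /// assert!(handler.contains_file("notes/readme.txt"));
+    /// # Ok(())
+    /// # }
+    /// ```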
+    pub async fn write_file(
+        &mut self,
+        relative_path: impl AsRef<Path>,
+        content: &[u8],
+    ) -> Result<()> {
+        let target_path = self.temp_path().join(relative_path.as_ref());
+
+        // Create parent directories if they don't exist
+        if let Some(parent) = target_path.parent() {
+            tokio::fs::create_dir_all(parent).await?;
+        }
+
+        tokio::fs::write(&target_path, content).await?;
+
+        // Add to files list if not already present
+        if !self.files.contains(&target_path) {
+            self.files.push(target_path);
+            self.files.sort();
+        }
+
+        Ok(())
+    }
+}
+
+/// Iterator implementation for ArchiveHandler
+///
+/// Iterates over all file paths in the extracted archive.
+impl<'a> IntoIterator for &'a ArchiveHandler {
+    type IntoIter = std::slice::Iter<'a, PathBuf>;
+    type Item = &'a PathBuf;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.files.iter()
+    }
+}
+
+impl IntoIterator for ArchiveHandler {
+    type IntoIter = std::vec::IntoIter<PathBuf>;
+    type Item = PathBuf;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.files.into_iter()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use tempfile::TempDir;
+
+    use super::*;
+
+    #[test]
+    fn test_archive_handler_creation() {
+        let temp_dir = TempDir::new().unwrap();
+        let files = vec![PathBuf::from("test.txt")];
+
+        let handler = ArchiveHandler::new(
+            ArchiveType::Zip,
+            Some(PathBuf::from("test.zip")),
+            temp_dir,
+            files.clone(),
+        );
+
+        assert_eq!(handler.archive_type, ArchiveType::Zip);
+        assert_eq!(handler.file_count(), 1);
+        assert!(!handler.is_empty());
+    }
+
+    #[test]
+    fn test_empty_archive_handler() {
+        let temp_dir = TempDir::new().unwrap();
+        let files = vec![];
+
+        let handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, files);
+
+        assert_eq!(handler.file_count(), 0);
+        assert!(handler.is_empty());
+    }
+
+    #[test]
+    fn test_find_files_by_extension() {
+        let temp_dir = TempDir::new().unwrap();
+        let files = vec![
+            PathBuf::from("test.txt"),
+            PathBuf::from("data.json"),
+            PathBuf::from("image.png"),
+        ];
+
+        let handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, files);
+
+        let txt_files = handler.find_files_by_extension("txt");
+        assert_eq!(txt_files.len(), 1);
+
+        let json_files = handler.find_files_by_extension("json");
+        assert_eq!(json_files.len(), 1);
+    }
+
+    #[test]
+    fn test_iterator() {
+        let temp_dir = TempDir::new().unwrap();
+        let files = vec![PathBuf::from("file1.txt"), PathBuf::from("file2.txt")];
+
+        let handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, files.clone());
+
+        let collected: Vec<&PathBuf> = (&handler).into_iter().collect();
+        assert_eq!(collected.len(), 2);
+    }
+
+    #[tokio::test]
+    async fn test_write_and_read_file() {
+        let temp_dir = TempDir::new().unwrap();
+        let mut handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, vec![]);
+
+        let content = b"Hello, World!";
+        handler.write_file("test.txt", content).await.unwrap();
+
+        assert!(handler.contains_file("test.txt"));
+        let read_content = handler.read_file("test.txt").await.unwrap();
+        assert_eq!(read_content, content);
+    }
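+
+    // Illustrative test added during editing; it uses only APIs defined above:
+    // `find_files` with an arbitrary caller-supplied predicate.
+    #[test]
+    fn test_find_files_predicate() {
+        let temp_dir = TempDir::new().unwrap();
+        let files = vec![PathBuf::from("a.txt"), PathBuf::from("b.rs")];
+
+        let handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, files);
+
+        let rs_files = handler.find_files(|p| p.extension().is_some_and(|e| e == "rs"));
+        assert_eq!(rs_files.len(), 1);
+    }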
+}
diff --git a/crates/nvisy-archive/src/handler/tar_handler.rs b/crates/nvisy-archive/src/handler/tar_handler.rs
new file mode 100644
index 0000000..26a8e2e
--- /dev/null
+++ b/crates/nvisy-archive/src/handler/tar_handler.rs
@@ -0,0 +1,593 @@
+//! TAR archive handler implementation
+//!
+//! This module provides specialized handling for TAR archives using the tar crate,
+//! including support for compressed TAR formats (tar.gz, tar.bz2, tar.xz).
+
+use std::io::{Cursor, Read, Write};
+use std::path::{Path, PathBuf};
+
+use tar::{Archive, Builder, EntryType};
+use tokio::fs;
+use tokio::io::AsyncWriteExt;
+
+use crate::{ArchiveErrorExt, ArchiveType, Error, Result};
+
+/// Buffered writer for XZ compression using the xz2 crate
+///
+/// This writer buffers all data and compresses it when dropped or explicitly finished.
+struct XzBufferedWriter<W: Write> {
+    writer: Option<W>,
+    buffer: Vec<u8>,
+}
+
+impl<W: Write> XzBufferedWriter<W> {
+    fn new(writer: W, _buffer: Vec<u8>) -> Self {
+        Self {
+            writer: Some(writer),
+            buffer: Vec::new(),
+        }
+    }
+
+    fn finish(&mut self) -> std::io::Result<()> {
+        if let Some(writer) = self.writer.take() {
+            use xz2::write::XzEncoder;
+            let mut encoder = XzEncoder::new(writer, 6);
+            encoder.write_all(&self.buffer)?;
+            encoder.finish()?;
+        }
+        Ok(())
+    }
+}
+
+impl<W: Write> Write for XzBufferedWriter<W> {
+    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        self.buffer.extend_from_slice(buf);
+        Ok(buf.len())
+    }
+
+    fn flush(&mut self) -> std::io::Result<()> {
+        // For buffered XZ compression, we don't flush until finish()
+        Ok(())
+    }
+}
+
+impl<W: Write> Drop for XzBufferedWriter<W> {
+    fn drop(&mut self) {
+        let _ = self.finish();
+    }
+}
+
+/// Specialized handler for TAR archive operations
+///
+/// This handler provides efficient TAR-specific operations using the tar crate,
+/// with support for various compression formats.
+pub struct TarArchiveHandler<R: Read> {
+    /// The underlying TAR archive
+    archive: Archive<R>,
+    /// Archive type (for compression handling)
+    archive_type: ArchiveType,
+}
+
+impl<R: Read> TarArchiveHandler<R> {
+    /// Create a new TAR handler from a reader
+    pub fn new(reader: R, archive_type: ArchiveType) -> Result<Self> {
+        if !archive_type.is_tar_variant() {
+            return Err(Error::unsupported_format(format!(
+                "Expected TAR variant, got: {}",
+                archive_type
+            )));
+        }
+
+        Ok(Self {
+            archive: Archive::new(reader),
+            archive_type,
+        })
+    }
+
+    /// Get the archive type
+    pub fn archive_type(&self) -> ArchiveType {
+        self.archive_type
+    }
+
+    /// Set whether to preserve permissions when extracting
+    pub fn set_preserve_permissions(&mut self, preserve: bool) {
+        self.archive.set_preserve_permissions(preserve);
+    }
+
+    /// Set whether to preserve modification times when extracting
+    pub fn set_preserve_mtime(&mut self, preserve: bool) {
+        self.archive.set_preserve_mtime(preserve);
+    }
+
+    /// Set whether to unpack extended attributes
+    pub fn set_unpack_xattrs(&mut self, unpack: bool) {
+        self.archive.set_unpack_xattrs(unpack);
+    }
+
+    /// Extract all entries to the specified directory
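+    //
+    // Added usage sketch (kept as a plain comment because the crate-root
+    // re-export path for `TarArchiveHandler` lies outside this diff):
+    //
+    //     let file = std::fs::File::open("backup.tar")?;
+    //     let mut handler = TarArchiveHandler::new(file, ArchiveType::Tar)?;
+    //     let extracted = handler.extract_to("/tmp/backup").await?;
+    //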
+    pub async fn extract_to(&mut self, target_dir: impl AsRef<Path>) -> Result<Vec<PathBuf>> {
+        let target_dir = target_dir.as_ref();
+        fs::create_dir_all(target_dir).await?;
+
+        let mut extracted_files = Vec::new();
+
+        for entry in self.archive.entries()? {
+            let mut entry = entry?;
+            let path = entry.path()?.to_path_buf();
+            let target_path = target_dir.join(&path);
+
+            // Create parent directories
+            if let Some(parent) = target_path.parent() {
+                fs::create_dir_all(parent).await?;
+            }
+
+            match entry.header().entry_type() {
+                EntryType::Regular => {
+                    let mut content = Vec::new();
+                    entry.read_to_end(&mut content)?;
+
+                    let mut file = fs::File::create(&target_path).await?;
+                    file.write_all(&content).await?;
+
+                    extracted_files.push(target_path);
+                }
+                EntryType::Directory => {
+                    fs::create_dir_all(&target_path).await?;
+                }
+                EntryType::Symlink => {
+                    if let Ok(Some(link_target)) = entry.link_name() {
+                        #[cfg(unix)]
+                        {
+                            tokio::fs::symlink(&link_target, &target_path).await?;
+                        }
+                        #[cfg(windows)]
+                        {
+                            // Windows requires different handling for symlinks
+                            if target_path.is_dir() {
+                                tokio::fs::symlink_dir(&link_target, &target_path).await?;
+                            } else {
+                                tokio::fs::symlink_file(&link_target, &target_path).await?;
+                            }
+                        }
+                    }
+                }
+                EntryType::Link => {
+                    // Hard links - create a copy for simplicity
+                    if let Ok(Some(link_target)) = entry.link_name() {
+                        let source_path = target_dir.join(link_target);
+                        if source_path.exists() {
+                            fs::copy(&source_path, &target_path).await?;
+                            extracted_files.push(target_path);
+                        }
+                    }
+                }
+                _ => {
+                    // Handle other entry types as needed
+                    // For now, we skip unsupported types
+                }
+            }
+        }
+
+        Ok(extracted_files)
+    }
+
+    /// Get entries as an iterator
+    pub fn entries(&mut self) -> Result<tar::Entries<'_, R>> {
+        Ok(self.archive.entries()?)
+    }
+
+    /// List all entries without extracting
+    pub fn list_entries(&mut self) -> Result<Vec<TarEntryInfo>> {
+        let mut entries = Vec::new();
+
+        for entry in self.archive.entries()? {
+            let entry = entry?;
+            let header = entry.header();
+
+            let info = TarEntryInfo {
+                path: entry.path()?.to_path_buf(),
+                size: header.size()?,
+                entry_type: header.entry_type(),
+                mode: header.mode()?,
+                uid: header.uid()?,
+                gid: header.gid()?,
+                mtime: header.mtime()?,
+            };
+
+            entries.push(info);
+        }
+
+        Ok(entries)
+    }
+}
+
+/// Information about a TAR entry
+#[derive(Debug, Clone)]
+pub struct TarEntryInfo {
+    /// Path of the entry within the archive
+    pub path: PathBuf,
+    /// Size of the entry in bytes
+    pub size: u64,
+    /// Type of entry (file, directory, symlink, etc.)
+    pub entry_type: EntryType,
+    /// File mode/permissions
+    pub mode: u32,
+    /// User ID
+    pub uid: u64,
+    /// Group ID
+    pub gid: u64,
+    /// Modification time (Unix timestamp)
+    pub mtime: u64,
+}
+
+/// Builder for creating TAR archives
+pub struct TarArchiveBuilder<W: Write> {
+    builder: Builder<W>,
+    archive_type: ArchiveType,
+}
+
+impl<W: Write> TarArchiveBuilder<W> {
+    /// Create a new TAR archive builder
+    pub fn new(writer: W, archive_type: ArchiveType) -> Result<Self> {
+        if !archive_type.is_tar_variant() {
+            return Err(Error::unsupported_format(format!(
+                "Expected TAR variant, got: {}",
+                archive_type
+            )));
+        }
+
+        Ok(Self {
+            builder: Builder::new(writer),
+            archive_type,
+        })
+    }
+
+    /// Get the archive type
+    pub fn archive_type(&self) -> ArchiveType {
+        self.archive_type
+    }
+
+    /// Add a file to the archive from a path
+    pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
+        &mut self,
+        path: P,
+        name: N,
+    ) -> Result<()> {
+        self.builder.append_path_with_name(path, name)?;
+        Ok(())
+    }
+
+    /// Add a file to the archive with the same name as the path
+    pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
+        self.builder.append_path(path)?;
+        Ok(())
+    }
+
+    /// Add a directory to the archive
+    pub fn append_dir<P: AsRef<Path>, Q: AsRef<Path>>(
+        &mut self,
+        path: P,
+        src_path: Q,
+    ) -> Result<()> {
+        self.builder.append_dir(path, src_path)?;
+        Ok(())
+    }
+
+    /// Add a directory recursively to the archive
+    pub fn append_dir_all<P: AsRef<Path>, Q: AsRef<Path>>(
+        &mut self,
+        path: P,
+        src_path: Q,
+    ) -> Result<()> {
+        self.builder.append_dir_all(path, src_path)?;
+        Ok(())
+    }
+
+    /// Add data from a reader to the archive
+    pub fn append_data<P: AsRef<Path>, R: Read>(
+        &mut self,
+        path: P,
+        size: u64,
+        data: R,
+    ) -> Result<()> {
+        let mut header = tar::Header::new_gnu();
+        header.set_size(size);
+        header.set_mode(0o644);
+        header.set_cksum();
+
+        self.builder.append_data(&mut header, path, data)?;
+        Ok(())
+    }
+
+    /// Finish writing the archive
+    pub fn finish(self) -> Result<W> {
+        Ok(self.builder.into_inner()?)
+    }
+}
+
+/// Builder for creating TAR archives from directories
+pub struct TarDirectoryBuilder;
+
+impl TarDirectoryBuilder {
+    /// Create a TAR archive from a directory
+    ///
+    /// This method collects all files in the source directory and creates
+    /// a TAR archive at the target path with the specified compression.
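+    //
+    // Added usage sketch (plain comment; the public re-export path for this type
+    // lives outside this diff):
+    //
+    //     TarDirectoryBuilder::create(
+    //         Path::new("staging"),
+    //         Path::new("backup.tar.gz"),
+    //         ArchiveType::TarGz,
+    //     )
+    //     .await?;
+    //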
+    pub async fn create(
+        source_dir: &Path,
+        target_path: &Path,
+        archive_type: ArchiveType,
+    ) -> Result<()> {
+        use std::fs;
+
+        // Collect all files in the directory
+        fn collect_files(dir: &Path) -> Result<Vec<PathBuf>> {
+            let mut files = Vec::new();
+            let entries = fs::read_dir(dir)?;
+
+            for entry in entries {
+                let entry = entry?;
+                let path = entry.path();
+
+                if path.is_file() {
+                    files.push(path);
+                } else if path.is_dir() {
+                    let mut sub_files = collect_files(&path)?;
+                    files.append(&mut sub_files);
+                }
+            }
+
+            files.sort();
+            Ok(files)
+        }
+
+        let files = collect_files(source_dir)?;
+        let source_dir = source_dir.to_path_buf();
+        let target_path = target_path.to_path_buf();
+
+        // Use spawn_blocking for CPU-bound compression
+        tokio::task::spawn_blocking(move || {
+            match archive_type {
+                ArchiveType::Tar => {
+                    let file = std::fs::File::create(&target_path)?;
+                    let mut builder = Builder::new(file);
+
+                    for file_path in files {
+                        let relative_path = file_path.strip_prefix(&source_dir).map_err(|e| {
+                            Error::invalid_archive(format!("Invalid file path: {}", e))
+                        })?;
+                        builder.append_path_with_name(&file_path, relative_path)?;
+                    }
+
+                    builder.finish()?;
+                }
+                #[cfg(feature = "gzip")]
+                ArchiveType::TarGz => {
+                    use flate2::write::GzEncoder;
+                    use flate2::Compression;
+
+                    let file = std::fs::File::create(&target_path)?;
+                    let encoder = GzEncoder::new(file, Compression::default());
+                    let mut builder = Builder::new(encoder);
+
+                    for file_path in files {
+                        let relative_path = file_path.strip_prefix(&source_dir).map_err(|e| {
+                            Error::invalid_archive(format!("Invalid file path: {}", e))
+                        })?;
+                        builder.append_path_with_name(&file_path, relative_path)?;
+                    }
+
+                    builder.finish()?;
+                }
+                #[cfg(feature = "bzip2")]
+                ArchiveType::TarBz2 => {
+                    use bzip2::write::BzEncoder;
+                    use bzip2::Compression;
+
+                    let file = std::fs::File::create(&target_path)?;
+                    let encoder = BzEncoder::new(file, Compression::default());
+                    let mut builder = Builder::new(encoder);
+
+                    for file_path in files {
+                        let relative_path = file_path.strip_prefix(&source_dir).map_err(|e| {
+                            Error::invalid_archive(format!("Invalid file path: {}", e))
+                        })?;
+                        builder.append_path_with_name(&file_path, relative_path)?;
+                    }
+
+                    builder.finish()?;
+                }
+                #[cfg(feature = "xz")]
+                ArchiveType::TarXz => {
+                    use xz2::write::XzEncoder;
+
+                    let file = std::fs::File::create(&target_path)?;
+                    let encoder = XzEncoder::new(file, 6);
+                    let mut builder = Builder::new(encoder);
+
+                    for file_path in files {
+                        let relative_path = file_path.strip_prefix(&source_dir).map_err(|e| {
+                            Error::invalid_archive(format!("Invalid file path: {}", e))
+                        })?;
+                        builder.append_path_with_name(&file_path, relative_path)?;
+                    }
+
+                    let encoder = builder.into_inner()?;
+                    encoder.finish()?;
+                }
+                _ => {
+                    return Err(Error::unsupported_format(format!(
+                        "Unsupported TAR variant: {}",
+                        archive_type
+                    )));
+                }
+            }
+
+            Ok::<_, Error>(())
+        })
+        .await
+        .map_err(|e| Error::invalid_archive(format!("Task join error: {}", e)))??;
+
+        Ok(())
+    }
+}
+
+/// Convenience functions for creating compressed TAR handlers
+impl TarArchiveHandler<Box<dyn Read>> {
+    /// Create a TAR handler from compressed data
+    pub fn from_compressed_data(
+        data: Vec<u8>,
+        archive_type: ArchiveType,
+    ) -> Result<TarArchiveHandler<Box<dyn Read>>> {
+        let cursor = Cursor::new(data);
+
+        match archive_type {
+            ArchiveType::Tar => {
+                let reader: Box<dyn Read> = Box::new(cursor);
+                Ok(TarArchiveHandler {
+                    archive: Archive::new(reader),
+                    archive_type,
+                })
+            }
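+            // Added note: each arm erases its concrete decoder behind `Box<dyn Read>`
+            // so one return type covers the plain and compressed TAR variants alike.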
+    pub fn from_compressed_data(
+        data: Vec<u8>,
+        archive_type: ArchiveType,
+    ) -> Result<TarArchiveHandler<Box<dyn Read>>> {
+        let cursor = Cursor::new(data);
+
+        match archive_type {
+            ArchiveType::Tar => {
+                let reader: Box<dyn Read> = Box::new(cursor);
+                Ok(TarArchiveHandler {
+                    archive: Archive::new(reader),
+                    archive_type,
+                })
+            }
+            ArchiveType::TarGz => {
+                use flate2::read::GzDecoder;
+                let decoder = GzDecoder::new(cursor);
+                let reader: Box<dyn Read> = Box::new(decoder);
+                Ok(TarArchiveHandler {
+                    archive: Archive::new(reader),
+                    archive_type,
+                })
+            }
+            ArchiveType::TarBz2 => {
+                use bzip2::read::BzDecoder;
+                let decoder = BzDecoder::new(cursor);
+                let reader: Box<dyn Read> = Box::new(decoder);
+                Ok(TarArchiveHandler {
+                    archive: Archive::new(reader),
+                    archive_type,
+                })
+            }
+            ArchiveType::TarXz => {
+                use xz2::read::XzDecoder;
+                let decoder = XzDecoder::new(cursor);
+                let reader: Box<dyn Read> = Box::new(decoder);
+                Ok(TarArchiveHandler {
+                    archive: Archive::new(reader),
+                    archive_type,
+                })
+            }
+            _ => Err(Error::unsupported_format(format!(
+                "Not a TAR variant: {}",
+                archive_type
+            ))),
+        }
+    }
+}
+
+/// Convenience functions for creating compressed TAR builders
+impl TarArchiveBuilder<Box<dyn Write>> {
+    /// Create a compressed TAR builder
+    pub fn compressed<W: Write + 'static>(
+        writer: W,
+        archive_type: ArchiveType,
+    ) -> Result<TarArchiveBuilder<Box<dyn Write>>> {
+        match archive_type {
+            ArchiveType::Tar => {
+                let writer: Box<dyn Write> = Box::new(writer);
+                Ok(TarArchiveBuilder {
+                    builder: Builder::new(writer),
+                    archive_type,
+                })
+            }
+            ArchiveType::TarGz => {
+                use flate2::write::GzEncoder;
+                use flate2::Compression;
+                let encoder = GzEncoder::new(writer, Compression::default());
+                let writer: Box<dyn Write> = Box::new(encoder);
+                Ok(TarArchiveBuilder {
+                    builder: Builder::new(writer),
+                    archive_type,
+                })
+            }
+            ArchiveType::TarBz2 => {
+                use bzip2::write::BzEncoder;
+                use bzip2::Compression;
+                let encoder = BzEncoder::new(writer, Compression::default());
+                let writer: Box<dyn Write> = Box::new(encoder);
+                Ok(TarArchiveBuilder {
+                    builder: Builder::new(writer),
+                    archive_type,
+                })
+            }
+            ArchiveType::TarXz => {
+                // For XZ compression, we need to buffer the data and compress it at the end.
+                // This is a limitation of liblzma-rs compared to xz2's streaming interface.
+                let buffer = Vec::new();
+                let xz_writer = XzBufferedWriter::new(writer, buffer);
+                let writer: Box<dyn Write> = Box::new(xz_writer);
+                Ok(TarArchiveBuilder {
+                    builder: Builder::new(writer),
+                    archive_type,
+                })
+            }
+            _ => Err(Error::unsupported_format(format!(
+                "Not a TAR variant: {}",
+                archive_type
+            ))),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn test_tar_handler_creation() {
+        let data = Vec::new();
+        let cursor = Cursor::new(data);
+        let handler = TarArchiveHandler::new(cursor, ArchiveType::Tar);
+        assert!(handler.is_ok());
+    }
+
+    #[test]
+    fn test_tar_handler_invalid_type() {
+        let data = Vec::new();
+        let cursor = Cursor::new(data);
+        let handler = TarArchiveHandler::new(cursor, ArchiveType::Zip);
+        assert!(handler.is_err());
+    }
+
+    #[test]
+    fn test_tar_builder_creation() {
+        let writer = Vec::new();
+        let builder = TarArchiveBuilder::new(writer, ArchiveType::Tar);
+        assert!(builder.is_ok());
+    }
+
+    #[test]
+    fn test_compressed_builder_creation() {
+        let writer = Vec::new();
+        let builder = TarArchiveBuilder::compressed(writer, ArchiveType::TarGz);
+        assert!(builder.is_ok());
+    }
+
+    #[test]
+    fn test_entry_info() {
+        let info = TarEntryInfo {
+            path: PathBuf::from("test.txt"),
+            size: 100,
+            entry_type: EntryType::Regular,
+            mode: 0o644,
+            uid: 1000,
+            gid: 1000,
+            mtime: 1234567890,
+        };
+
+        assert_eq!(info.path, PathBuf::from("test.txt"));
+        assert_eq!(info.size, 100);
+        assert_eq!(info.mode, 0o644);
+    }
+}
diff --git a/crates/nvisy-archive/src/handler/zip_handler.rs b/crates/nvisy-archive/src/handler/zip_handler.rs
new file mode 100644
index 0000000..50469b5
--- /dev/null
+++ b/crates/nvisy-archive/src/handler/zip_handler.rs
@@ -0,0 +1,575 @@
+//! ZIP archive handler implementation
+//!
+//! This module provides specialized handling for ZIP archives using the zip crate,
+//! with support for various compression methods and ZIP-specific features.
+
+use std::io::{Cursor, Read, Seek, Write};
+use std::path::{Path, PathBuf};
+
+use tokio::fs;
+use tokio::io::AsyncWriteExt;
+use zip::read::ZipFile;
+use zip::write::{ExtendedFileOptions, SimpleFileOptions};
+use zip::{CompressionMethod, DateTime, ZipArchive, ZipWriter};
+
+use crate::{ArchiveErrorExt, ArchiveType, Error, Result, ZipResultExt};
+
+/// Specialized handler for ZIP archive operations
+///
+/// This handler provides efficient ZIP-specific operations using the zip crate,
+/// with support for various compression methods and ZIP features.
+#[derive(Debug)]
+pub struct ZipArchiveHandler<R: Read + Seek> {
+    /// The underlying ZIP archive
+    archive: ZipArchive<R>,
+    /// Archive type (should always be ZIP)
+    archive_type: ArchiveType,
+}
+
+impl<R: Read + Seek> ZipArchiveHandler<R> {
+    /// Create a new ZIP handler from a reader
+    pub fn new(reader: R, archive_type: ArchiveType) -> Result<Self> {
+        if archive_type != ArchiveType::Zip {
+            return Err(Error::unsupported_format(format!(
+                "Expected ZIP, got: {}",
+                archive_type
+            )));
+        }
+
+        let archive = ZipArchive::new(reader).map_zip_err()?;
+
+        Ok(Self {
+            archive,
+            archive_type,
+        })
+    }
+
+    /// Get the archive type
+    pub fn archive_type(&self) -> ArchiveType {
+        self.archive_type
+    }
+
+    /// Get the number of files in the archive
+    pub fn len(&self) -> usize {
+        self.archive.len()
+    }
+
+    /// Check if the archive is empty
+    pub fn is_empty(&self) -> bool {
+        self.archive.len() == 0
+    }
+
+    /// Extract all entries to the specified directory
+    pub async fn extract_to(&mut self, target_dir: impl AsRef<Path>) -> Result<Vec<PathBuf>> {
+        let target_dir = target_dir.as_ref();
+        fs::create_dir_all(target_dir).await?;
+
+        let mut extracted_files = Vec::new();
+
+        for i in 0..self.archive.len() {
+            let mut file = self.archive.by_index(i).map_zip_err()?;
+            let file_path = target_dir.join(file.name());
+
+            // Create parent directories
+            if let Some(parent) = file_path.parent() {
+                fs::create_dir_all(parent).await?;
+            }
+
+            if file.is_dir() {
+                fs::create_dir_all(&file_path).await?;
+            } else {
+                let mut content = Vec::with_capacity(file.size() as usize);
+                std::io::Read::read_to_end(&mut file, &mut content)?;
+
+                let mut output_file = fs::File::create(&file_path).await?;
+                output_file.write_all(&content).await?;
+
+                // Set file permissions on Unix systems
+                #[cfg(unix)]
+                {
+                    if let Some(mode) = file.unix_mode() {
+                        use std::os::unix::fs::PermissionsExt;
+                        let permissions = std::fs::Permissions::from_mode(mode);
+                        std::fs::set_permissions(&file_path, permissions)?;
+                    }
+                }
+
+                extracted_files.push(file_path);
+            }
+        }
+
+        Ok(extracted_files)
+    }
+
+    /// Extract a specific file by name
+    pub async fn extract_file(&mut self, name: &str, target_path: impl AsRef<Path>) -> Result<()> {
+        let mut file = self.archive.by_name(name).map_zip_err()?;
+        let target_path = target_path.as_ref();
+
+        if let Some(parent) = target_path.parent() {
+            fs::create_dir_all(parent).await?;
+        }
+
+        let mut content = Vec::with_capacity(file.size() as usize);
+        std::io::Read::read_to_end(&mut file, &mut content)?;
+
+        let mut output_file = fs::File::create(target_path).await?;
+        output_file.write_all(&content).await?;
+
+        Ok(())
+    }
+
+    /// Read a file's content directly into memory
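+    ///
+    /// # Example
+    ///
+    /// A minimal sketch; `bytes` stands in for real ZIP data, and the import
+    /// path assumes `handler` re-exports this type:
+    ///
+    /// ```no_run
+    /// use nvisy_archive::handler::ZipArchiveHandler;
+    ///
+    /// fn read(bytes: Vec<u8>) -> nvisy_archive::Result<Vec<u8>> {
+    ///     let mut handler = ZipArchiveHandler::from_memory(bytes)?;
+    ///     handler.read_file("test.txt")
+    /// }
+    /// ```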
+    pub fn read_file(&mut self, name: &str) -> Result<Vec<u8>> {
+        let mut file = self.archive.by_name(name).map_zip_err()?;
+        let mut content = Vec::with_capacity(file.size() as usize);
+        std::io::Read::read_to_end(&mut file, &mut content)?;
+        Ok(content)
+    }
+
+    /// Get file by index
+    pub fn by_index(&mut self, index: usize) -> Result<ZipFile<'_, R>> {
+        self.archive.by_index(index).map_zip_err()
+    }
+
+    /// Get file by name
+    pub fn by_name(&mut self, name: &str) -> Result<ZipFile<'_, R>> {
+        self.archive.by_name(name).map_zip_err()
+    }
+
+    /// List all entries without extracting
+    pub fn list_entries(&mut self) -> Result<Vec<ZipEntryInfo>> {
+        let mut entries = Vec::new();
+
+        for i in 0..self.archive.len() {
+            let file = self.archive.by_index(i).map_zip_err()?;
+
+            let info = ZipEntryInfo {
+                name: file.name().to_string(),
+                size: file.size(),
+                compressed_size: file.compressed_size(),
+                compression_method: file.compression(),
+                is_dir: file.is_dir(),
+                is_file: file.is_file(),
+                unix_mode: file.unix_mode(),
+                last_modified: file.last_modified().unwrap_or_default(),
+                crc32: file.crc32(),
+                extra_data: file.extra_data().unwrap_or(&[]).to_vec(),
+                comment: file.comment().to_string(),
+            };
+
+            entries.push(info);
+        }
+
+        Ok(entries)
+    }
+
+    /// Get file names
+    pub fn file_names(&self) -> Vec<String> {
+        self.archive.file_names().map(|s| s.to_string()).collect()
+    }
+
+    /// Check if a file exists in the archive
+    pub fn contains_file(&mut self, name: &str) -> bool {
+        self.archive.by_name(name).is_ok()
+    }
+
+    /// Get the comment of the archive
+    pub fn comment(&self) -> String {
+        String::from_utf8_lossy(self.archive.comment()).to_string()
+    }
+}
+
+/// Information about a ZIP entry
+#[derive(Debug, Clone)]
+pub struct ZipEntryInfo {
+    /// Name of the file within the archive
+    pub name: String,
+    /// Uncompressed size in bytes
+    pub size: u64,
+    /// Compressed size in bytes
+    pub compressed_size: u64,
+    /// Compression method used
+    pub compression_method: CompressionMethod,
+    /// Whether this entry is a directory
+    pub is_dir: bool,
+    /// Whether this entry is a file
+    pub is_file: bool,
+    /// Unix file permissions (if available)
+    pub unix_mode: Option<u32>,
+    /// Last modification time
+    pub last_modified: DateTime,
+    /// CRC32 checksum
+    pub crc32: u32,
+    /// Extra data field
+    pub extra_data: Vec<u8>,
+    /// File comment
+    pub comment: String,
+}
+
+/// Builder for creating ZIP archives
+pub struct ZipArchiveBuilder<W: Write + Seek> {
+    writer: ZipWriter<W>,
+    archive_type: ArchiveType,
+}
+
+impl<W: Write + Seek> ZipArchiveBuilder<W> {
+    /// Create a new ZIP archive builder
+    pub fn new(writer: W) -> Self {
+        Self {
+            writer: ZipWriter::new(writer),
+            archive_type: ArchiveType::Zip,
+        }
+    }
+
+    /// Get the archive type
+    pub fn archive_type(&self) -> ArchiveType {
+        self.archive_type
+    }
+
+    /// Set the comment for the archive
+    pub fn set_comment(&mut self, comment: String) {
+        self.writer.set_comment(comment);
+    }
+
+    /// Start a new file in the archive with default options
+    pub fn start_file(&mut self, name: &str) -> Result<()> {
+        let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
+        self.writer.start_file(name, options).map_zip_err()?;
+        Ok(())
+    }
+
+    /// Start a new file with custom options
+    pub fn start_file_with_options(
+        &mut self,
+        name: &str,
+        options: SimpleFileOptions,
+    ) -> Result<()> {
+        self.writer.start_file(name, options).map_zip_err()?;
+        Ok(())
+    }
+
+    /// Start a new file with extended options
+    pub fn start_file_with_extra_data(
+        &mut self,
+        name: &str,
+        _options: ExtendedFileOptions,
+    ) -> Result<()> {
+        // Note: ExtendedFileOptions may not be supported in this version.
+        // Convert to SimpleFileOptions for compatibility.
+        let simple_options =
+            SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
+        self.writer.start_file(name, simple_options).map_zip_err()?;
+        Ok(())
+    }
+
+    /// Write data to the current file
+    pub fn write(&mut self, data: &[u8]) -> Result<usize> {
+        Ok(self.writer.write(data)?)
+    }
+
+    /// Write all data to the current file
+    pub fn write_all(&mut self, data: &[u8]) -> Result<()> {
+        self.writer.write_all(data)?;
+        Ok(())
+    }
+
+    /// Add a file from a path with default compression
+    pub async fn add_file_from_path(
+        &mut self,
+        archive_path: &str,
+        file_path: impl AsRef<Path>,
+    ) -> Result<()> {
+        let file_path = file_path.as_ref();
+        let content = fs::read(file_path).await?;
+
+        let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
+
+        self.writer
+            .start_file(archive_path, options)
+            .map_zip_err()?;
+        self.writer.write_all(&content)?;
+
+        Ok(())
+    }
+
+    /// Add a file from memory
+    pub fn add_file_from_memory(&mut self, name: &str, data: &[u8]) -> Result<()> {
+        let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
+
+        self.writer.start_file(name, options).map_zip_err()?;
+        self.writer.write_all(data)?;
+
+        Ok(())
+    }
+
+    /// Add a directory entry
+    pub fn add_directory(&mut self, name: &str) -> Result<()> {
+        let dir_name = if name.ends_with('/') {
+            name.to_string()
+        } else {
+            format!("{}/", name)
+        };
+
+        let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored);
+
+        self.writer.start_file(&dir_name, options).map_zip_err()?;
+        Ok(())
+    }
+
+    /// Add an entire directory recursively
+    pub async fn add_directory_recursively(
+        &mut self,
+        archive_prefix: &str,
+        dir_path: impl AsRef<Path>,
+    ) -> Result<()> {
+        let dir_path = dir_path.as_ref();
+        let mut entries = fs::read_dir(dir_path).await?;
+
+        while let Some(entry) = entries.next_entry().await? {
+            let entry_path = entry.path();
+            let file_name = entry.file_name();
+            let file_name_str = file_name.to_string_lossy();
+
+            let archive_path = if archive_prefix.is_empty() {
+                file_name_str.to_string()
+            } else {
+                format!("{}/{}", archive_prefix, file_name_str)
+            };
+
+            if entry_path.is_dir() {
+                self.add_directory(&archive_path)?;
+                // Box the recursive call: a recursive `async fn` needs
+                // indirection to keep its future finitely sized.
+                Box::pin(self.add_directory_recursively(&archive_path, &entry_path)).await?;
+            } else {
+                self.add_file_from_path(&archive_path, &entry_path).await?;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Create options for storing files without compression
+    pub fn stored_options() -> SimpleFileOptions {
+        SimpleFileOptions::default().compression_method(CompressionMethod::Stored)
+    }
+
+    /// Create options for maximum compression
+    pub fn max_compression_options() -> SimpleFileOptions {
+        SimpleFileOptions::default()
+            .compression_method(CompressionMethod::Deflated)
+            .compression_level(Some(9))
+    }
+
+    /// Create options with custom compression level
+    pub fn compression_options(level: i32) -> SimpleFileOptions {
+        SimpleFileOptions::default()
+            .compression_method(CompressionMethod::Deflated)
+            .compression_level(Some(level.into()))
+    }
+
+    /// Finish writing the archive and return the underlying writer
+    pub fn finish(self) -> Result<W> {
+        self.writer.finish().map_zip_err()
+    }
+}
+
+/// Builder for creating ZIP archives from directories
+pub struct ZipDirectoryBuilder;
+
+impl ZipDirectoryBuilder {
+    /// Create a ZIP archive from a directory
+    ///
+    /// This method collects all files in the source directory and creates
+    /// a ZIP archive at the target path.
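+    ///
+    /// # Example
+    ///
+    /// A minimal sketch; `data/` and `backup.zip` are placeholder paths, and
+    /// the import path assumes `handler` re-exports this type:
+    ///
+    /// ```no_run
+    /// use std::path::Path;
+    ///
+    /// use nvisy_archive::handler::ZipDirectoryBuilder;
+    ///
+    /// async fn archive_dir() -> nvisy_archive::Result<()> {
+    ///     ZipDirectoryBuilder::create(Path::new("data"), Path::new("backup.zip")).await
+    /// }
+    /// ```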
+    pub async fn create(source_dir: &Path, target_path: &Path) -> Result<()> {
+        use std::fs;
+        use std::io::Write;
+
+        use zip::write::SimpleFileOptions;
+        use zip::{CompressionMethod, ZipWriter};
+
+        // Collect all files in the directory
+        fn collect_files(dir: &Path) -> Result<Vec<PathBuf>> {
+            let mut files = Vec::new();
+            let entries = fs::read_dir(dir)?;
+
+            for entry in entries {
+                let entry = entry?;
+                let path = entry.path();
+
+                if path.is_file() {
+                    files.push(path);
+                } else if path.is_dir() {
+                    let mut sub_files = collect_files(&path)?;
+                    files.append(&mut sub_files);
+                }
+            }
+
+            files.sort();
+            Ok(files)
+        }
+
+        let files = collect_files(source_dir)?;
+        let file = std::fs::File::create(target_path)?;
+        let mut zip = ZipWriter::new(file);
+
+        let options = SimpleFileOptions::default().compression_method(CompressionMethod::Deflated);
+
+        for file_path in files {
+            let relative_path = file_path
+                .strip_prefix(source_dir)
+                .map_err(|e| Error::invalid_archive(format!("Invalid file path: {}", e)))?;
+
+            let file_content = tokio::fs::read(&file_path).await?;
+
+            zip.start_file(relative_path.to_string_lossy().as_ref(), options)
+                .map_zip_err()?;
+            zip.write_all(&file_content)?;
+        }
+
+        zip.finish().map_zip_err()?;
+        Ok(())
+    }
+}
+
+/// Convenience constructor for ZIP handlers from memory
+impl ZipArchiveHandler<Cursor<Vec<u8>>> {
+    /// Create a ZIP handler from in-memory data
+    pub fn from_memory(data: Vec<u8>) -> Result<Self> {
+        let cursor = Cursor::new(data);
+        Self::new(cursor, ArchiveType::Zip)
+    }
+}
+
+/// Convenience constructor for ZIP builders with memory backing
+impl ZipArchiveBuilder<Cursor<Vec<u8>>> {
+    /// Create a ZIP builder that writes to memory
+    pub fn new_in_memory() -> Self {
+        let cursor = Cursor::new(Vec::new());
+        Self::new(cursor)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use tempfile::TempDir;
+
+    use super::*;
+
+    #[test]
+    fn test_zip_handler_from_memory() {
+        // Create a minimal ZIP file in memory
+        let cursor = Cursor::new(Vec::new());
+        let mut builder = ZipArchiveBuilder::new(cursor);
+
+        builder
+            .add_file_from_memory("test.txt", b"Hello, World!")
+            .unwrap();
+        let cursor = builder.finish().unwrap();
+
+        // Test the handler
+        let data = cursor.into_inner();
+        let handler = ZipArchiveHandler::from_memory(data);
+        assert!(handler.is_ok());
+
+        let mut handler = handler.unwrap();
+        assert_eq!(handler.len(), 1);
+        assert!(!handler.is_empty());
+        assert!(handler.contains_file("test.txt"));
+    }
+
+    #[test]
+    fn test_zip_handler_invalid_type() {
+        let data = Vec::new();
+        let cursor = Cursor::new(data);
+        let handler = ZipArchiveHandler::new(cursor, ArchiveType::Tar);
+        assert!(handler.is_err());
+    }
+
+    #[test]
+    fn test_zip_builder_creation() {
+        let cursor = Cursor::new(Vec::new());
+        let builder = ZipArchiveBuilder::new(cursor);
+        assert_eq!(builder.archive_type(), ArchiveType::Zip);
+    }
+
+    #[test]
+    fn test_zip_builder_in_memory() {
+        let mut builder = ZipArchiveBuilder::new_in_memory();
+        builder
+            .add_file_from_memory("test.txt", b"Hello, World!")
+            .unwrap();
+        builder.add_directory("subdir").unwrap();
+
+        let cursor = builder.finish().unwrap();
+        let data = cursor.into_inner();
+        assert!(!data.is_empty());
+    }
+
+    #[test]
+    fn test_compression_options() {
+        // Test that options can be created without panicking
+        let _stored = ZipArchiveBuilder::<Cursor<Vec<u8>>>::stored_options();
+        let _max_compression = ZipArchiveBuilder::<Cursor<Vec<u8>>>::max_compression_options();
+        let _custom = ZipArchiveBuilder::<Cursor<Vec<u8>>>::compression_options(5);
+
+        // Note: compression_method field is private, so we can't test it
+        // directly, but we can verify the options are created successfully.
+    }
+
+    #[tokio::test]
+    async fn test_zip_extract_operations() {
+        // Create a ZIP file with test data
+        let mut builder = ZipArchiveBuilder::new_in_memory();
+        builder
+            .add_file_from_memory("file1.txt", b"Content 1")
+            .unwrap();
+        builder
+            .add_file_from_memory("file2.txt", b"Content 2")
+            .unwrap();
+        builder.add_directory("subdir").unwrap();
+        builder
+            .add_file_from_memory("subdir/file3.txt", b"Content 3")
+            .unwrap();
+
+        let cursor = builder.finish().unwrap();
+        let data = cursor.into_inner();
+
+        // Test extraction
+        let mut handler = ZipArchiveHandler::from_memory(data).unwrap();
+        let temp_dir = TempDir::new().unwrap();
+
+        let extracted_files = handler.extract_to(temp_dir.path()).await.unwrap();
+        assert_eq!(extracted_files.len(), 3); // 3 files (directories don't count)
+
+        // Test reading specific file
+        let content = handler.read_file("file1.txt").unwrap();
+        assert_eq!(content, b"Content 1");
+    }
+
+    #[test]
+    fn test_entry_info() {
+        let info = ZipEntryInfo {
+            name: "test.txt".to_string(),
+            size: 100,
+            compressed_size: 80,
+            compression_method: CompressionMethod::Deflated,
+            is_dir: false,
+            is_file: true,
+            unix_mode: Some(0o644),
+            last_modified: DateTime::default(),
+            crc32: 12345,
+            extra_data: Vec::new(),
+            comment: String::new(),
+        };
+
+        assert_eq!(info.name, "test.txt");
+        assert_eq!(info.size, 100);
+        assert_eq!(info.compressed_size, 80);
+        assert!(!info.is_dir);
+        assert!(info.is_file);
+    }
+}
diff --git a/crates/nvisy-archive/src/lib.rs b/crates/nvisy-archive/src/lib.rs
new file mode 100644
index 0000000..8fc23af
--- /dev/null
+++ b/crates/nvisy-archive/src/lib.rs
@@ -0,0 +1,166 @@
+//! Archive handling library for nvisy
+//!
+//! This crate provides functionality for working with various archive formats
+//! including ZIP, TAR, 7z, and other compressed archive types. It supports both
+//! reading from files and memory, with flexible loading options.
+//!
+//! # Features
+//!
+//! - `zip` - ZIP archive support (enabled by default)
+//! - `tar` - TAR archive support (enabled by default)
+//! - `sevenz` - 7z archive support
+//! - `gzip` - GZIP compression support (enabled by default)
+//! - `bzip2` - BZIP2 compression support (enabled by default)
+//! - `xz` - XZ/LZMA compression support (enabled by default)
+
+#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+
+pub mod file;
+pub mod handler;
+
+// Re-exports for convenience
+pub use file::{ArchiveFile, ArchiveType};
+pub use handler::ArchiveHandler;
+// Re-export error types from nvisy-core
+pub use nvisy_core::error::{Error, ErrorResource, ErrorType, Result};
+
+/// Extension trait for creating archive-specific errors
+pub trait ArchiveErrorExt {
+    /// Create an unsupported format error
+    fn unsupported_format(format: impl Into<String>) -> Error;
+
+    /// Create an invalid archive error
+    fn invalid_archive(message: impl Into<String>) -> Error;
+
+    /// Create an entry not found error
+    fn entry_not_found(name: impl Into<String>) -> Error;
+
+    /// Create a permission denied error
+    fn archive_permission_denied(message: impl Into<String>) -> Error;
+
+    /// Create a corrupted archive error
+    fn corrupted(message: impl Into<String>) -> Error;
+
+    /// Create a resource limit error
+    fn archive_resource_limit(message: impl Into<String>) -> Error;
+}
+
+impl ArchiveErrorExt for Error {
+    fn unsupported_format(format: impl Into<String>) -> Error {
+        Error::new(
+            ErrorType::Runtime,
+            ErrorResource::Archive,
+            format!("Unsupported archive format: {}", format.into()),
+        )
+    }
+
+    fn invalid_archive(message: impl Into<String>) -> Error {
+        Error::new(
+            ErrorType::Runtime,
+            ErrorResource::Archive,
+            format!("Invalid archive: {}", message.into()),
+        )
+    }
+
+    fn entry_not_found(name: impl Into<String>) -> Error {
+        Error::new(
+            ErrorType::Runtime,
+            ErrorResource::Archive,
+            format!("Entry not found: {}", name.into()),
+        )
+    }
+
+    fn archive_permission_denied(message: impl Into<String>) -> Error {
+        Error::new(
+            ErrorType::Runtime,
+            ErrorResource::Archive,
+            format!("Permission denied: {}", message.into()),
+        )
+    }
+
+    fn corrupted(message: impl Into<String>) -> Error {
+        Error::new(
+            ErrorType::Runtime,
+            ErrorResource::Archive,
+            format!("Corrupted archive: {}", message.into()),
+        )
+    }
+
+    fn archive_resource_limit(message: impl Into<String>) -> Error {
+        Error::new(
+            ErrorType::Runtime,
+            ErrorResource::Archive,
+            format!("Resource limit exceeded: {}", message.into()),
+        )
+    }
+}
+
+/// Extension trait for converting ZIP errors to our Error type
+#[cfg(feature = "zip")]
+pub trait ZipErrorExt {
+    /// Convert a ZIP error to an archive Error
+    fn into_archive_error(self) -> Error;
+}
+
+#[cfg(feature = "zip")]
+impl ZipErrorExt for zip::result::ZipError {
+    fn into_archive_error(self) -> Error {
+        Error::from_source(
+            ErrorType::Runtime,
+            ErrorResource::Archive,
+            "ZIP operation failed",
+            self,
+        )
+    }
+}
+
+/// Extension to convert zip::Result to our Result type
+#[cfg(feature = "zip")]
+pub trait ZipResultExt<T> {
+    /// Convert a ZIP result to an archive Result
+    fn map_zip_err(self) -> Result<T>;
+}
+
+#[cfg(feature = "zip")]
+impl<T> ZipResultExt<T> for std::result::Result<T, zip::result::ZipError> {
+    fn map_zip_err(self) -> Result<T> {
+        self.map_err(|e| e.into_archive_error())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_error_creation() {
+        // Test archive-specific error constructors from ArchiveErrorExt trait
+        let error = <Error as ArchiveErrorExt>::unsupported_format("custom");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::invalid_archive("test message");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::entry_not_found("missing.txt");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::archive_permission_denied("access denied");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::corrupted("bad data");
+        assert_eq!(error.resource, ErrorResource::Archive);
+
+        let error = <Error as ArchiveErrorExt>::archive_resource_limit("too big");
+        assert_eq!(error.resource, ErrorResource::Archive);
+    }
+
+    #[test]
+    fn test_error_display() {
+        let error = <Error as ArchiveErrorExt>::unsupported_format("test");
+        assert!(error.to_string().contains("Unsupported archive format"));
+
+        let error = <Error as ArchiveErrorExt>::invalid_archive("bad archive");
+        assert!(error.to_string().contains("Invalid archive"));
+    }
+}
diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml
new file mode 100644
index 0000000..13130f3
--- /dev/null
+++ b/crates/nvisy-core/Cargo.toml
@@ -0,0 +1,46 @@
+# https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[package]
+name = "nvisy-core"
+version = { workspace = true }
+rust-version = { workspace = true }
+edition = { workspace = true }
+license = { workspace = true }
+publish = { workspace = true }
+readme = "./README.md"
+
+authors = { workspace = true }
+repository = { workspace = true }
+homepage = { workspace = true }
+documentation = { workspace = true }
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
+
+[dependencies]
+# Async runtime and I/O
+tokio = { workspace = true, features = ["fs", "io-util", "rt", "macros"] }
+
+# Data structures and utilities
+uuid = { workspace = true, features = ["v4", "v7", "serde"] }
+jiff = { workspace = true, features = ["std", "serde"] }
+bytes = { workspace = true, features = ["serde"] }
+
+# Cryptography
+sha2 = { workspace = true, features = [] }
+hex = { workspace = true, features = [] }
+
+# (De)serialization
+serde = { workspace = true, features = ["derive"] }
+
+# Utilities
+strum = { workspace = true, features = ["derive"] }
+
+# Error handling (moved from nvisy-error crate)
+thiserror = { workspace = true, features = ["std"] }
+hipstr = { workspace = true, features = ["std", "serde"] }
+
+[dev-dependencies]
+serde_json = { workspace = true, features = ["std"] }
+tempfile = { workspace = true, features = [] }
diff --git a/crates/nvisy-core/README.md b/crates/nvisy-core/README.md
new file mode 100644
index 0000000..524b07b
--- /dev/null
+++ b/crates/nvisy-core/README.md
@@ -0,0 +1,48 @@
+# nvisy-core
+
+Core types, traits, runtime primitives, and error handling for the Nvisy data
+processing system.
+
+[![rust](https://img.shields.io/badge/Rust-1.89+-000000?style=flat-square&logo=rust&logoColor=white)](https://www.rust-lang.org/)
+[![tokio](https://img.shields.io/badge/Tokio-1.0+-000000?style=flat-square&logo=rust&logoColor=white)](https://tokio.rs/)
+
+## Overview
+
+This crate provides the foundational building blocks for the Nvisy ecosystem,
+including data processing primitives, structured error handling, and component
+health monitoring.
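+
+## Example
+
+A minimal sketch of reading a file into `ContentData` (the path is a
+placeholder):
+
+```rust,no_run
+use nvisy_core::fs::ContentFile;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let mut file = ContentFile::open("example.txt").await?;
+    let data = file.read_to_content_data().await?;
+    println!("read {} bytes", data.size());
+    Ok(())
+}
+```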
+
+## Features
+
+### Data Processing
+
+- **Content Management** - Unified content structures with SHA256 hashing and
+  metadata
+- **File Operations** - Async file handling with content source tracking
+- **Data Classification** - Sensitivity levels and structure type classification
+- **Format Detection** - Automatic content kind detection from file extensions
+- **I/O Abstractions** - Modern async traits for content reading and writing
+- **Zero-Copy Operations** - Efficient data handling using `bytes::Bytes`
+
+### Error Handling & Monitoring
+
+- **Structured Errors** - Rich error types with source classification and
+  context tracking
+- **Component Health** - Health status monitoring with operational state
+  tracking
+- **Status Reporting** - Comprehensive status information with severity levels
+- **Component Trait** - Standardized interface for component health checks
+- **Result Types** - Ergonomic error handling with custom `Result` type
+
+## Feature Flags
+
+- `serde` - Enable serialization/deserialization support for all types using
+  serde. This allows converting structs to/from JSON, YAML, and other formats.
+- `jiff` - Enable timestamp support using the jiff datetime library. This adds
+  timestamp fields to `ComponentStatus` and time-based operations.
+
+## Dependencies
+
+- `tokio` - Async runtime for I/O operations
+- `bytes` - Zero-copy byte buffer management
+- `uuid` - Unique identifiers with v7 support
diff --git a/crates/nvisy-core/src/error/error_source.rs b/crates/nvisy-core/src/error/error_source.rs
new file mode 100644
index 0000000..8839fa9
--- /dev/null
+++ b/crates/nvisy-core/src/error/error_source.rs
@@ -0,0 +1,97 @@
+use serde::{Deserialize, Serialize};
+use strum::{AsRefStr, Display};
+
+/// System component sources where errors can originate.
+///
+/// This enum identifies the subsystem or component that generated an error,
+/// enabling better error categorization and handling across the nvisy ecosystem.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, AsRefStr, Display)]
+#[derive(Serialize, Deserialize)]
+#[strum(serialize_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum ErrorResource {
+    /// Core framework and foundational components.
+    Core,
+    /// Execution engine and processing components.
+    Engine,
+    /// Document format handling components.
+    Document,
+    /// Archive handling components.
+    Archive,
+    /// Pattern matching and rule processing components.
+    Pattern,
+    /// Runtime environment and dynamic execution components.
+    Runtime,
+    /// Gateway and API boundary components.
+    Gateway,
+}
+
+impl ErrorResource {
+    /// Returns `true` if the error source is from internal system components.
+    #[must_use]
+    pub const fn is_internal(&self) -> bool {
+        matches!(
+            self,
+            Self::Core | Self::Pattern | Self::Engine | Self::Document | Self::Archive
+        )
+    }
+
+    /// Returns `true` if the error source is from external or runtime components.
+    #[must_use]
+    pub const fn is_external(&self) -> bool {
+        matches!(self, Self::Runtime | Self::Gateway)
+    }
+
+    /// Returns the priority level of the error source for logging and alerting.
+    ///
+    /// Higher values indicate more critical components.
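+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::error::ErrorResource;
+    ///
+    /// assert!(ErrorResource::Core.priority_level() > ErrorResource::Gateway.priority_level());
+    /// ```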
+    #[must_use]
+    pub const fn priority_level(&self) -> u8 {
+        match self {
+            Self::Core => 6, // Highest priority
+            Self::Engine => 5,
+            Self::Document | Self::Archive => 4,
+            Self::Pattern => 3,
+            Self::Runtime => 2,
+            Self::Gateway => 1, // Lowest priority
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_string_representations() {
+        assert_eq!(ErrorResource::Core.as_ref(), "core");
+        assert_eq!(ErrorResource::Engine.as_ref(), "engine");
+        assert_eq!(ErrorResource::Document.as_ref(), "document");
+        assert_eq!(ErrorResource::Archive.as_ref(), "archive");
+        assert_eq!(ErrorResource::Pattern.as_ref(), "pattern");
+        assert_eq!(ErrorResource::Runtime.as_ref(), "runtime");
+        assert_eq!(ErrorResource::Gateway.as_ref(), "gateway");
+    }
+
+    #[test]
+    fn test_priority_levels() {
+        assert_eq!(ErrorResource::Core.priority_level(), 6);
+        assert_eq!(ErrorResource::Engine.priority_level(), 5);
+        assert_eq!(ErrorResource::Document.priority_level(), 4);
+        assert_eq!(ErrorResource::Archive.priority_level(), 4);
+        assert_eq!(ErrorResource::Pattern.priority_level(), 3);
+        assert_eq!(ErrorResource::Runtime.priority_level(), 2);
+        assert_eq!(ErrorResource::Gateway.priority_level(), 1);
+    }
+
+    #[test]
+    fn test_internal_external_classification() {
+        assert!(ErrorResource::Core.is_internal());
+        assert!(ErrorResource::Pattern.is_internal());
+        assert!(ErrorResource::Engine.is_internal());
+        assert!(ErrorResource::Document.is_internal());
+        assert!(ErrorResource::Archive.is_internal());
+        assert!(ErrorResource::Runtime.is_external());
+        assert!(ErrorResource::Gateway.is_external());
+    }
+}
diff --git a/crates/nvisy-core/src/error/error_type.rs b/crates/nvisy-core/src/error/error_type.rs
new file mode 100644
index 0000000..a1f6073
--- /dev/null
+++ b/crates/nvisy-core/src/error/error_type.rs
@@ -0,0 +1,36 @@
+use serde::{Deserialize, Serialize};
+use strum::{AsRefStr, Display};
+
+/// Classification of error types by their operational domain.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, AsRefStr, Display)]
+#[derive(Serialize, Deserialize)]
+#[strum(serialize_all = "snake_case")]
+#[serde(rename_all = "snake_case")]
+pub enum ErrorType {
+    /// Configuration loading, parsing, or validation failures.
+    Config,
+    /// Execution-time operational failures.
+    Runtime,
+    /// Internal system logic or state failures.
+    Other,
+}
+
+impl ErrorType {
+    /// Check if this error type is typically recoverable
+    #[must_use]
+    pub fn is_recoverable(&self) -> bool {
+        matches!(self, ErrorType::Runtime)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_recoverability() {
+        assert!(ErrorType::Runtime.is_recoverable());
+        assert!(!ErrorType::Other.is_recoverable());
+        assert!(!ErrorType::Config.is_recoverable());
+    }
+}
diff --git a/crates/nvisy-core/src/error/mod.rs b/crates/nvisy-core/src/error/mod.rs
new file mode 100644
index 0000000..62cb82f
--- /dev/null
+++ b/crates/nvisy-core/src/error/mod.rs
@@ -0,0 +1,281 @@
+//! Structured error handling for the nvisy ecosystem.
+//!
+//! This module provides structured error handling with source classification and context tracking
+//! that can be reused across all nvisy crates.
+
+use std::fmt;
+
+use hipstr::HipStr;
+
+pub use crate::error::error_source::ErrorResource;
+pub use crate::error::error_type::ErrorType;
+
+mod error_source;
+mod error_type;
+
+/// Type alias for boxed standard errors.
+pub type BoxError = Box<dyn std::error::Error + Send + Sync>;
+
+/// Structured error type with source classification and context tracking.
+///
+/// This error type is designed to be used across the entire nvisy ecosystem,
+/// providing consistent error handling with classification and context.
+#[must_use]
+#[derive(Debug)]
+pub struct Error {
+    /// Error classification type.
+    pub etype: ErrorType,
+    /// Component where the error originated.
+    pub resource: ErrorResource,
+    /// Primary error message.
+    pub message: HipStr<'static>,
+
+    /// Underlying source error, if any.
+    source: Option<BoxError>,
+    /// Additional context information.
+    pub context: Option<HipStr<'static>>,
+}
+
+/// Result type alias using the nvisy Error.
+pub type Result<T> = std::result::Result<T, Error>;
+
+impl Error {
+    /// Creates a new error with the specified type, source, and message.
+    pub fn new(
+        etype: ErrorType,
+        resource: ErrorResource,
+        message: impl Into<HipStr<'static>>,
+    ) -> Self {
+        Self {
+            etype,
+            resource,
+            source: None,
+            context: None,
+            message: message.into(),
+        }
+    }
+
+    /// Creates a new error with the specified type, source, message, and source error.
+    pub fn from_source(
+        etype: ErrorType,
+        resource: ErrorResource,
+        message: impl Into<HipStr<'static>>,
+        source: impl Into<BoxError>,
+    ) -> Self {
+        Self {
+            etype,
+            resource,
+            source: Some(source.into()),
+            context: None,
+            message: message.into(),
+        }
+    }
+
+    /// Sets the type of the error.
+    pub const fn with_type(mut self, etype: ErrorType) -> Self {
+        self.etype = etype;
+        self
+    }
+
+    /// Sets the resource of the error.
+    pub const fn with_resource(mut self, resource: ErrorResource) -> Self {
+        self.resource = resource;
+        self
+    }
+
+    /// Sets the source of the error.
+    pub fn with_source(mut self, source: impl Into<BoxError>) -> Self {
+        self.source = Some(source.into());
+        self
+    }
+
+    /// Adds context to the error.
+    pub fn with_context(mut self, context: impl Into<HipStr<'static>>) -> Self {
+        self.context = Some(context.into());
+        self
+    }
+
+    /// Returns the underlying source error, if any.
+    #[must_use]
+    pub fn source_error(&self) -> Option<&(dyn std::error::Error + Send + Sync)> {
+        self.source.as_deref()
+    }
+
+    /// Check if this error is recoverable based on its type.
+    #[must_use]
+    pub fn is_recoverable(&self) -> bool {
+        self.etype.is_recoverable()
+    }
+
+    /// Returns the display message for the error.
+    fn display_message(&self) -> String {
+        let mut parts = Vec::new();
+
+        parts.push(format!(
+            "[{}:{}]",
+            self.resource.as_ref(),
+            self.etype.as_ref()
+        ));
+        parts.push(self.message.to_string());
+
+        if let Some(ref context) = self.context {
+            parts.push(format!("(context: {context})"));
+        }
+
+        parts.join(" ")
+    }
+
+    // Convenience constructors for common error patterns
+
+    /// Creates a runtime error.
+    pub fn runtime(resource: ErrorResource, message: impl Into<HipStr<'static>>) -> Self {
+        Self::new(ErrorType::Runtime, resource, message)
+    }
+
+    /// Creates a configuration error.
+    pub fn config(resource: ErrorResource, message: impl Into<HipStr<'static>>) -> Self {
+        Self::new(ErrorType::Config, resource, message)
+    }
+
+    /// Creates an unsupported format error.
+    pub fn unsupported_format(message: impl Into<HipStr<'static>>) -> Self {
+        Self::new(ErrorType::Runtime, ErrorResource::Core, message)
+    }
+
+    /// Creates an invalid input error.
+    pub fn invalid_input(message: impl Into<HipStr<'static>>) -> Self {
+        Self::new(ErrorType::Runtime, ErrorResource::Core, message)
+    }
+
+    /// Creates a not found error.
+    pub fn not_found(message: impl Into<HipStr<'static>>) -> Self {
+        Self::new(ErrorType::Runtime, ErrorResource::Core, message)
+    }
+
+    /// Creates a permission denied error.
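+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::error::{Error, ErrorType};
+    ///
+    /// let error = Error::permission_denied("write access required");
+    /// assert_eq!(error.etype, ErrorType::Runtime);
+    /// ```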
+    pub fn permission_denied(message: impl Into<HipStr<'static>>) -> Self {
+        Self::new(ErrorType::Runtime, ErrorResource::Core, message)
+    }
+
+    /// Creates a resource limit exceeded error.
+    pub fn resource_limit(message: impl Into<HipStr<'static>>) -> Self {
+        Self::new(ErrorType::Runtime, ErrorResource::Core, message)
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.display_message())
+    }
+}
+
+impl std::error::Error for Error {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        self.source
+            .as_ref()
+            .map(|e| e.as_ref() as &(dyn std::error::Error + 'static))
+    }
+}
+
+impl From<std::io::Error> for Error {
+    fn from(error: std::io::Error) -> Self {
+        Self::from_source(
+            ErrorType::Runtime,
+            ErrorResource::Core,
+            "I/O operation failed",
+            error,
+        )
+    }
+}
+
+impl From<std::string::FromUtf8Error> for Error {
+    fn from(error: std::string::FromUtf8Error) -> Self {
+        Self::from_source(
+            ErrorType::Runtime,
+            ErrorResource::Core,
+            "Invalid UTF-8 encoding",
+            error,
+        )
+    }
+}
+
+impl From<std::str::Utf8Error> for Error {
+    fn from(error: std::str::Utf8Error) -> Self {
+        Self::from_source(
+            ErrorType::Runtime,
+            ErrorResource::Core,
+            "Invalid UTF-8 encoding",
+            error,
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_error_builder() {
+        let error = Error::new(ErrorType::Config, ErrorResource::Core, "test message");
+        assert_eq!(error.etype, ErrorType::Config);
+        assert_eq!(error.resource, ErrorResource::Core);
+        assert_eq!(error.message, "test message");
+        assert!(error.source.is_none());
+        assert!(error.context.is_none());
+    }
+
+    #[test]
+    fn test_error_with_context() {
+        let error = Error::new(ErrorType::Other, ErrorResource::Engine, "test")
+            .with_context("additional context");
+        assert_eq!(error.context.as_deref(), Some("additional context"));
+    }
+
+    #[test]
+    fn test_error_display() {
+        let error = Error::new(ErrorType::Runtime, ErrorResource::Core, "test error")
+            .with_context("additional info");
+
+        let display_str = error.to_string();
+        assert!(display_str.contains("core"));
+        assert!(display_str.contains("runtime"));
+        assert!(display_str.contains("test error"));
+        assert!(display_str.contains("(context: additional info)"));
+    }
+
+    #[test]
+    fn test_error_from_io() {
+        let io_error = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found");
+        let error = Error::from(io_error);
+
+        assert_eq!(error.etype, ErrorType::Runtime);
+        assert_eq!(error.resource, ErrorResource::Core);
+        assert_eq!(error.message, "I/O operation failed");
+        assert!(error.source.is_some());
+    }
+
+    #[test]
+    fn test_convenience_constructors() {
+        let runtime_err = Error::runtime(ErrorResource::Engine, "runtime failure");
+        assert_eq!(runtime_err.etype, ErrorType::Runtime);
+        assert_eq!(runtime_err.resource, ErrorResource::Engine);
+
+        let config_err = Error::config(ErrorResource::Core, "config failure");
+        assert_eq!(config_err.etype, ErrorType::Config);
+
+        let unsupported = Error::unsupported_format("unknown format");
+        assert_eq!(unsupported.etype, ErrorType::Runtime);
+
+        let not_found = Error::not_found("file missing");
+        assert_eq!(not_found.etype, ErrorType::Runtime);
+    }
+
+    #[test]
+    fn test_is_recoverable() {
+        let runtime_err = Error::runtime(ErrorResource::Core, "test");
+        assert!(runtime_err.is_recoverable());
+
+        let config_err = Error::config(ErrorResource::Core, "test");
+        assert!(!config_err.is_recoverable());
+    }
+}
diff --git a/crates/nvisy-core/src/fs/content_file.rs b/crates/nvisy-core/src/fs/content_file.rs
new file mode 100644
index 0000000..a9d102d
--- /dev/null
+++ b/crates/nvisy-core/src/fs/content_file.rs
@@ -0,0 +1,646 @@
+//! Content file handling for filesystem operations
+//!
+//! This module provides the [`ContentFile`] struct for working with files
+//! on the filesystem while maintaining content source tracking and metadata.
+
+use std::io;
+use std::path::{Path, PathBuf};
+
+use tokio::fs::{File, OpenOptions};
+use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt, SeekFrom};
+
+use crate::error::{Error, ErrorResource, ErrorType, Result};
+use crate::fs::{ContentKind, ContentMetadata};
+use crate::io::{AsyncContentRead, AsyncContentWrite, ContentData};
+use crate::path::ContentSource;
+
+/// A file wrapper that combines filesystem operations with content tracking
+///
+/// This struct provides a high-level interface for working with files while
+/// maintaining content source identification and metadata throughout the
+/// processing pipeline.
+#[derive(Debug)]
+pub struct ContentFile {
+    /// Unique identifier for this content source
+    content_source: ContentSource,
+    /// The underlying tokio file handle
+    file: File,
+    /// Path to the file
+    path: PathBuf,
+}
+
+impl ContentFile {
+    /// Create a new `ContentFile` by opening an existing file
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be opened or doesn't exist.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::fs::ContentFile;
+    /// use std::path::Path;
+    ///
+    /// async fn open_file() -> Result<(), Box<dyn std::error::Error>> {
+    ///     let content_file = ContentFile::open("example.txt").await?;
+    ///     println!("Opened file with source: {}", content_file.content_source());
+    ///     Ok(())
+    /// }
+    /// ```
+    pub async fn open(path: impl AsRef<Path>) -> io::Result<Self> {
+        let path_buf = path.as_ref().to_path_buf();
+        let file = File::open(&path_buf).await?;
+        let content_source = ContentSource::new();
+
+        Ok(Self {
+            content_source,
+            file,
+            path: path_buf,
+        })
+    }
+
+    /// Create a new `ContentFile` with a specific content source
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be opened or read.
+    pub async fn open_with_source(
+        path: impl AsRef<Path>,
+        content_source: ContentSource,
+    ) -> io::Result<Self> {
+        let path_buf = path.as_ref().to_path_buf();
+        let file = File::open(&path_buf).await?;
+
+        Ok(Self {
+            content_source,
+            file,
+            path: path_buf,
+        })
+    }
+
+    /// Create a new file and return a `ContentFile`
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be created.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::fs::ContentFile;
+    ///
+    /// async fn create_file() -> Result<(), Box<dyn std::error::Error>> {
+    ///     let content_file = ContentFile::create("new_file.txt").await?;
+    ///     println!("Created file with source: {}", content_file.content_source());
+    ///     Ok(())
+    /// }
+    /// ```
+    pub async fn create(path: impl AsRef<Path>) -> io::Result<Self> {
+        let path_buf = path.as_ref().to_path_buf();
+        let file = File::create(&path_buf).await?;
+        let content_source = ContentSource::new();
+
+        Ok(Self {
+            content_source,
+            file,
+            path: path_buf,
+        })
+    }
+
+    /// Create a new file with a specific content source
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be created or written to.
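+    ///
+    /// # Example
+    ///
+    /// A minimal sketch reusing an existing source id; the path is a
+    /// placeholder:
+    ///
+    /// ```no_run
+    /// use nvisy_core::fs::ContentFile;
+    /// use nvisy_core::path::ContentSource;
+    ///
+    /// async fn create() -> std::io::Result<()> {
+    ///     let source = ContentSource::new();
+    ///     let content_file = ContentFile::create_with_source("output.txt", source).await?;
+    ///     assert_eq!(content_file.content_source(), source);
+    ///     Ok(())
+    /// }
+    /// ```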
+    pub async fn create_with_source(
+        path: impl AsRef<Path>,
+        content_source: ContentSource,
+    ) -> io::Result<Self> {
+        let path_buf = path.as_ref().to_path_buf();
+        let file = File::create(&path_buf).await?;
+
+        Ok(Self {
+            content_source,
+            file,
+            path: path_buf,
+        })
+    }
+
+    /// Open a file with custom options
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::fs::ContentFile;
+    /// use tokio::fs::OpenOptions;
+    ///
+    /// async fn open_with_options() -> Result<(), Box<dyn std::error::Error>> {
+    ///     let mut options = OpenOptions::new();
+    ///     options.read(true)
+    ///         .write(true)
+    ///         .create(true);
+    ///
+    ///     let content_file = ContentFile::open_with_options("data.txt", &options).await?;
+    ///     Ok(())
+    /// }
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be opened with the specified options.
+    pub async fn open_with_options(
+        path: impl AsRef<Path>,
+        options: &OpenOptions,
+    ) -> io::Result<Self> {
+        let path_buf = path.as_ref().to_path_buf();
+        let file = options.open(&path_buf).await?;
+        let content_source = ContentSource::new();
+
+        Ok(Self {
+            content_source,
+            file,
+            path: path_buf,
+        })
+    }
+
+    /// Read all content from the file into a `ContentData` structure
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be read or if an I/O error occurs.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::fs::ContentFile;
+    ///
+    /// async fn read_content() -> Result<(), Box<dyn std::error::Error>> {
+    ///     let mut content_file = ContentFile::open("example.txt").await?;
+    ///     let content_data = content_file.read_to_content_data().await?;
+    ///
+    ///     println!("Read {} bytes", content_data.size());
+    ///     Ok(())
+    /// }
+    /// ```
+    pub async fn read_to_content_data(&mut self) -> Result<ContentData> {
+        let mut buffer = Vec::new();
+        self.file.read_to_end(&mut buffer).await?;
+
+        let content_data = ContentData::new(self.content_source, buffer.into());
+
+        Ok(content_data)
+    }
+
+    /// Read content with size limit to prevent memory issues
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be read, if an I/O error occurs,
+    /// or if the file size exceeds the specified maximum size.
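+    ///
+    /// # Example
+    ///
+    /// A minimal sketch that caps reads at 1 MiB; the path is a placeholder:
+    ///
+    /// ```no_run
+    /// use nvisy_core::fs::ContentFile;
+    ///
+    /// async fn read_capped() -> nvisy_core::error::Result<()> {
+    ///     let mut content_file = ContentFile::open("example.txt").await?;
+    ///     let content_data = content_file.read_to_content_data_limited(1024 * 1024).await?;
+    ///     println!("Read {} bytes", content_data.size());
+    ///     Ok(())
+    /// }
+    /// ```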
+    pub async fn read_to_content_data_limited(&mut self, max_size: usize) -> Result<ContentData> {
+        let mut buffer = Vec::new();
+        let mut temp_buffer = vec![0u8; 8192];
+        let mut total_read = 0;
+
+        loop {
+            let bytes_read = self.file.read(&mut temp_buffer).await?;
+            if bytes_read == 0 {
+                break; // EOF
+            }
+
+            if total_read + bytes_read > max_size {
+                return Err(Error::new(
+                    ErrorType::Runtime,
+                    ErrorResource::Core,
+                    format!("File size exceeds maximum limit of {max_size} bytes"),
+                ));
+            }
+
+            buffer.extend_from_slice(&temp_buffer[..bytes_read]);
+            total_read += bytes_read;
+        }
+
+        let content_data = ContentData::new(self.content_source, buffer.into());
+
+        Ok(content_data)
+    }
+
+    /// Write `ContentData` to the file
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the data cannot be written or if an I/O error occurs.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::fs::ContentFile;
+    /// use nvisy_core::io::ContentData;
+    ///
+    /// async fn write_content() -> Result<(), Box<dyn std::error::Error>> {
+    ///     let mut content_file = ContentFile::create("output.txt").await?;
+    ///     let content_data = ContentData::from("Hello, world!");
+    ///
+    ///     let metadata = content_file.write_from_content_data(content_data).await?;
+    ///     println!("Written to: {:?}", metadata.source_path);
+    ///     Ok(())
+    /// }
+    /// ```
+    pub async fn write_from_content_data(
+        &mut self,
+        content_data: ContentData,
+    ) -> Result<ContentMetadata> {
+        self.file.write_all(content_data.as_bytes()).await?;
+        self.file.flush().await?;
+
+        let metadata = ContentMetadata::with_path(content_data.content_source, self.path.clone());
+        Ok(metadata)
+    }
+
+    /// Append `ContentData` to the file
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the data cannot be appended or if an I/O error occurs.
+    pub async fn append_from_content_data(
+        &mut self,
+        content_data: ContentData,
+    ) -> Result<ContentMetadata> {
+        self.file.seek(SeekFrom::End(0)).await?;
+        self.file.write_all(content_data.as_bytes()).await?;
+        self.file.flush().await?;
+
+        let metadata = ContentMetadata::with_path(content_data.content_source, self.path.clone());
+        Ok(metadata)
+    }
+
+    /// Write `ContentData` in chunks for better memory efficiency
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the data cannot be written or if an I/O error occurs.
+    pub async fn write_from_content_data_chunked(
+        &mut self,
+        content_data: ContentData,
+        chunk_size: usize,
+    ) -> Result<ContentMetadata> {
+        let data = content_data.as_bytes();
+
+        for chunk in data.chunks(chunk_size) {
+            self.file.write_all(chunk).await?;
+        }
+
+        self.file.flush().await?;
+
+        let metadata = ContentMetadata::with_path(content_data.content_source, self.path.clone());
+        Ok(metadata)
+    }
+
+    /// Get content metadata for this file
+    pub fn content_metadata(&self) -> ContentMetadata {
+        ContentMetadata::with_path(self.content_source, self.path.clone())
+    }
+
+    /// Get the file path
+    pub fn path(&self) -> &Path {
+        &self.path
+    }
+
+    /// Get the content source
+    pub fn content_source(&self) -> ContentSource {
+        self.content_source
+    }
+
+    /// Get the source identifier for this content
+    pub fn source(&self) -> ContentSource {
+        self.content_source
+    }
+
+    /// Get a reference to the underlying file
+    pub fn as_file(&self) -> &File {
+        &self.file
+    }
+
+    /// Get a mutable reference to the underlying file
+    pub fn as_file_mut(&mut self) -> &mut File {
+        &mut self.file
+    }
+
+    /// Convert into the underlying file, consuming the `ContentFile`
+    pub fn into_file(self) -> File {
+        self.file
+    }
+
+    /// Get file size in bytes
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file metadata cannot be retrieved.
+    pub async fn size(&mut self) -> Result<u64> {
+        let metadata = self.file.metadata().await?;
+        Ok(metadata.len())
+    }
+
+    /// Check if the file exists
+    pub fn exists(&self) -> bool {
+        self.path.exists()
+    }
+
+    /// Get the filename
+    pub fn filename(&self) -> Option<&str> {
+        self.path.file_name().and_then(|name| name.to_str())
+    }
+
+    /// Get the file extension
+    pub fn extension(&self) -> Option<&str> {
+        self.path.extension().and_then(|ext| ext.to_str())
+    }
+
+    /// Detect content kind from file extension
+    pub fn detect_content_kind(&self) -> ContentKind {
+        self.extension()
+            .map(ContentKind::from_file_extension)
+            .unwrap_or_default()
+    }
+
+    /// Sync all data to disk
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the sync operation fails.
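+    ///
+    /// # Example
+    ///
+    /// A minimal sketch; the path is a placeholder:
+    ///
+    /// ```no_run
+    /// use nvisy_core::fs::ContentFile;
+    ///
+    /// async fn persist() -> nvisy_core::error::Result<()> {
+    ///     let mut content_file = ContentFile::create("output.txt").await?;
+    ///     content_file.sync_all().await?;
+    ///     Ok(())
+    /// }
+    /// ```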
+    pub async fn sync_all(&mut self) -> Result<()> {
+        self.file.sync_all().await?;
+        Ok(())
+    }
+
+    /// Sync data (but not metadata) to disk
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the sync operation fails.
+    pub async fn sync_data(&mut self) -> Result<()> {
+        self.file.sync_data().await?;
+        Ok(())
+    }
+
+    /// Seek to a specific position in the file
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the seek operation fails.
+    pub async fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
+        let position = self.file.seek(pos).await?;
+        Ok(position)
+    }
+
+    /// Get current position in the file
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the current position cannot be determined.
+    pub async fn stream_position(&mut self) -> Result<u64> {
+        let position = self.file.stream_position().await?;
+        Ok(position)
+    }
+}
+
+// Implement AsyncRead for ContentFile by delegating to the underlying file
+impl AsyncRead for ContentFile {
+    fn poll_read(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+        buf: &mut tokio::io::ReadBuf<'_>,
+    ) -> std::task::Poll<io::Result<()>> {
+        std::pin::Pin::new(&mut self.file).poll_read(cx, buf)
+    }
+}
+
+// Implement AsyncWrite for ContentFile by delegating to the underlying file
+impl AsyncWrite for ContentFile {
+    fn poll_write(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+        buf: &[u8],
+    ) -> std::task::Poll<io::Result<usize>> {
+        std::pin::Pin::new(&mut self.file).poll_write(cx, buf)
+    }
+
+    fn poll_flush(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<io::Result<()>> {
+        std::pin::Pin::new(&mut self.file).poll_flush(cx)
+    }
+
+    fn poll_shutdown(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> std::task::Poll<io::Result<()>> {
+        std::pin::Pin::new(&mut self.file).poll_shutdown(cx)
+    }
+}
+
+// Implement AsyncContentRead for ContentFile by delegating to the underlying file
+impl AsyncContentRead for ContentFile {
+    // Default implementations from the trait will work since File implements AsyncRead
+}
+
+// Implement AsyncContentWrite for ContentFile by delegating to the underlying file
+impl AsyncContentWrite for ContentFile {
+    // Default implementations from the trait will work since File implements AsyncWrite
+}
+
+#[cfg(test)]
+mod tests {
+    use tempfile::NamedTempFile;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn test_create_and_open() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let path = temp_file.path();
+
+        // Create file
+        let content_file = ContentFile::create(path).await.unwrap();
+        assert_eq!(content_file.path(), path);
+        assert!(!content_file.content_source.as_uuid().is_nil());
+
+        // Clean up
+        drop(content_file);
+
+        // Open existing file
+        let content_file = ContentFile::open(path).await.unwrap();
+        assert_eq!(content_file.path(), path);
+    }
+
+    #[tokio::test]
+    async fn test_write_and_read_content_data() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let path = temp_file.path();
+
+        // Write content
+        let mut content_file = ContentFile::create(path).await.unwrap();
+        let content_data = ContentData::from("Hello, world!");
+        let metadata = content_file
+            .write_from_content_data(content_data)
+            .await
+            .unwrap();
+
+        assert_eq!(metadata.source_path, Some(path.to_path_buf()));
+
+        // Read content back
+        drop(content_file);
+        let mut content_file = ContentFile::open(path).await.unwrap();
+        let read_content = content_file.read_to_content_data().await.unwrap();
+
+        assert_eq!(read_content.as_string().unwrap(), "Hello, world!");
+    }
+
+    #[tokio::test]
+    async fn test_content_kind_detection() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let mut path = temp_file.path().to_path_buf();
+        path.set_extension("txt");
+
+        let content_file = ContentFile::create(&path).await.unwrap();
+        assert_eq!(content_file.detect_content_kind(), ContentKind::Text);
+        assert_eq!(content_file.extension(), Some("txt"));
+        assert_eq!(
+            content_file.filename(),
+            path.file_name().and_then(|n| n.to_str())
+        );
+    }
+
+    #[tokio::test]
+    async fn test_write_chunked() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let path = temp_file.path();
+
+        let mut content_file = ContentFile::create(path).await.unwrap();
+        let large_data = vec![b'A'; 1000];
+        let content_data = ContentData::from(large_data.clone());
+
+        let metadata = content_file
+            .write_from_content_data_chunked(content_data, 100)
+            .await
+            .unwrap();
+        assert_eq!(metadata.source_path, Some(path.to_path_buf()));
+
+        // Verify content
+        drop(content_file);
+        let mut content_file = ContentFile::open(path).await.unwrap();
+        let read_content = content_file.read_to_content_data().await.unwrap();
+
+        assert_eq!(read_content.as_bytes(), large_data.as_slice());
+    }
+
+    #[tokio::test]
+    async fn test_append_content() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let path = temp_file.path();
+
+        // Write initial content
+        let mut content_file = ContentFile::create(path).await.unwrap();
+        let initial_content = ContentData::from("Hello, ");
+        content_file
+            .write_from_content_data(initial_content)
+            .await
+            .unwrap();
+
+        // Append more content
+        let append_content = ContentData::from("world!");
+        content_file
+            .append_from_content_data(append_content)
+            .await
+            .unwrap();
+
+        // Verify combined content
+        drop(content_file);
+        let mut content_file = ContentFile::open(path).await.unwrap();
+        let read_content = content_file.read_to_content_data().await.unwrap();
+
+        assert_eq!(read_content.as_string().unwrap(), "Hello, world!");
+    }
+
+    #[tokio::test]
+    async fn test_read_with_limit() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let path = temp_file.path();
+
+        // Write content larger than limit
+        let mut content_file = ContentFile::create(path).await.unwrap();
+        let large_content = ContentData::from(vec![b'X'; 1000]);
+        content_file
+            .write_from_content_data(large_content)
+            .await
+            .unwrap();
+
+        drop(content_file);
+
+        // Try to read with small limit
+        let mut content_file = ContentFile::open(path).await.unwrap();
+        let result = content_file.read_to_content_data_limited(100).await;
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_file_operations() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let path = temp_file.path();
+
+        let mut content_file = ContentFile::create(path).await.unwrap();
+
+        // Test size (should be 0 for new file)
+        let size = content_file.size().await.unwrap();
+        assert_eq!(size, 0);
+
+        // Test existence
+        assert!(content_file.exists());
+
+        // Write some content
+        let content = ContentData::from("Test content");
+        content_file.write_from_content_data(content).await.unwrap();
+
+        // Test size after writing
+        let size = content_file.size().await.unwrap();
+        assert!(size > 0);
+
+        // Test sync operations
+        content_file.sync_all().await.unwrap();
+        content_file.sync_data().await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_seeking() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let path = temp_file.path();
+
+        let mut content_file = ContentFile::create(path).await.unwrap();
+        let content = ContentData::from("0123456789");
+        content_file.write_from_content_data(content).await.unwrap();
+
+        // Test seeking
+        let pos = content_file.seek(SeekFrom::Start(5)).await.unwrap();
+        assert_eq!(pos, 5);
+
+        let current_pos = content_file.stream_position().await.unwrap();
+        assert_eq!(current_pos, 5);
+    }
+
+    #[tokio::test]
+    async fn test_with_specific_source() {
+        let temp_file = NamedTempFile::new().unwrap();
+        let path = temp_file.path();
+
+        let source = ContentSource::new();
+        let content_file = ContentFile::create_with_source(path, source).await.unwrap();
+
+        assert_eq!(content_file.content_source, source);
+
+        let metadata = content_file.content_metadata();
+        assert_eq!(metadata.content_source, source);
+        assert_eq!(metadata.source_path, Some(path.to_path_buf()));
+    }
+}
diff --git a/crates/nvisy-core/src/fs/content_kind.rs b/crates/nvisy-core/src/fs/content_kind.rs
new file mode 100644
index 0000000..0994bf4
--- /dev/null
+++ b/crates/nvisy-core/src/fs/content_kind.rs
@@ -0,0 +1,208 @@
+//! Content type classification for different categories of data
+//!
+//! This module provides the [`ContentKind`] enum for classifying content
+//! based on file extensions.
+
+use serde::{Deserialize, Serialize};
+use strum::{Display, EnumIter, EnumString};
+
+/// Content type classification for different categories of data
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
+#[derive(Display, EnumString, EnumIter)]
+#[derive(Serialize, Deserialize)]
+#[strum(serialize_all = "lowercase")]
+#[serde(rename_all = "lowercase")]
+pub enum ContentKind {
+    /// Plain text content
+    Text,
+    /// Document files (PDF, Word, etc.)
+    Document,
+    /// Spreadsheet files (Excel, CSV, etc.)
+    Spreadsheet,
+    /// Image files
+    Image,
+    /// Archive files (ZIP, TAR, etc.)
+    Archive,
+    /// Unknown or unsupported content type
+    #[default]
+    Unknown,
+}
+
+impl ContentKind {
+    /// Detect content kind from file extension
+    #[must_use]
+    pub fn from_file_extension(extension: &str) -> Self {
+        let ext = extension.to_lowercase();
+        match ext.as_str() {
+            // Text formats
+            "txt" | "text" | "md" | "markdown" | "rst" | "xml" | "json" | "yaml" | "yml"
+            | "toml" | "ini" | "cfg" | "conf" | "log" => Self::Text,
+
+            // Document formats
+            "pdf" | "doc" | "docx" | "rtf" | "odt" | "pages" => Self::Document,
+
+            // Spreadsheet formats
+            "csv" | "tsv" | "xls" | "xlsx" | "ods" | "numbers" => Self::Spreadsheet,
+
+            // Image formats
+            "jpg" | "jpeg" | "png" | "gif" | "bmp" | "svg" | "webp" | "ico" | "tiff" | "tif" => {
+                Self::Image
+            }
+
+            // Archive formats
+            "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "tgz" | "tbz2" | "txz" => {
+                Self::Archive
+            }
+
+            _ => Self::Unknown,
+        }
+    }
+
+    /// Check if this content kind represents text-based content
+    #[must_use]
+    pub fn is_text_based(&self) -> bool {
+        matches!(self, Self::Text)
+    }
+
+    /// Check if this content kind represents a document
+    #[must_use]
+    pub fn is_document(&self) -> bool {
+        matches!(self, Self::Document)
+    }
+
+    /// Check if this content kind represents a spreadsheet
+    #[must_use]
+    pub fn is_spreadsheet(&self) -> bool {
+        matches!(self, Self::Spreadsheet)
+    }
+
+    /// Check if this content kind represents an image
+    #[must_use]
+    pub fn is_image(&self) -> bool {
+        matches!(self, Self::Image)
+    }
+
+    /// Check if this content kind represents an archive
+    #[must_use]
+    pub fn is_archive(&self) -> bool {
+        matches!(self, Self::Archive)
+    }
+
+    /// Get common file extensions for this content kind
+    #[must_use]
+    pub fn common_extensions(&self) -> &'static [&'static str] {
+        match self {
Self::Text => &["txt", "md", "json", "xml", "yaml", "toml"],
+            Self::Document => &["pdf", "doc", "docx", "rtf", "odt"],
+            Self::Spreadsheet => &["csv", "xls", "xlsx", "ods"],
+            Self::Image => &["jpg", "jpeg", "png", "gif", "svg", "webp"],
+            Self::Archive => &["zip", "tar", "gz", "7z", "rar"],
+            Self::Unknown => &[],
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_content_kind_from_extension() {
+        assert_eq!(ContentKind::from_file_extension("txt"), ContentKind::Text);
+        assert_eq!(ContentKind::from_file_extension("TXT"), ContentKind::Text);
+        assert_eq!(ContentKind::from_file_extension("json"), ContentKind::Text);
+        assert_eq!(
+            ContentKind::from_file_extension("pdf"),
+            ContentKind::Document
+        );
+        assert_eq!(
+            ContentKind::from_file_extension("csv"),
+            ContentKind::Spreadsheet
+        );
+        assert_eq!(
+            ContentKind::from_file_extension("xlsx"),
+            ContentKind::Spreadsheet
+        );
+        assert_eq!(ContentKind::from_file_extension("png"), ContentKind::Image);
+        assert_eq!(
+            ContentKind::from_file_extension("zip"),
+            ContentKind::Archive
+        );
+        assert_eq!(ContentKind::from_file_extension("7z"), ContentKind::Archive);
+        assert_eq!(
+            ContentKind::from_file_extension("unknown"),
+            ContentKind::Unknown
+        );
+    }
+
+    #[test]
+    fn test_content_kind_predicates() {
+        assert!(ContentKind::Text.is_text_based());
+        assert!(!ContentKind::Document.is_text_based());
+
+        assert!(ContentKind::Document.is_document());
+        assert!(!ContentKind::Text.is_document());
+
+        assert!(ContentKind::Spreadsheet.is_spreadsheet());
+        assert!(!ContentKind::Document.is_spreadsheet());
+
+        assert!(ContentKind::Image.is_image());
+        assert!(!ContentKind::Text.is_image());
+
+        assert!(ContentKind::Archive.is_archive());
+        assert!(!ContentKind::Document.is_archive());
+    }
+
+    #[test]
+    fn test_content_kind_display() {
+        assert_eq!(ContentKind::Text.to_string(), "text");
+        assert_eq!(ContentKind::Document.to_string(), "document");
+        assert_eq!(ContentKind::Spreadsheet.to_string(), "spreadsheet");
+        assert_eq!(ContentKind::Image.to_string(), "image");
+        assert_eq!(ContentKind::Archive.to_string(), "archive");
+        assert_eq!(ContentKind::Unknown.to_string(), "unknown");
+    }
+
+    #[test]
+    fn test_common_extensions() {
+        let text_ext = ContentKind::Text.common_extensions();
+        assert!(text_ext.contains(&"txt"));
+        assert!(text_ext.contains(&"json"));
+
+        let archive_ext = ContentKind::Archive.common_extensions();
+        assert!(archive_ext.contains(&"zip"));
+        assert!(archive_ext.contains(&"7z"));
+
+        let unknown_ext = ContentKind::Unknown.common_extensions();
+        assert!(unknown_ext.is_empty());
+    }
+
+    #[test]
+    fn test_case_insensitive_extension_detection() {
+        assert_eq!(ContentKind::from_file_extension("TXT"), ContentKind::Text);
+        assert_eq!(
+            ContentKind::from_file_extension("PDF"),
+            ContentKind::Document
+        );
+        assert_eq!(ContentKind::from_file_extension("PNG"), ContentKind::Image);
+        assert_eq!(
+            ContentKind::from_file_extension("ZIP"),
+            ContentKind::Archive
+        );
+    }
+
+    #[test]
+    fn test_default() {
+        assert_eq!(ContentKind::default(), ContentKind::Unknown);
+    }
+
+    #[test]
+    fn test_serialization() {
+        let kind = ContentKind::Spreadsheet;
+        let serialized = serde_json::to_string(&kind).unwrap();
+        assert_eq!(serialized, "\"spreadsheet\"");
+
+        let deserialized: ContentKind = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(deserialized, kind);
+    }
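+
+    // A small, self-contained sketch: extension-based detection composes with
+    // `std::path` extraction (uses only APIs defined in this file).
+    #[test]
+    fn test_detection_from_path() {
+        use std::path::Path;
+
+        let kind = Path::new("/tmp/report.xlsx")
+            .extension()
+            .and_then(|ext| ext.to_str())
+            .map(ContentKind::from_file_extension)
+            .unwrap_or_default();
+        assert_eq!(kind, ContentKind::Spreadsheet);
+    }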
+}
diff --git a/crates/nvisy-core/src/fs/content_metadata.rs b/crates/nvisy-core/src/fs/content_metadata.rs
new file mode 100644
index 0000000..401ed4f
--- /dev/null
+++ b/crates/nvisy-core/src/fs/content_metadata.rs
@@ -0,0 +1,205 @@
+//! Content metadata for filesystem operations
+//!
+//! This module provides the [`ContentMetadata`] struct for handling metadata
+//! about content files, including paths, content types, and source tracking.
+
+use std::path::{Path, PathBuf};
+
+use serde::{Deserialize, Serialize};
+
+use super::ContentKind;
+use crate::path::ContentSource;
+
+/// Metadata associated with content files
+///
+/// This struct stores metadata about content, including its source identifier,
+/// file path, and the content kind detected from the file extension.
+#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Serialize, Deserialize)]
+pub struct ContentMetadata {
+    /// Unique identifier for the content source
+    pub content_source: ContentSource,
+    /// Optional path to the source file
+    pub source_path: Option<PathBuf>,
+}
+
+impl ContentMetadata {
+    /// Create new content metadata with just a source
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::{fs::ContentMetadata, path::ContentSource};
+    ///
+    /// let source = ContentSource::new();
+    /// let metadata = ContentMetadata::new(source);
+    /// ```
+    #[must_use]
+    pub fn new(content_source: ContentSource) -> Self {
+        Self {
+            content_source,
+            source_path: None,
+        }
+    }
+
+    /// Create content metadata with a file path
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::{fs::ContentMetadata, path::ContentSource};
+    /// use std::path::PathBuf;
+    ///
+    /// let source = ContentSource::new();
+    /// let metadata = ContentMetadata::with_path(source, PathBuf::from("document.pdf"));
+    /// assert_eq!(metadata.file_extension(), Some("pdf"));
+    /// ```
+    pub fn with_path(content_source: ContentSource, path: impl Into<PathBuf>) -> Self {
+        Self {
+            content_source,
+            source_path: Some(path.into()),
+        }
+    }
+
+    /// Get the file extension if available
+    #[must_use]
+    pub fn file_extension(&self) -> Option<&str> {
+        self.source_path
+            .as_ref()
+            .and_then(|path| path.extension())
+            .and_then(|ext| ext.to_str())
+    }
+
+    /// Detect content kind from file extension
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::{fs::{ContentMetadata, ContentKind}, path::ContentSource};
+    /// use std::path::PathBuf;
+    ///
+    /// let source = ContentSource::new();
+    /// let metadata = ContentMetadata::with_path(source, PathBuf::from("image.png"));
+    /// assert_eq!(metadata.content_kind(), ContentKind::Image);
+    /// ```
+    pub fn content_kind(&self) -> ContentKind {
+        self.file_extension()
+            .map(ContentKind::from_file_extension)
+            .unwrap_or_default()
+    }
+
+    /// Get the filename if available
+    #[must_use]
+    pub fn filename(&self) -> Option<&str> {
+        self.source_path
+            .as_ref()
+            .and_then(|path| path.file_name())
+            .and_then(|name| name.to_str())
+    }
+
+    /// Get the parent directory if available
+    #[must_use]
+    pub fn parent_directory(&self) -> Option<&Path> {
+        self.source_path.as_ref().and_then(|path| path.parent())
+    }
+
+    /// Get the full path if available
+    #[must_use]
+    pub fn path(&self) -> Option<&Path> {
+        self.source_path.as_deref()
+    }
+
+    /// Set the source path
+    pub fn set_path(&mut self, path: impl Into<PathBuf>) {
+        self.source_path = Some(path.into());
+    }
+
+    /// Remove the source path
+    pub fn clear_path(&mut self) {
+        self.source_path = None;
+    }
+
+    /// Check if this metadata has a path
+    #[must_use]
+    pub fn has_path(&self) -> bool {
+        self.source_path.is_some()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_content_metadata_creation() {
+        let source =
ContentSource::new(); + let metadata = ContentMetadata::new(source); + + assert_eq!(metadata.content_source, source); + assert!(metadata.source_path.is_none()); + assert!(!metadata.has_path()); + } + + #[test] + fn test_content_metadata_with_path() { + let source = ContentSource::new(); + let path = PathBuf::from("/path/to/document.pdf"); + let metadata = ContentMetadata::with_path(source, path.clone()); + + assert_eq!(metadata.content_source, source); + assert_eq!(metadata.source_path, Some(path)); + assert!(metadata.has_path()); + } + + #[test] + fn test_file_extension_detection() { + let source = ContentSource::new(); + let metadata = ContentMetadata::with_path(source, PathBuf::from("document.pdf")); + + assert_eq!(metadata.file_extension(), Some("pdf")); + assert_eq!(metadata.content_kind(), ContentKind::Document); + } + + #[test] + fn test_metadata_filename() { + let source = ContentSource::new(); + let metadata = ContentMetadata::with_path(source, PathBuf::from("/path/to/file.txt")); + + assert_eq!(metadata.filename(), Some("file.txt")); + } + + #[test] + fn test_metadata_parent_directory() { + let source = ContentSource::new(); + let metadata = ContentMetadata::with_path(source, PathBuf::from("/path/to/file.txt")); + + assert_eq!(metadata.parent_directory(), Some(Path::new("/path/to"))); + } + + #[test] + fn test_path_operations() { + let source = ContentSource::new(); + let mut metadata = ContentMetadata::new(source); + + assert!(!metadata.has_path()); + + metadata.set_path("test.txt"); + assert!(metadata.has_path()); + assert_eq!(metadata.filename(), Some("test.txt")); + + metadata.clear_path(); + assert!(!metadata.has_path()); + assert_eq!(metadata.filename(), None); + } + + #[test] + fn test_serde_serialization() { + let source = ContentSource::new(); + let metadata = ContentMetadata::with_path(source, PathBuf::from("test.json")); + + let serialized = serde_json::to_string(&metadata).unwrap(); + let deserialized: ContentMetadata = serde_json::from_str(&serialized).unwrap(); + + assert_eq!(metadata, deserialized); + } +} diff --git a/crates/nvisy-core/src/fs/data_sensitivity.rs b/crates/nvisy-core/src/fs/data_sensitivity.rs new file mode 100644 index 0000000..93f636c --- /dev/null +++ b/crates/nvisy-core/src/fs/data_sensitivity.rs @@ -0,0 +1,227 @@ +//! Data sensitivity level classification +//! +//! This module provides a systematic way to classify data based on sensitivity +//! and risk levels for proper handling and compliance requirements. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIter, EnumString}; + +/// Data sensitivity levels for risk assessment and handling requirements +/// +/// This enum provides a hierarchical classification system for data sensitivity, +/// allowing for proper risk assessment and appropriate security controls. +/// +/// The levels are ordered from lowest to highest sensitivity: +/// `None < Low < Medium < High` +/// +/// # Examples +/// +/// ```rust +/// use nvisy_core::fs::DataSensitivity; +/// +/// let high = DataSensitivity::High; +/// let medium = DataSensitivity::Medium; +/// let low = DataSensitivity::Low; +/// +/// assert!(high > medium); +/// assert!(medium > low); +/// assert!(high.requires_special_handling()); +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(EnumIter, EnumString, Display, Serialize, Deserialize)] +pub enum DataSensitivity { + /// No sensitivity - public or non-sensitive data + /// + /// Data that can be freely shared without privacy or security concerns. 
+    /// Examples: Public documentation, marketing materials, published research.
+    None = 0,
+
+    /// Low sensitivity - internal or limited distribution
+    ///
+    /// Data with minimal privacy implications, typically internal business data.
+    /// Examples: General business metrics, non-personal analytics, public contact info.
+    Low = 1,
+
+    /// Medium sensitivity - requires basic protection
+    ///
+    /// Data that could cause minor harm if exposed inappropriately.
+    /// Examples: Internal communications, aggregated demographics, business contacts.
+    Medium = 2,
+
+    /// High sensitivity - requires maximum protection
+    ///
+    /// Data that could cause severe harm, legal liability, or regulatory violations if exposed.
+    /// Examples: Financial data, health records, biometric data, government IDs, personal contact information.
+    High = 3,
+}
+
+impl DataSensitivity {
+    /// Get the numeric value of this sensitivity level (0-3)
+    #[must_use]
+    pub fn level(&self) -> u8 {
+        *self as u8
+    }
+
+    /// Check if this sensitivity level requires special handling
+    #[must_use]
+    pub fn requires_special_handling(&self) -> bool {
+        *self >= DataSensitivity::High
+    }
+
+    /// Check if this sensitivity level requires encryption
+    #[must_use]
+    pub fn requires_encryption(&self) -> bool {
+        *self >= DataSensitivity::Medium
+    }
+
+    /// Check if this sensitivity level requires access logging
+    #[must_use]
+    pub fn requires_access_logging(&self) -> bool {
+        *self >= DataSensitivity::High
+    }
+
+    /// Check if this sensitivity level requires a retention policy
+    #[must_use]
+    pub fn requires_retention_policy(&self) -> bool {
+        *self >= DataSensitivity::Medium
+    }
+
+    /// Check if this sensitivity level requires compliance oversight
+    #[must_use]
+    pub fn requires_compliance_oversight(&self) -> bool {
+        *self >= DataSensitivity::High
+    }
+
+    /// Get the recommended maximum retention period in days (None = indefinite)
+    #[must_use]
+    pub fn max_retention_days(&self) -> Option<u32> {
+        match self {
+            DataSensitivity::None => None,         // Indefinite
+            DataSensitivity::Low => Some(2555),    // ~7 years
+            DataSensitivity::Medium => Some(1095), // 3 years
+            DataSensitivity::High => Some(90),     // 90 days
+        }
+    }
+
+    /// Get all sensitivity levels in ascending order
+    #[must_use]
+    pub fn all() -> Vec<DataSensitivity> {
+        vec![
+            DataSensitivity::None,
+            DataSensitivity::Low,
+            DataSensitivity::Medium,
+            DataSensitivity::High,
+        ]
+    }
+
+    /// Create from a numeric level (0-3)
+    #[must_use]
+    pub fn from_level(level: u8) -> Option<Self> {
+        match level {
+            0 => Some(DataSensitivity::None),
+            1 => Some(DataSensitivity::Low),
+            2 => Some(DataSensitivity::Medium),
+            3 => Some(DataSensitivity::High),
+            _ => None,
+        }
+    }
+}
+
+impl PartialOrd for DataSensitivity {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for DataSensitivity {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        (*self as u8).cmp(&(*other as u8))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ordering() {
+        assert!(DataSensitivity::High > DataSensitivity::Medium);
+        assert!(DataSensitivity::Medium > DataSensitivity::Low);
+        assert!(DataSensitivity::Low > DataSensitivity::None);
+    }
+
+    #[test]
+    fn test_levels() {
+        assert_eq!(DataSensitivity::None.level(), 0);
+        assert_eq!(DataSensitivity::Low.level(), 1);
+        assert_eq!(DataSensitivity::Medium.level(), 2);
+        assert_eq!(DataSensitivity::High.level(), 3);
+    }
+
+    #[test]
+    fn test_from_level() {
+        assert_eq!(DataSensitivity::from_level(0), Some(DataSensitivity::None));
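+        // The remaining canonical levels round-trip as well (illustrative
+        // assertions mirroring level()/from_level() above)
+        assert_eq!(DataSensitivity::from_level(1), Some(DataSensitivity::Low));
+        assert_eq!(
+            DataSensitivity::from_level(DataSensitivity::High.level()),
+            Some(DataSensitivity::High)
+        );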
+        assert_eq!(DataSensitivity::from_level(4), None);
+    }
+
+    #[test]
+    fn test_requirements() {
+        let none = DataSensitivity::None;
+        let low = DataSensitivity::Low;
+        let medium = DataSensitivity::Medium;
+        let high = DataSensitivity::High;
+
+        // Special handling
+        assert!(!none.requires_special_handling());
+        assert!(!low.requires_special_handling());
+        assert!(!medium.requires_special_handling());
+        assert!(high.requires_special_handling());
+
+        // Encryption
+        assert!(!none.requires_encryption());
+        assert!(!low.requires_encryption());
+        assert!(medium.requires_encryption());
+        assert!(high.requires_encryption());
+
+        // Access logging
+        assert!(!none.requires_access_logging());
+        assert!(!low.requires_access_logging());
+        assert!(!medium.requires_access_logging());
+        assert!(high.requires_access_logging());
+
+        // Compliance oversight
+        assert!(!none.requires_compliance_oversight());
+        assert!(!low.requires_compliance_oversight());
+        assert!(!medium.requires_compliance_oversight());
+        assert!(high.requires_compliance_oversight());
+    }
+
+    #[test]
+    fn test_retention_periods() {
+        assert_eq!(DataSensitivity::None.max_retention_days(), None);
+        assert_eq!(DataSensitivity::Low.max_retention_days(), Some(2555));
+        assert_eq!(DataSensitivity::Medium.max_retention_days(), Some(1095));
+        assert_eq!(DataSensitivity::High.max_retention_days(), Some(90));
+    }
+
+    #[test]
+    fn test_display() {
+        assert_eq!(format!("{}", DataSensitivity::High), "High");
+        assert_eq!(format!("{}", DataSensitivity::None), "None");
+    }
+
+    #[test]
+    fn test_all_levels() {
+        let all = DataSensitivity::all();
+        assert_eq!(all.len(), 4);
+        assert_eq!(all[0], DataSensitivity::None);
+        assert_eq!(all[3], DataSensitivity::High);
+    }
+
+    #[test]
+    fn test_serialization() {
+        let level = DataSensitivity::High;
+        let json = serde_json::to_string(&level).unwrap();
+        let deserialized: DataSensitivity = serde_json::from_str(&json).unwrap();
+        assert_eq!(level, deserialized);
+    }
+}
diff --git a/crates/nvisy-core/src/fs/data_structure_kind.rs b/crates/nvisy-core/src/fs/data_structure_kind.rs
new file mode 100644
index 0000000..81562fa
--- /dev/null
+++ b/crates/nvisy-core/src/fs/data_structure_kind.rs
@@ -0,0 +1,130 @@
+//! Data structure type classification
+//!
+//! This module provides classification for different ways data can be structured,
+//! from highly organized formats to completely unstructured content.
+
+use serde::{Deserialize, Serialize};
+use strum::{EnumIter, EnumString};
+
+use crate::fs::DataSensitivity;
+
+/// Classification of data based on its structural organization
+///
+/// This enum distinguishes between different levels of data organization,
+/// from highly structured formats with defined schemas to completely
+/// unstructured content without predefined organization.
+///
+/// # Examples
+///
+/// ```rust
+/// use nvisy_core::fs::DataStructureKind;
+///
+/// let structured = DataStructureKind::HighlyStructured;
+/// assert!(structured.has_schema());
+///
+/// let unstructured = DataStructureKind::Unstructured;
+/// assert!(!unstructured.has_schema());
+/// ```
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+#[derive(Serialize, Deserialize, EnumIter, EnumString)]
+pub enum DataStructureKind {
+    /// Highly Structured Data
+    ///
+    /// Data with rigid schema, defined relationships, and strict formatting rules.
+    /// Examples: Relational database tables, XML with XSD schema, JSON with JSON Schema.
+ /// + /// **Schema**: Required and enforced + /// **Queryable**: Highly queryable with structured query languages + /// **Parsing**: Predictable parsing with validation + HighlyStructured, + + /// Semi-Structured Data + /// + /// Data with some organizational structure but flexible schema. + /// Examples: JSON without strict schema, XML without XSD, CSV files, log files. + /// + /// **Schema**: Optional or loosely defined + /// **Queryable**: Moderately queryable with specialized tools + /// **Parsing**: Parseable but may require schema inference + SemiStructured, + + /// Unstructured Data + /// + /// Data without predefined format, schema, or organizational structure. + /// Examples: Plain text, images, audio, video, documents, emails. + /// + /// **Schema**: No schema + /// **Queryable**: Requires full-text search or content analysis + /// **Parsing**: Content-dependent parsing and analysis + Unstructured, +} + +impl DataStructureKind { + /// Get the base sensitivity level for this structure type + /// + /// Note: Actual sensitivity depends on the content, not just the structure + #[must_use] + pub fn base_sensitivity_level(&self) -> DataSensitivity { + match self { + // Structure type alone doesn't determine sensitivity + // Content analysis is required for actual sensitivity assessment + DataStructureKind::HighlyStructured + | DataStructureKind::SemiStructured + | DataStructureKind::Unstructured => DataSensitivity::Low, + } + } + + /// Check if this structure type has a defined schema + #[must_use] + pub fn has_schema(&self) -> bool { + matches!(self, DataStructureKind::HighlyStructured) + } + + /// Check if this structure type is easily queryable + #[must_use] + pub fn is_queryable(&self) -> bool { + !matches!(self, DataStructureKind::Unstructured) + } + + /// Check if parsing is predictable for this structure type + #[must_use] + pub fn has_predictable_parsing(&self) -> bool { + matches!(self, DataStructureKind::HighlyStructured) + } + + /// Check if this structure type supports relationship queries + #[must_use] + pub fn supports_relationships(&self) -> bool { + matches!(self, DataStructureKind::HighlyStructured) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_structure_characteristics() { + let highly_structured = DataStructureKind::HighlyStructured; + assert!(highly_structured.has_schema()); + assert!(highly_structured.is_queryable()); + assert!(highly_structured.has_predictable_parsing()); + + let unstructured = DataStructureKind::Unstructured; + assert!(!unstructured.has_schema()); + assert!(!unstructured.is_queryable()); + assert!(!unstructured.has_predictable_parsing()); + + let highly_structured = DataStructureKind::HighlyStructured; + assert!(highly_structured.supports_relationships()); + assert!(highly_structured.has_schema()); + } + + #[test] + fn test_serialization() { + let structure_type = DataStructureKind::SemiStructured; + let json = serde_json::to_string(&structure_type).unwrap(); + let deserialized: DataStructureKind = serde_json::from_str(&json).unwrap(); + assert_eq!(structure_type, deserialized); + } +} diff --git a/crates/nvisy-core/src/fs/mod.rs b/crates/nvisy-core/src/fs/mod.rs new file mode 100644 index 0000000..ab2638f --- /dev/null +++ b/crates/nvisy-core/src/fs/mod.rs @@ -0,0 +1,114 @@ +//! Filesystem module for content file operations +//! +//! This module provides filesystem-specific functionality for working with +//! content files, including file metadata handling and archive operations. +//! +//! # Core Types +//! +//! 
- [`ContentFile`]: A file wrapper that combines filesystem operations with content tracking
+//! - [`ContentFileMetadata`]: Metadata information for content files
+//!
+//! # Example
+//!
+//! ```no_run
+//! use nvisy_core::fs::ContentFile;
+//! use nvisy_core::io::ContentData;
+//!
+//! async fn example() -> Result<(), Box<dyn std::error::Error>> {
+//!     // Create a new file
+//!     let mut content_file = ContentFile::create("example.txt").await?;
+//!
+//!     // Write some content
+//!     let content_data = ContentData::from("Hello, world!");
+//!     let metadata = content_file.write_from_content_data(content_data).await?;
+//!
+//!     println!("Written to: {:?}", metadata.source_path);
+//!     Ok(())
+//! }
+//! ```
+
+mod content_file;
+mod content_kind;
+mod content_metadata;
+mod data_sensitivity;
+mod data_structure_kind;
+
+use std::path::PathBuf;
+
+// Re-export main types
+pub use content_file::ContentFile;
+pub use content_kind::ContentKind;
+pub use content_metadata::ContentMetadata;
+pub use data_sensitivity::DataSensitivity;
+pub use data_structure_kind::DataStructureKind;
+use serde::{Deserialize, Serialize};
+
+use crate::path::ContentSource;
+
+/// Metadata information for content files
+///
+/// TODO: Implement comprehensive file metadata handling including:
+/// - File timestamps (created, modified, accessed)
+/// - File permissions and ownership
+/// - File size and disk usage
+/// - Extended attributes
+/// - Content type detection beyond extensions
+#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Serialize, Deserialize)]
+pub struct ContentFileMetadata {
+    /// Content source identifier
+    pub content_source: ContentSource,
+    /// Path to the file
+    pub path: PathBuf,
+    /// Detected content kind
+    pub content_kind: Option<ContentKind>,
+    /// File size in bytes
+    pub size: Option<u64>,
+}
+
+impl ContentFileMetadata {
+    /// Create new file metadata
+    #[must_use]
+    pub fn new(content_source: ContentSource, path: PathBuf) -> Self {
+        Self {
+            content_source,
+            path,
+            content_kind: None,
+            size: None,
+        }
+    }
+
+    /// Set the content kind
+    #[must_use]
+    pub fn with_content_kind(mut self, kind: ContentKind) -> Self {
+        self.content_kind = Some(kind);
+        self
+    }
+
+    /// Set the file size
+    #[must_use]
+    pub fn with_size(mut self, size: u64) -> Self {
+        self.size = Some(size);
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_content_file_metadata() {
+        let source = ContentSource::new();
+        let path = PathBuf::from("test.txt");
+
+        let metadata = ContentFileMetadata::new(source, path.clone())
+            .with_content_kind(ContentKind::Text)
+            .with_size(1024);
+
+        assert_eq!(metadata.content_source, source);
+        assert_eq!(metadata.path, path);
+        assert_eq!(metadata.content_kind, Some(ContentKind::Text));
+        assert_eq!(metadata.size, Some(1024));
+    }
+}
diff --git a/crates/nvisy-core/src/io/content.rs b/crates/nvisy-core/src/io/content.rs
new file mode 100644
index 0000000..cf0af5f
--- /dev/null
+++ b/crates/nvisy-core/src/io/content.rs
@@ -0,0 +1,174 @@
+//! Content types supported by the Nvisy system
+//!
+//! This module provides the Content enum for representing different types
+//! of data content within the system.
+
+use bytes::Bytes;
+use serde::{Deserialize, Serialize};
+
+/// Content types supported by the Nvisy system
+///
+/// Simplified content representation for efficient processing.
+/// +/// # Examples +/// +/// ```rust +/// use nvisy_core::io::Content; +/// use bytes::Bytes; +/// +/// let text_content = Content::Text("Sample text".to_string()); +/// let binary_content = Content::Binary { +/// data: Bytes::from(vec![0x48, 0x65, 0x6C, 0x6C, 0x6F]), +/// mime_type: "application/octet-stream".to_string(), +/// }; +/// +/// assert!(text_content.is_textual()); +/// assert!(!binary_content.is_textual()); +/// ``` +#[derive(Debug, Clone, PartialEq)] +#[derive(Serialize, Deserialize)] +pub enum Content { + /// Text content stored as UTF-8 string + Text(String), + + /// Generic binary content with MIME type + Binary { + /// Raw binary data + data: Bytes, + /// MIME type describing the content + mime_type: String, + }, + + /// Empty or null content + Empty, +} + +impl Content { + /// Get the type name of this content + pub fn type_name(&self) -> &'static str { + match self { + Content::Text(_) => "text", + Content::Binary { .. } => "binary", + Content::Empty => "empty", + } + } + + /// Check if this content is textual + pub fn is_textual(&self) -> bool { + matches!(self, Content::Text(_)) + } + + /// Check if this content is multimedia (audio, video, image) + pub fn is_multimedia(&self) -> bool { + false // Simplified - no specific multimedia types + } + + /// Check if this content has binary data + pub fn has_binary_data(&self) -> bool { + !matches!(self, Content::Text(_) | Content::Empty) + } + + /// Get the estimated size in bytes + pub fn estimated_size(&self) -> usize { + match self { + Content::Text(text) => text.len(), + Content::Binary { data, .. } => data.len(), + Content::Empty => 0, + } + } + + /// Get the format/MIME type of this content + pub fn format(&self) -> Option<&str> { + match self { + Content::Text(_) => Some("text/plain"), + Content::Binary { mime_type, .. } => Some(mime_type), + Content::Empty => None, + } + } + + /// Extract raw bytes from content, if available + pub fn as_bytes(&self) -> Option<&Bytes> { + match self { + Content::Binary { data, .. 
} => Some(data),
+            Content::Text(_) | Content::Empty => None,
+        }
+    }
+
+    /// Extract text from content, if it's textual
+    pub fn as_text(&self) -> Option<&str> {
+        match self {
+            Content::Text(text) => Some(text),
+            _ => None,
+        }
+    }
+
+    /// Create text content
+    pub fn text<S: Into<String>>(content: S) -> Self {
+        Content::Text(content.into())
+    }
+
+    /// Create binary content
+    pub fn binary<S: Into<String>>(data: Bytes, mime_type: S) -> Self {
+        Content::Binary {
+            data,
+            mime_type: mime_type.into(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_content_types() {
+        let text = Content::text("Hello");
+        assert!(text.is_textual());
+        assert!(!text.is_multimedia());
+        assert!(!text.has_binary_data());
+        assert_eq!(text.type_name(), "text");
+        assert_eq!(text.format(), Some("text/plain"));
+
+        let binary_data = Bytes::from(vec![1, 2, 3, 4]);
+        let binary = Content::binary(binary_data, "application/octet-stream");
+        assert!(!binary.is_textual());
+        assert!(!binary.is_multimedia());
+        assert!(binary.has_binary_data());
+        assert_eq!(binary.type_name(), "binary");
+    }
+
+    #[test]
+    fn test_content_size_estimation() {
+        let text = Content::text("Hello, world!");
+        assert_eq!(text.estimated_size(), 13);
+
+        let binary_data = Bytes::from(vec![0; 100]);
+        let binary = Content::binary(binary_data, "application/octet-stream");
+        assert_eq!(binary.estimated_size(), 100);
+
+        let empty = Content::Empty;
+        assert_eq!(empty.estimated_size(), 0);
+    }
+
+    #[test]
+    fn test_content_data_access() {
+        let text_content = Content::text("Hello");
+        assert_eq!(text_content.as_text(), Some("Hello"));
+        assert!(text_content.as_bytes().is_none());
+
+        let binary_data = Bytes::from(vec![1, 2, 3]);
+        let binary_content = Content::binary(binary_data.clone(), "test");
+        assert_eq!(binary_content.as_bytes(), Some(&binary_data));
+        assert!(binary_content.as_text().is_none());
+    }
+
+    #[test]
+    fn test_serialization() {
+        let content = Content::text("Test content");
+
+        let json = serde_json::to_string(&content).unwrap();
+        let deserialized: Content = serde_json::from_str(&json).unwrap();
+
+        assert_eq!(content, deserialized);
+    }
+}
diff --git a/crates/nvisy-core/src/io/content_data.rs b/crates/nvisy-core/src/io/content_data.rs
new file mode 100644
index 0000000..dda1542
--- /dev/null
+++ b/crates/nvisy-core/src/io/content_data.rs
@@ -0,0 +1,417 @@
+//! Content data structure for storing and managing content with metadata
+//!
+//! This module provides the [`ContentData`] struct for storing content data
+//! along with its metadata and source information.
+
+use std::fmt;
+use std::sync::OnceLock;
+
+use bytes::Bytes;
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+
+use crate::error::{Error, ErrorResource, ErrorType, Result};
+use crate::path::ContentSource;
+
+/// Content data with metadata and computed hashes
+///
+/// This struct is a minimal wrapper around `bytes::Bytes` that stores content data
+/// along with metadata about its source and an optional computed SHA256 hash.
+/// It is designed to be cheap to clone thanks to the `bytes::Bytes` type.
+/// The SHA256 hash is lazily computed using `OnceLock` for lock-free access after initialization.
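+///
+/// # Example
+///
+/// ```
+/// use nvisy_core::io::ContentData;
+///
+/// // A minimal usage sketch relying only on the methods defined below.
+/// let content = ContentData::from("Hello, world!");
+/// assert_eq!(content.size(), 13);
+/// assert_eq!(content.sha256().len(), 32);
+/// ```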
+#[derive(Debug)]
+#[derive(Serialize, Deserialize)]
+pub struct ContentData {
+    /// Unique identifier for the content source
+    pub content_source: ContentSource,
+    /// The actual content data
+    pub content_data: Bytes,
+    /// Lazily computed SHA256 hash of the content
+    #[serde(skip)]
+    content_sha256: OnceLock<Bytes>,
+}
+
+impl ContentData {
+    /// Create new content data
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::{io::ContentData, path::ContentSource};
+    /// use bytes::Bytes;
+    ///
+    /// let source = ContentSource::new();
+    /// let data = Bytes::from("Hello, world!");
+    /// let content = ContentData::new(source, data);
+    ///
+    /// assert_eq!(content.size(), 13);
+    /// ```
+    pub fn new(content_source: ContentSource, content_data: Bytes) -> Self {
+        Self {
+            content_source,
+            content_data,
+            content_sha256: OnceLock::new(),
+        }
+    }
+
+    /// Get the size of the content in bytes
+    pub fn size(&self) -> usize {
+        self.content_data.len()
+    }
+
+    /// Get pretty formatted size string
+    #[allow(clippy::cast_precision_loss)]
+    pub fn get_pretty_size(&self) -> String {
+        let bytes = self.size();
+        match bytes {
+            0..=1023 => format!("{bytes} B"),
+            1024..=1_048_575 => format!("{:.1} KB", bytes as f64 / 1024.0),
+            1_048_576..=1_073_741_823 => format!("{:.1} MB", bytes as f64 / 1_048_576.0),
+            _ => format!("{:.1} GB", bytes as f64 / 1_073_741_824.0),
+        }
+    }
+
+    /// Get the content data as bytes slice
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.content_data
+    }
+
+    /// Get the content data as bytes
+    pub fn into_bytes(self) -> Bytes {
+        self.content_data
+    }
+
+    /// Check if the content is likely text (basic heuristic)
+    pub fn is_likely_text(&self) -> bool {
+        self.content_data
+            .iter()
+            .all(|&b| b.is_ascii_graphic() || b.is_ascii_whitespace())
+    }
+
+    /// Try to convert the content data to a UTF-8 string
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the content data contains invalid UTF-8 sequences.
+    pub fn as_string(&self) -> Result<String> {
+        String::from_utf8(self.content_data.to_vec()).map_err(|e| {
+            Error::new(
+                ErrorType::Runtime,
+                ErrorResource::Core,
+                format!("Invalid UTF-8: {e}"),
+            )
+        })
+    }
+
+    /// Try to convert the content data to a UTF-8 string slice
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the content data contains invalid UTF-8 sequences.
+    pub fn as_str(&self) -> Result<&str> {
+        std::str::from_utf8(&self.content_data).map_err(|e| {
+            Error::new(
+                ErrorType::Runtime,
+                ErrorResource::Core,
+                format!("Invalid UTF-8: {e}"),
+            )
+        })
+    }
+
+    /// Compute SHA256 hash of the content
+    fn compute_sha256_internal(&self) -> Bytes {
+        let mut hasher = Sha256::new();
+        hasher.update(&self.content_data);
+        Bytes::from(hasher.finalize().to_vec())
+    }
+
+    /// Get the SHA256 hash, computing it if not already done
+    pub fn sha256(&self) -> &Bytes {
+        self.content_sha256
+            .get_or_init(|| self.compute_sha256_internal())
+    }
+
+    /// Get the SHA256 hash as hex string
+    pub fn sha256_hex(&self) -> String {
+        hex::encode(self.sha256())
+    }
+
+    /// Verify the content against a provided SHA256 hash
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the computed hash does not match the expected hash.
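+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::io::ContentData;
+    ///
+    /// // Sketch: a content's own hash verifies; an all-zero hash does not.
+    /// let content = ContentData::from("Hello, world!");
+    /// let hash = content.sha256().clone();
+    /// assert!(content.verify_sha256(&hash).is_ok());
+    /// assert!(content.verify_sha256([0u8; 32]).is_err());
+    /// ```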
+    pub fn verify_sha256(&self, expected_hash: impl AsRef<[u8]>) -> Result<()> {
+        let actual_hash = self.sha256();
+        let expected = expected_hash.as_ref();
+
+        if actual_hash.as_ref() == expected {
+            Ok(())
+        } else {
+            Err(Error::new(
+                ErrorType::Runtime,
+                ErrorResource::Core,
+                format!(
+                    "Hash mismatch: expected {}, got {}",
+                    hex::encode(expected),
+                    hex::encode(actual_hash)
+                ),
+            ))
+        }
+    }
+
+    /// Get a slice of the content data
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the end index is beyond the content length or if start is greater than end.
+    pub fn slice(&self, start: usize, end: usize) -> Result<Bytes> {
+        if end > self.content_data.len() {
+            return Err(Error::new(
+                ErrorType::Runtime,
+                ErrorResource::Core,
+                format!(
+                    "Slice end {} exceeds content length {}",
+                    end,
+                    self.content_data.len()
+                ),
+            ));
+        }
+        if start > end {
+            return Err(Error::new(
+                ErrorType::Runtime,
+                ErrorResource::Core,
+                format!("Slice start {start} is greater than end {end}"),
+            ));
+        }
+        Ok(self.content_data.slice(start..end))
+    }
+
+    /// Check if the content is empty
+    pub fn is_empty(&self) -> bool {
+        self.content_data.is_empty()
+    }
+}
+
+// Manual implementation of Clone since OnceLock doesn't propagate the computed value
+impl Clone for ContentData {
+    fn clone(&self) -> Self {
+        let new_lock = OnceLock::new();
+        // Copy the computed hash if available
+        if let Some(hash) = self.content_sha256.get() {
+            let _ = new_lock.set(hash.clone());
+        }
+
+        Self {
+            content_source: self.content_source,
+            content_data: self.content_data.clone(),
+            content_sha256: new_lock,
+        }
+    }
+}
+
+// Manual implementation of PartialEq
+impl PartialEq for ContentData {
+    fn eq(&self, other: &Self) -> bool {
+        self.content_source == other.content_source && self.content_data == other.content_data
+    }
+}
+
+impl Eq for ContentData {}
+
+// Implement From conversions for common types
+impl From<&str> for ContentData {
+    fn from(s: &str) -> Self {
+        let source = ContentSource::new();
+        Self::new(source, Bytes::from(s.to_string()))
+    }
+}
+
+impl From<String> for ContentData {
+    fn from(s: String) -> Self {
+        let source = ContentSource::new();
+        Self::new(source, Bytes::from(s))
+    }
+}
+
+impl From<&[u8]> for ContentData {
+    fn from(bytes: &[u8]) -> Self {
+        let source = ContentSource::new();
+        Self::new(source, Bytes::copy_from_slice(bytes))
+    }
+}
+
+impl From<Vec<u8>> for ContentData {
+    fn from(vec: Vec<u8>) -> Self {
+        let source = ContentSource::new();
+        Self::new(source, Bytes::from(vec))
+    }
+}
+
+impl From<Bytes> for ContentData {
+    fn from(bytes: Bytes) -> Self {
+        let source = ContentSource::new();
+        Self::new(source, bytes)
+    }
+}
+
+impl fmt::Display for ContentData {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if let Ok(text) = self.as_str() {
+            write!(f, "{text}")
+        } else {
+            write!(f, "[Binary data: {} bytes]", self.size())
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_content_data_creation() {
+        let source = ContentSource::new();
+        let data = Bytes::from("Hello, world!");
+        let content = ContentData::new(source, data);
+
+        assert_eq!(content.content_source, source);
+        assert_eq!(content.size(), 13);
+        // Check that hash is not computed yet
+        assert!(content.content_sha256.get().is_none());
+    }
+
+    #[test]
+    fn test_size_methods() {
+        let content = ContentData::from("Hello");
+        assert_eq!(content.size(), 5);
+
+        let pretty_size = content.get_pretty_size();
+        assert!(!pretty_size.is_empty());
+    }
+
+    #[test]
+    fn test_sha256_computation() {
+        let content =
ContentData::from("Hello, world!"); + let hash = content.sha256(); + + assert!(content.content_sha256.get().is_some()); + assert_eq!(hash.len(), 32); // SHA256 is 32 bytes + + // Test getting cached hash + let hash2 = content.sha256(); + assert_eq!(hash, hash2); + } + + #[test] + fn test_sha256_verification() { + let content = ContentData::from("Hello, world!"); + let hash = content.sha256().clone(); + + // Should verify successfully against itself + assert!(content.verify_sha256(&hash).is_ok()); + + // Should fail against different hash + let wrong_hash = vec![0u8; 32]; + assert!(content.verify_sha256(&wrong_hash).is_err()); + } + + #[test] + fn test_string_conversion() { + let content = ContentData::from("Hello, world!"); + assert_eq!(content.as_string().unwrap(), "Hello, world!"); + assert_eq!(content.as_str().unwrap(), "Hello, world!"); + + let binary_content = ContentData::from(vec![0xFF, 0xFE, 0xFD]); + assert!(binary_content.as_string().is_err()); + assert!(binary_content.as_str().is_err()); + } + + #[test] + fn test_is_likely_text() { + let text_content = ContentData::from("Hello, world!"); + assert!(text_content.is_likely_text()); + + let binary_content = ContentData::from(vec![0xFF, 0xFE, 0xFD]); + assert!(!binary_content.is_likely_text()); + } + + #[test] + fn test_slice() { + let content = ContentData::from("Hello, world!"); + + let slice = content.slice(0, 5).unwrap(); + assert_eq!(slice, Bytes::from("Hello")); + + let slice = content.slice(7, 12).unwrap(); + assert_eq!(slice, Bytes::from("world")); + + // Test bounds checking + assert!(content.slice(0, 100).is_err()); + assert!(content.slice(10, 5).is_err()); + } + + #[test] + fn test_from_conversions() { + let from_str = ContentData::from("test"); + let from_string = ContentData::from("test".to_string()); + let from_bytes = ContentData::from(b"test".as_slice()); + let from_vec = ContentData::from(b"test".to_vec()); + let from_bytes_type = ContentData::from(Bytes::from("test")); + + assert_eq!(from_str.as_str().unwrap(), "test"); + assert_eq!(from_string.as_str().unwrap(), "test"); + assert_eq!(from_bytes.as_str().unwrap(), "test"); + assert_eq!(from_vec.as_str().unwrap(), "test"); + assert_eq!(from_bytes_type.as_str().unwrap(), "test"); + } + + #[test] + fn test_display() { + let text_content = ContentData::from("Hello"); + assert_eq!(format!("{text_content}"), "Hello"); + + let binary_content = ContentData::from(vec![0xFF, 0xFE]); + assert!(format!("{binary_content}").contains("Binary data")); + } + + #[test] + fn test_cloning_preserves_hash() { + let original = ContentData::from("Hello, world!"); + // Compute hash first + let _ = original.sha256(); + + let cloned = original.clone(); + + // Both should have the hash computed + assert!(original.content_sha256.get().is_some()); + assert!(cloned.content_sha256.get().is_some()); + assert_eq!(original.sha256(), cloned.sha256()); + } + + #[test] + fn test_cloning_shares_bytes() { + let original = ContentData::from("Hello, world!"); + let cloned = original.clone(); + + // They should be equal + assert_eq!(original, cloned); + + // The underlying bytes should share the same memory + assert_eq!(original.content_data.as_ptr(), cloned.content_data.as_ptr()); + } + + #[test] + fn test_into_bytes() { + let content = ContentData::from("Hello, world!"); + let bytes = content.into_bytes(); + assert_eq!(bytes, Bytes::from("Hello, world!")); + } + + #[test] + fn test_empty_content() { + let content = ContentData::from(""); + assert!(content.is_empty()); + assert_eq!(content.size(), 0); + } 
+}
diff --git a/crates/nvisy-core/src/io/content_read.rs b/crates/nvisy-core/src/io/content_read.rs
new file mode 100644
index 0000000..3f3b61e
--- /dev/null
+++ b/crates/nvisy-core/src/io/content_read.rs
@@ -0,0 +1,372 @@
+//! Content reading trait for async I/O operations
+//!
+//! This module provides the [`AsyncContentRead`] trait for reading content data
+//! from various async sources into [`ContentData`] structures.
+
+use std::future::Future;
+use std::io;
+
+use bytes::Bytes;
+use tokio::io::{AsyncRead, AsyncReadExt};
+
+use super::ContentData;
+use crate::path::ContentSource;
+
+/// Trait for reading content from async sources
+///
+/// This trait provides methods for reading content data from async sources
+/// and converting them into [`ContentData`] structures, with options for
+/// size limits and verification.
+pub trait AsyncContentRead: AsyncRead + Unpin + Send {
+    /// Read all content from the source into a `ContentData` structure
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the read operation fails or if there are I/O issues.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentRead, ContentData};
+    /// use tokio::fs::File;
+    /// use std::io;
+    ///
+    /// async fn read_file() -> io::Result<ContentData> {
+    ///     let mut file = File::open("example.txt").await?;
+    ///     file.read_content().await
+    /// }
+    /// ```
+    fn read_content(&mut self) -> impl Future<Output = io::Result<ContentData>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            let mut buffer = Vec::new();
+            self.read_to_end(&mut buffer).await?;
+
+            let content_data = ContentData::new(ContentSource::new(), buffer.into());
+            Ok(content_data)
+        }
+    }
+
+    /// Read content with a specified content source
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the read operation fails or if there are I/O issues.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::{io::{AsyncContentRead, ContentData}, path::ContentSource};
+    /// use tokio::fs::File;
+    /// use std::io;
+    ///
+    /// async fn read_with_source() -> io::Result<ContentData> {
+    ///     let mut file = File::open("example.txt").await?;
+    ///     let source = ContentSource::new();
+    ///     file.read_content_with_source(source).await
+    /// }
+    /// ```
+    fn read_content_with_source(
+        &mut self,
+        source: ContentSource,
+    ) -> impl Future<Output = io::Result<ContentData>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            let mut buffer = Vec::new();
+            self.read_to_end(&mut buffer).await?;
+
+            let content_data = ContentData::new(source, buffer.into());
+            Ok(content_data)
+        }
+    }
+
+    /// Read content up to a maximum size limit
+    ///
+    /// This method prevents reading extremely large files that could cause
+    /// memory issues.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the read operation fails, if there are I/O issues,
+    /// or if the content exceeds the maximum size limit.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentRead, ContentData};
+    /// use tokio::fs::File;
+    /// use std::io;
+    ///
+    /// async fn read_limited_content() -> io::Result<ContentData> {
+    ///     let mut file = File::open("example.txt").await?;
+    ///     // Limit to 1MB
+    ///     file.read_content_limited(1024 * 1024).await
+    /// }
+    /// ```
+    fn read_content_limited(
+        &mut self,
+        max_size: usize,
+    ) -> impl Future<Output = io::Result<ContentData>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            let mut buffer = Vec::with_capacity(std::cmp::min(max_size, 8192));
+            let mut total_read = 0;
+
+            loop {
+                let mut temp_buf = vec![0u8; 8192];
+                let bytes_read = self.read(&mut temp_buf).await?;
+
+                if bytes_read == 0 {
+                    break; // EOF reached
+                }
+
+                if total_read + bytes_read > max_size {
+                    return Err(io::Error::new(
+                        io::ErrorKind::InvalidData,
+                        format!("Content size exceeds maximum limit of {max_size} bytes"),
+                    ));
+                }
+
+                buffer.extend_from_slice(&temp_buf[..bytes_read]);
+                total_read += bytes_read;
+            }
+
+            let content_data = ContentData::new(ContentSource::new(), buffer.into());
+            Ok(content_data)
+        }
+    }
+
+    /// Read content in chunks, calling a callback for each chunk
+    ///
+    /// This is useful for processing large files without loading them
+    /// entirely into memory.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the read operation fails or if the callback
+    /// returns an error.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::AsyncContentRead;
+    /// use tokio::fs::File;
+    /// use bytes::Bytes;
+    /// use std::io;
+    ///
+    /// async fn process_chunks() -> io::Result<()> {
+    ///     let mut file = File::open("large_file.txt").await?;
+    ///
+    ///     file.read_content_chunked(8192, |chunk| {
+    ///         println!("Processing chunk of {} bytes", chunk.len());
+    ///         Ok(())
+    ///     }).await
+    /// }
+    /// ```
+    fn read_content_chunked<E>(
+        &mut self,
+        chunk_size: usize,
+        mut callback: impl FnMut(Bytes) -> std::result::Result<(), E> + Send,
+    ) -> impl Future<Output = std::result::Result<(), E>> + Send
+    where
+        Self: Sized,
+        E: From<io::Error> + Send,
+    {
+        async move {
+            let mut buffer = vec![0u8; chunk_size];
+
+            loop {
+                let bytes_read = self.read(&mut buffer).await?;
+                if bytes_read == 0 {
+                    break; // EOF reached
+                }
+
+                let chunk = Bytes::copy_from_slice(&buffer[..bytes_read]);
+                callback(chunk)?;
+            }
+
+            Ok(())
+        }
+    }
+
+    /// Read content with verification
+    ///
+    /// This method reads the content and optionally verifies it meets
+    /// certain criteria.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the read operation fails, if there are I/O issues,
+    /// or if verification fails.
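+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentRead, ContentData};
+    /// use tokio::fs::File;
+    /// use std::io;
+    ///
+    /// // A usage sketch: accept only non-empty content.
+    /// async fn read_non_empty() -> io::Result<ContentData> {
+    ///     let mut file = File::open("example.txt").await?;
+    ///     file.read_content_verified(|data| !data.is_empty()).await
+    /// }
+    /// ```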
+    fn read_content_verified<F>(
+        &mut self,
+        verify_fn: F,
+    ) -> impl Future<Output = io::Result<ContentData>> + Send
+    where
+        Self: Sized,
+        F: FnOnce(&[u8]) -> bool + Send,
+    {
+        async move {
+            let mut buffer = Vec::new();
+            self.read_to_end(&mut buffer).await?;
+
+            // Verify with a reference to the buffer data
+            if !verify_fn(&buffer) {
+                return Err(io::Error::new(
+                    io::ErrorKind::InvalidData,
+                    "Content verification failed",
+                ));
+            }
+
+            // Convert to ContentData after verification
+            let content_data = ContentData::new(ContentSource::new(), buffer.into());
+            Ok(content_data)
+        }
+    }
+}
+
+// Implementations for common types
+impl AsyncContentRead for tokio::fs::File {}
+impl AsyncContentRead for Box<dyn AsyncRead + Unpin + Send> {}
+
+// Test-specific implementations
+#[cfg(test)]
+impl<T: AsRef<[u8]> + Unpin + Send> AsyncContentRead for std::io::Cursor<T> {}
+
+#[cfg(test)]
+mod tests {
+    use std::io::{Cursor, Result};
+
+    use super::*;
+
+    #[tokio::test]
+    async fn test_read_content() -> Result<()> {
+        let data = b"Hello, world!";
+        let mut cursor = Cursor::new(data);
+
+        let content = cursor.read_content().await.unwrap();
+        assert_eq!(content.as_bytes(), data);
+        assert_eq!(content.size(), data.len());
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_read_content_with_source() -> Result<()> {
+        let data = b"Hello, world!";
+        let mut cursor = Cursor::new(data);
+        let source = ContentSource::new();
+
+        let content = cursor.read_content_with_source(source).await.unwrap();
+        assert_eq!(content.content_source, source);
+        assert_eq!(content.as_bytes(), data);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_read_content_limited() -> Result<()> {
+        let data = b"Hello, world!";
+        let mut cursor = Cursor::new(data);
+
+        // Should succeed within limit
+        let content = cursor.read_content_limited(20).await?;
+        assert_eq!(content.as_bytes(), data);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_read_content_limited_exceeds() -> Result<()> {
+        let data = b"Hello, world!";
+        let mut cursor = Cursor::new(data);
+
+        // Should fail when exceeding limit
+        let result = cursor.read_content_limited(5).await;
+        assert!(result.is_err());
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_read_content_chunked() -> Result<()> {
+        let data = b"Hello, world!";
+        let mut cursor = Cursor::new(data);
+
+        let mut chunks = Vec::new();
+        let result = cursor
+            .read_content_chunked(5, |chunk| {
+                chunks.push(chunk);
+                Ok::<(), io::Error>(())
+            })
+            .await;
+
+        assert!(result.is_ok());
+        assert!(!chunks.is_empty());
+
+        // Concatenate chunks and verify they match original data
+        let concatenated: Vec<u8> = chunks
+            .into_iter()
+            .flat_map(|chunk| chunk.to_vec())
+            .collect();
+        assert_eq!(concatenated, data);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_read_content_verified() -> Result<()> {
+        let data = b"Hello, world!";
+        let mut cursor = Cursor::new(data);
+
+        // Should succeed with passing verification
+        let content = cursor
+            .read_content_verified(|data| !data.is_empty())
+            .await?;
+        assert_eq!(content.as_bytes(), data);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_read_content_verified_fails() -> Result<()> {
+        let data = b"Hello, world!";
+        let mut cursor = Cursor::new(data);
+
+        // Should fail with failing verification
+        let result = cursor.read_content_verified(<[u8]>::is_empty).await;
+        assert!(result.is_err());
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_read_empty_content() -> Result<()> {
+        let data = b"";
+        let mut cursor = Cursor::new(data);
+
+        let content = cursor.read_content().await?;
+        assert_eq!(content.size(), 0);
+        assert!(content.is_empty());
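+        // Even empty content has a well-defined digest (sketch assertion:
+        // SHA-256 always renders to 64 hex characters)
+        assert_eq!(content.sha256_hex().len(), 64);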
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_read_large_content() -> Result<()> {
+        let data = vec![42u8; 10000];
+        let mut cursor = Cursor::new(data.clone());
+
+        let content = cursor.read_content().await?;
+        assert_eq!(content.as_bytes(), data.as_slice());
+        assert_eq!(content.size(), 10000);
+
+        Ok(())
+    }
+}
diff --git a/crates/nvisy-core/src/io/content_write.rs b/crates/nvisy-core/src/io/content_write.rs
new file mode 100644
index 0000000..99e749e
--- /dev/null
+++ b/crates/nvisy-core/src/io/content_write.rs
@@ -0,0 +1,372 @@
+//! Content writing trait for async I/O operations
+//!
+//! This module provides the [`AsyncContentWrite`] trait for writing content data
+//! to various async destinations from [`ContentData`] structures.
+
+use std::future::Future;
+use std::io;
+
+use tokio::io::{AsyncWrite, AsyncWriteExt};
+
+use super::ContentData;
+use crate::fs::ContentMetadata;
+
+/// Trait for writing content to async destinations
+///
+/// This trait provides methods for writing content data to async destinations,
+/// with options for chunking and verification.
+pub trait AsyncContentWrite: AsyncWrite + Unpin + Send {
+    /// Write content data to the destination
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the write operation fails or if there are I/O issues.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentWrite, ContentData};
+    /// use nvisy_core::fs::ContentMetadata;
+    /// use tokio::fs::File;
+    /// use std::io;
+    ///
+    /// async fn write_file() -> io::Result<ContentMetadata> {
+    ///     let mut file = File::create("output.txt").await?;
+    ///     let content = ContentData::from("Hello, world!");
+    ///     file.write_content(content).await
+    /// }
+    /// ```
+    fn write_content(
+        &mut self,
+        content_data: ContentData,
+    ) -> impl Future<Output = io::Result<ContentMetadata>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            self.write_all(content_data.as_bytes()).await?;
+            self.flush().await?;
+
+            let metadata = ContentMetadata::new(content_data.content_source);
+            Ok(metadata)
+        }
+    }
+
+    /// Write content data and return metadata with specified source path
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the write operation fails or if there are I/O issues.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentWrite, ContentData};
+    /// use nvisy_core::fs::ContentMetadata;
+    /// use tokio::fs::File;
+    /// use std::path::PathBuf;
+    /// use std::io;
+    ///
+    /// async fn write_with_path() -> io::Result<ContentMetadata> {
+    ///     let mut file = File::create("output.txt").await?;
+    ///     let content = ContentData::from("Hello, world!");
+    ///     let path = PathBuf::from("output.txt");
+    ///     file.write_content_with_path(content, path).await
+    /// }
+    /// ```
+    fn write_content_with_path(
+        &mut self,
+        content_data: ContentData,
+        path: impl Into<std::path::PathBuf> + Send,
+    ) -> impl Future<Output = io::Result<ContentMetadata>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            self.write_all(content_data.as_bytes()).await?;
+            self.flush().await?;
+
+            let metadata = ContentMetadata::with_path(content_data.content_source, path);
+            Ok(metadata)
+        }
+    }
+
+    /// Write content data in chunks for better memory efficiency
+    ///
+    /// This method is useful for writing large content without keeping it
+    /// all in memory at once.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the write operation fails or if there are I/O issues.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentWrite, ContentData};
+    /// use nvisy_core::fs::ContentMetadata;
+    /// use tokio::fs::File;
+    /// use std::io;
+    ///
+    /// async fn write_chunked() -> io::Result<ContentMetadata> {
+    ///     let mut file = File::create("output.txt").await?;
+    ///     let content = ContentData::from(vec![0u8; 1_000_000]); // 1MB
+    ///     file.write_content_chunked(content, 8192).await
+    /// }
+    /// ```
+    fn write_content_chunked(
+        &mut self,
+        content_data: ContentData,
+        chunk_size: usize,
+    ) -> impl Future<Output = io::Result<ContentMetadata>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            let data = content_data.as_bytes();
+
+            for chunk in data.chunks(chunk_size) {
+                self.write_all(chunk).await?;
+            }
+
+            self.flush().await?;
+
+            let metadata = ContentMetadata::new(content_data.content_source);
+            Ok(metadata)
+        }
+    }
+
+    /// Write multiple content data items sequentially
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any write operation fails or if there are I/O issues.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentWrite, ContentData};
+    /// use nvisy_core::fs::ContentMetadata;
+    /// use tokio::fs::File;
+    /// use std::io;
+    ///
+    /// async fn write_multiple() -> io::Result<Vec<ContentMetadata>> {
+    ///     let mut file = File::create("output.txt").await?;
+    ///     let contents = vec![
+    ///         ContentData::from("Hello, "),
+    ///         ContentData::from("world!"),
+    ///     ];
+    ///     file.write_multiple_content(contents).await
+    /// }
+    /// ```
+    fn write_multiple_content(
+        &mut self,
+        content_data_list: Vec<ContentData>,
+    ) -> impl Future<Output = io::Result<Vec<ContentMetadata>>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            let mut metadata_list = Vec::with_capacity(content_data_list.len());
+
+            for content_data in content_data_list {
+                self.write_all(content_data.as_bytes()).await?;
+                let metadata = ContentMetadata::new(content_data.content_source);
+                metadata_list.push(metadata);
+            }
+
+            self.flush().await?;
+            Ok(metadata_list)
+        }
+    }
+
+    /// Append content data to the destination without truncating
+    ///
+    /// This method assumes the destination supports append operations.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the write operation fails or if there are I/O issues.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentWrite, ContentData};
+    /// use nvisy_core::fs::ContentMetadata;
+    /// use tokio::fs::OpenOptions;
+    /// use std::io;
+    ///
+    /// async fn append_content() -> io::Result<ContentMetadata> {
+    ///     let mut file = OpenOptions::new()
+    ///         .create(true)
+    ///         .append(true)
+    ///         .open("log.txt")
+    ///         .await?;
+    ///
+    ///     let content = ContentData::from("New log entry\n");
+    ///     file.append_content(content).await
+    /// }
+    /// ```
+    fn append_content(
+        &mut self,
+        content_data: ContentData,
+    ) -> impl Future<Output = io::Result<ContentMetadata>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            self.write_all(content_data.as_bytes()).await?;
+            self.flush().await?;
+
+            let metadata = ContentMetadata::new(content_data.content_source);
+            Ok(metadata)
+        }
+    }
+
+    /// Write content data with verification
+    ///
+    /// This method writes the content and then optionally verifies it was
+    /// written correctly by checking the expected size.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the write operation fails, if there are I/O issues,
+    /// or if verification fails.
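+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// use nvisy_core::io::{AsyncContentWrite, ContentData};
+    /// use nvisy_core::fs::ContentMetadata;
+    /// use tokio::fs::File;
+    /// use std::io;
+    ///
+    /// // A usage sketch: fail if fewer bytes than expected were written.
+    /// async fn write_checked() -> io::Result<ContentMetadata> {
+    ///     let mut file = File::create("output.txt").await?;
+    ///     let content = ContentData::from("Hello, world!");
+    ///     file.write_content_verified(content, true).await
+    /// }
+    /// ```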
+    fn write_content_verified(
+        &mut self,
+        content_data: ContentData,
+        verify_size: bool,
+    ) -> impl Future<Output = io::Result<ContentMetadata>> + Send
+    where
+        Self: Sized,
+    {
+        async move {
+            let expected_size = content_data.size();
+            let data = content_data.as_bytes();
+
+            let bytes_written = self.write(data).await?;
+            self.flush().await?;
+
+            if verify_size && bytes_written != expected_size {
+                return Err(io::Error::new(
+                    io::ErrorKind::WriteZero,
+                    format!(
+                        "Expected to write {expected_size} bytes, but only wrote {bytes_written} bytes"
+                    ),
+                ));
+            }
+
+            let metadata = ContentMetadata::new(content_data.content_source);
+            Ok(metadata)
+        }
+    }
+}
+
+// Implementations for common types
+impl AsyncContentWrite for tokio::fs::File {}
+impl AsyncContentWrite for Vec<u8> {}
+impl AsyncContentWrite for Box<dyn AsyncWrite + Unpin + Send> {}
+
+#[cfg(test)]
+mod tests {
+    use std::io::Result;
+
+    use super::*;
+
+    #[tokio::test]
+    async fn test_write_content() -> Result<()> {
+        let mut writer = Vec::<u8>::new();
+        let content = ContentData::from("Hello, world!");
+
+        let metadata = writer.write_content(content).await?;
+        assert!(!metadata.content_source.as_uuid().is_nil());
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_write_content_with_path() -> Result<()> {
+        let mut writer = Vec::<u8>::new();
+        let content = ContentData::from("Hello, world!");
+
+        let metadata = writer.write_content_with_path(content, "test.txt").await?;
+        assert!(metadata.has_path());
+        assert_eq!(metadata.filename(), Some("test.txt"));
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_write_content_chunked() -> Result<()> {
+        let mut writer = Vec::<u8>::new();
+        let data = vec![42u8; 1000];
+        let content = ContentData::from(data.clone());
+
+        let metadata = writer.write_content_chunked(content, 100).await?;
+        assert!(!metadata.content_source.as_uuid().is_nil());
+        assert_eq!(writer.as_slice(), data.as_slice());
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_write_multiple_content() -> Result<()> {
+        let mut writer = Vec::<u8>::new();
+        let contents = vec![ContentData::from("Hello, "), ContentData::from("world!")];
+
+        let metadata_list = writer.write_multiple_content(contents).await?;
+        assert_eq!(metadata_list.len(), 2);
+        assert_eq!(writer.as_slice(), b"Hello, world!");
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_append_content() -> Result<()> {
+        let mut writer = Vec::<u8>::new();
+        let content = ContentData::from("Hello, world!");
+
+        let metadata = writer.append_content(content).await?;
+        assert!(!metadata.content_source.as_uuid().is_nil());
+        assert_eq!(writer.as_slice(), b"Hello, world!");
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_write_content_verified() -> Result<()> {
+        let mut writer = Vec::<u8>::new();
+        let content = ContentData::from("Hello, world!");
+
+        let metadata = writer.write_content_verified(content, true).await?;
+        assert!(!metadata.content_source.as_uuid().is_nil());
+        assert_eq!(writer.as_slice(), b"Hello, world!");
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_write_empty_content() -> Result<()> {
+        let mut writer = Vec::<u8>::new();
+        let content = ContentData::from("");
+
+        let metadata = writer.write_content(content).await?;
+        assert!(!metadata.content_source.as_uuid().is_nil());
+        assert_eq!(writer.as_slice(), b"");
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_write_large_content() -> Result<()> {
+        let mut writer = Vec::<u8>::new();
+        let data = vec![123u8; 10000];
+        let content = ContentData::from(data.clone());
+
+        let metadata = writer.write_content(content).await?;
+        assert!(!metadata.content_source.as_uuid().is_nil());
+        assert_eq!(writer.as_slice(), data.as_slice());
diff --git a/crates/nvisy-core/src/io/data_reference.rs b/crates/nvisy-core/src/io/data_reference.rs
new file mode 100644
index 0000000..7dc51df
--- /dev/null
+++ b/crates/nvisy-core/src/io/data_reference.rs
@@ -0,0 +1,140 @@
+//! Data reference definitions
+//!
+//! This module provides the `DataReference` struct for referencing and
+//! tracking content within the Nvisy system.
+
+use serde::{Deserialize, Serialize};
+
+use crate::io::Content;
+use crate::path::ContentSource;
+
+/// Reference to data with source tracking and content information
+///
+/// A `DataReference` provides a lightweight way to reference data content
+/// while maintaining information about its source location and optional
+/// mapping within that source.
+///
+/// # Examples
+///
+/// ```rust
+/// use nvisy_core::io::{DataReference, Content};
+///
+/// let content = Content::Text("Hello, world!".to_string());
+/// let data_ref = DataReference::new(content)
+///     .with_mapping_id("line-42");
+///
+/// assert!(data_ref.mapping_id().is_some());
+/// assert_eq!(data_ref.mapping_id().unwrap(), "line-42");
+/// ```
+#[derive(Debug, Clone)]
+#[derive(Serialize, Deserialize)]
+pub struct DataReference {
+    /// Unique identifier for the source containing this data
+    /// Using `UUIDv7` for time-ordered, globally unique identification
+    source: ContentSource,
+
+    /// Optional identifier that defines the position/location of the data within the source
+    /// Examples: line numbers, byte offsets, element IDs, `XPath` expressions
+    mapping_id: Option<String>,
+
+    /// The actual content data
+    content_type: Content,
+}
+
+impl DataReference {
+    /// Create a new data reference with an auto-generated source ID (`UUIDv7`)
+    pub fn new(content: Content) -> Self {
+        Self {
+            source: ContentSource::new(),
+            mapping_id: None,
+            content_type: content,
+        }
+    }
+
+    /// Create a new data reference with a specific source
+    pub fn with_source(source: ContentSource, content: Content) -> Self {
+        Self {
+            source,
+            mapping_id: None,
+            content_type: content,
+        }
+    }
+
+    /// Set the mapping ID for this data reference
+    #[must_use]
+    pub fn with_mapping_id<S: Into<String>>(mut self, mapping_id: S) -> Self {
+        self.mapping_id = Some(mapping_id.into());
+        self
+    }
+
+    /// Get the content source
+    pub fn source(&self) -> ContentSource {
+        self.source
+    }
+
+    /// Get the mapping ID, if any
+    pub fn mapping_id(&self) -> Option<&str> {
+        self.mapping_id.as_deref()
+    }
+
+    /// Get a reference to the content
+    pub fn content(&self) -> &Content {
+        &self.content_type
+    }
+
+    /// Get the content type name
+    pub fn content_type_name(&self) -> &'static str {
+        self.content_type.type_name()
+    }
+
+    /// Get the estimated size of the content in bytes
+    pub fn estimated_size(&self) -> usize {
+        self.content_type.estimated_size()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_data_reference_creation() {
+        let content = Content::text("Hello, world!");
+        let data_ref = DataReference::new(content);
+
+        assert_eq!(data_ref.content_type_name(), "text");
+        assert!(data_ref.mapping_id().is_none());
+        assert_eq!(data_ref.estimated_size(), 13);
+        // Verify UUIDv7 is used
+        assert_eq!(data_ref.source().as_uuid().get_version_num(), 7);
+    }
+
+    #[test]
+    fn test_data_reference_with_mapping() {
+        let content = Content::text("Test content");
+        let data_ref = DataReference::new(content).with_mapping_id("line-42");
+
+        assert_eq!(data_ref.mapping_id(), Some("line-42"));
+    }
+
+    #[test]
+    fn test_data_reference_with_source() {
+        let source = ContentSource::new();
+        let content = Content::text("Test content");
+        let data_ref = DataReference::with_source(source, content);
+
+        assert_eq!(data_ref.source(), source);
+    }
+
+    #[test]
+    fn test_serialization() {
+        let content = Content::text("Test content");
+        let data_ref = DataReference::new(content).with_mapping_id("test-mapping");
+
+        let json = serde_json::to_string(&data_ref).unwrap();
+        let deserialized: DataReference = serde_json::from_str(&json).unwrap();
+
+        assert_eq!(data_ref.source(), deserialized.source());
+        assert_eq!(data_ref.mapping_id(), deserialized.mapping_id());
+    }
+}
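Because `with_source` lets several references share one `ContentSource`, callers can tag every fragment extracted from the same file with a common source ID and a per-fragment `mapping_id`. A small sketch using only the API shown above (the email string and mapping ID are illustrative):

```rust
use nvisy_core::io::{Content, DataReference};
use nvisy_core::path::ContentSource;

fn main() {
    // One source ID for the whole file; one mapping ID per extracted line.
    let file_source = ContentSource::new();

    let hit = DataReference::with_source(file_source, Content::text("alice@example.com"))
        .with_mapping_id("line-42");

    assert_eq!(hit.source(), file_source);
    assert_eq!(hit.mapping_id(), Some("line-42"));
    assert_eq!(hit.content_type_name(), "text");
}
```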
diff --git a/crates/nvisy-core/src/io/mod.rs b/crates/nvisy-core/src/io/mod.rs
new file mode 100644
index 0000000..e0f3c44
--- /dev/null
+++ b/crates/nvisy-core/src/io/mod.rs
@@ -0,0 +1,26 @@
+//! I/O module for content handling and processing
+//!
+//! This module provides the core I/O abstractions for handling content data,
+//! including content data structures and async read/write traits.
+//!
+//! # Core Types
+//!
+//! - [`ContentData`]: Container for content data with metadata, hashing, and size utilities
+//!
+//! # Traits
+//!
+//! - [`AsyncContentRead`]: Async trait for reading content from various sources
+//! - [`AsyncContentWrite`]: Async trait for writing content to various destinations
+
+mod content;
+mod content_data;
+mod content_read;
+mod content_write;
+mod data_reference;
+
+// Re-export core types and traits
+pub use content::Content;
+pub use content_data::ContentData;
+pub use content_read::AsyncContentRead;
+pub use content_write::AsyncContentWrite;
+pub use data_reference::DataReference;
diff --git a/crates/nvisy-core/src/lib.rs b/crates/nvisy-core/src/lib.rs
new file mode 100644
index 0000000..b166bd9
--- /dev/null
+++ b/crates/nvisy-core/src/lib.rs
@@ -0,0 +1,33 @@
+#![forbid(unsafe_code)]
+#![warn(clippy::pedantic)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#![doc = include_str!("../README.md")]
+
+//! # Nvisy Core
+//!
+//! Core types and enums for data categorization in the Nvisy content processing system.
+//!
+//! This crate provides the fundamental data classification system used throughout
+//! the Nvisy ecosystem to identify and categorize different types of sensitive data,
+//! as well as structured error handling.
+//!
+//! ## Core Types
+//!
+//! - [`fs::DataSensitivity`]: Sensitivity levels for risk assessment
+//! - [`io::Content`]: Content types and data structures
+//! - [`io::DataReference`]: Data references with source tracking
+//! - [`fs::DataStructureKind`]: Classification of data structure types
+//! - [`fs::ContentFile`]: File operations with content tracking
+//! - [`io::ContentData`]: Container for content data with metadata
+//! - [`error::Error`]: Structured error handling with source classification
+
+pub mod error;
+pub mod fs;
+pub mod io;
+pub mod path;
+
+// Re-export main types for convenience
+pub use error::{BoxError, Error, ErrorResource, ErrorType, Result};
+
+#[doc(hidden)]
+pub mod prelude;
diff --git a/crates/nvisy-core/src/path/mod.rs b/crates/nvisy-core/src/path/mod.rs
new file mode 100644
index 0000000..08cb0c4
--- /dev/null
+++ b/crates/nvisy-core/src/path/mod.rs
@@ -0,0 +1,9 @@
+//! Path module for content source identification
+//!
+//! This module provides functionality for uniquely identifying content sources
+//! throughout the nvisy system using UUIDv7-based identifiers.
+
+mod source;
+
+// Re-export core types
+pub use source::ContentSource;
diff --git a/crates/nvisy-core/src/path/source.rs b/crates/nvisy-core/src/path/source.rs
new file mode 100644
index 0000000..49b2811
--- /dev/null
+++ b/crates/nvisy-core/src/path/source.rs
@@ -0,0 +1,287 @@
+//! Content source identification module
+//!
+//! This module provides the [`ContentSource`] struct for uniquely identifying
+//! data sources throughout the nvisy system using `UUIDv7`.
+
+use std::fmt;
+
+use jiff::Zoned;
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+/// Unique identifier for content sources in the system
+///
+/// Uses `UUIDv7` for time-ordered, globally unique identification of data sources.
+///
+/// This allows for efficient tracking and correlation of content throughout
+/// the processing pipeline.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Serialize, Deserialize)]
+pub struct ContentSource {
+    /// `UUIDv7` identifier
+    id: Uuid,
+}
+
+impl ContentSource {
+    /// Create a new content source with a fresh `UUIDv7`
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::path::ContentSource;
+    ///
+    /// let source = ContentSource::new();
+    /// assert!(!source.as_uuid().is_nil());
+    /// ```
+    #[must_use]
+    pub fn new() -> Self {
+        let now = Zoned::now();
+        let timestamp = uuid::Timestamp::from_unix(
+            uuid::NoContext,
+            now.timestamp().as_second().unsigned_abs(),
+            now.timestamp().subsec_nanosecond().unsigned_abs(),
+        );
+
+        Self {
+            id: Uuid::new_v7(timestamp),
+        }
+    }
+
+    /// Create a content source from an existing UUID
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::path::ContentSource;
+    /// use uuid::Uuid;
+    ///
+    /// let source = ContentSource::new();
+    /// let uuid = source.as_uuid();
+    /// let source2 = ContentSource::from_uuid(uuid);
+    /// assert_eq!(source2.as_uuid(), uuid);
+    /// ```
+    #[must_use]
+    pub fn from_uuid(id: Uuid) -> Self {
+        Self { id }
+    }
+
+    /// Get the underlying UUID
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::path::ContentSource;
+    ///
+    /// let source = ContentSource::new();
+    /// let uuid = source.as_uuid();
+    /// assert_eq!(uuid.get_version_num(), 7);
+    /// ```
+    #[must_use]
+    pub fn as_uuid(&self) -> Uuid {
+        self.id
+    }
+
+    /// Parse a content source from a string
+    ///
+    /// The inverse of `to_string` (provided by the `Display` impl), which
+    /// renders the standard 36-character UUID form.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the string is not a valid UUID format.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::path::ContentSource;
+    ///
+    /// let source = ContentSource::new();
+    /// let id_str = source.to_string();
+    /// assert_eq!(id_str.len(), 36); // Standard UUID string length
+    ///
+    /// let parsed = ContentSource::parse(&id_str).unwrap();
+    /// assert_eq!(source, parsed);
+    /// ```
+    pub fn parse(s: &str) -> Result<Self, uuid::Error> {
+        let id = Uuid::parse_str(s)?;
+        Ok(Self { id })
+    }
+
+    /// Get the timestamp component from the `UUIDv7`
+    ///
+    /// Returns the Unix timestamp in milliseconds when this UUID was generated,
+    /// or None if this is not a `UUIDv7`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::path::ContentSource;
+    /// use std::time::{SystemTime, UNIX_EPOCH};
+    ///
+    /// let source = ContentSource::new();
+    /// let timestamp = source.timestamp().expect("UUIDv7 should have timestamp");
+    /// let now = SystemTime::now()
+    ///     .duration_since(UNIX_EPOCH)
+    ///     .unwrap()
+    ///     .as_millis() as u64;
+    ///
+    /// // Should be very close to current time (within a few seconds)
+    /// assert!((timestamp as i64 - now as i64).abs() < 5000);
+    /// ```
+    #[must_use]
+    pub fn timestamp(&self) -> Option<u64> {
+        self.id.get_timestamp().map(|timestamp| {
+            let (seconds, nanos) = timestamp.to_unix();
+            seconds * 1000 + u64::from(nanos) / 1_000_000
+        })
+    }
+
+    /// Check if this content source was created before another
+    ///
+    /// Returns false if either UUID is not a `UUIDv7` and thus has no timestamp.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use nvisy_core::path::ContentSource;
+    /// use std::thread;
+    /// use std::time::Duration;
+    ///
+    /// let source1 = ContentSource::new();
+    /// thread::sleep(Duration::from_millis(1));
+    /// let source2 = ContentSource::new();
+    ///
+    /// assert!(source1.created_before(&source2));
+    /// assert!(!source2.created_before(&source1));
+    /// ```
+    #[must_use]
+    pub fn created_before(&self, other: &ContentSource) -> bool {
+        match (self.timestamp(), other.timestamp()) {
+            (Some(self_ts), Some(other_ts)) => self_ts < other_ts,
+            _ => false,
+        }
+    }
+
+    /// Check if this content source was created after another
+    ///
+    /// Returns false if either UUID is not a `UUIDv7` and thus has no timestamp.
+    #[must_use]
+    pub fn created_after(&self, other: &ContentSource) -> bool {
+        match (self.timestamp(), other.timestamp()) {
+            (Some(self_ts), Some(other_ts)) => self_ts > other_ts,
+            _ => false,
+        }
+    }
+}
+
+impl Default for ContentSource {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl fmt::Display for ContentSource {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.id)
+    }
+}
+
+impl From<Uuid> for ContentSource {
+    fn from(id: Uuid) -> Self {
+        Self::from_uuid(id)
+    }
+}
+
+impl From<ContentSource> for Uuid {
+    fn from(source: ContentSource) -> Self {
+        source.id
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+    use std::thread;
+    use std::time::Duration;
+
+    use super::*;
+
+    #[test]
+    fn test_new_content_source() {
+        let source = ContentSource::new();
+        assert_eq!(source.as_uuid().get_version_num(), 7);
+        assert!(!source.as_uuid().is_nil());
+    }
+
+    #[test]
+    fn test_uniqueness() {
+        let mut sources = HashSet::new();
+
+        // Generate 1000 sources and ensure they're all unique
+        for _ in 0..1000 {
+            let source = ContentSource::new();
+            assert!(sources.insert(source), "Duplicate content source found");
+        }
+    }
+
+    #[test]
+    fn test_string_conversion() {
+        let source = ContentSource::new();
+        let string_repr = source.to_string();
+        let parsed = ContentSource::parse(&string_repr).unwrap();
+        assert_eq!(source, parsed);
+    }
+
+    #[test]
+    fn test_invalid_string_parsing() {
+        let result = ContentSource::parse("invalid-uuid");
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_ordering() {
+        let source1 = ContentSource::new();
+        thread::sleep(Duration::from_millis(2));
+        let source2 = ContentSource::new();
+
+        assert!(source1.created_before(&source2));
+        assert!(source2.created_after(&source1));
+        assert!(source1 < source2); // Test PartialOrd
+    }
+
+    #[test]
+    fn test_display() {
+        let source = ContentSource::new();
+        let display_str = format!("{source}");
+        let uuid_str = source.as_uuid().to_string();
+        assert_eq!(display_str, uuid_str);
+    }
+
+    #[test]
+    fn test_serde_serialization() {
+        let source = ContentSource::new();
+        let serialized = serde_json::to_string(&source).unwrap();
+        let deserialized: ContentSource = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(source, deserialized);
+    }
+
+    #[test]
+    fn test_hash_consistency() {
+        let source = ContentSource::new();
+        let mut set = HashSet::new();
+
+        set.insert(source);
+        assert!(set.contains(&source));
+
+        // Same source should hash the same way
+        let cloned_source = source;
+        assert!(set.contains(&cloned_source));
+    }
+}
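`timestamp()` collapses the UUIDv7 `(seconds, nanos)` pair into milliseconds (`seconds * 1000 + nanos / 1_000_000`), which is what makes the ordering helpers and the derived `Ord` agree. A quick sketch of that property, using only the API above:

```rust
use std::thread;
use std::time::Duration;

use nvisy_core::path::ContentSource;

fn main() {
    let first = ContentSource::new();
    thread::sleep(Duration::from_millis(2));
    let second = ContentSource::new();

    // Millisecond timestamps recovered from the UUIDv7 bits.
    let (t1, t2) = (first.timestamp().unwrap(), second.timestamp().unwrap());
    assert!(t1 <= t2);

    // created_before() and lexicographic UUID ordering tell the same story.
    assert!(first.created_before(&second));
    assert!(first < second);
}
```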
diff --git a/crates/nvisy-core/src/prelude.rs b/crates/nvisy-core/src/prelude.rs
new file mode 100644
index 0000000..f39f7e6
--- /dev/null
+++ b/crates/nvisy-core/src/prelude.rs
@@ -0,0 +1,15 @@
+//! Prelude module for commonly used types.
+//!
+//! This module re-exports the most commonly used types from this crate.
+//! It is intended to be glob-imported for convenience.
+
+// Error handling
+pub use crate::error::{BoxError, Error, ErrorResource, ErrorType, Result};
+// File system types
+pub use crate::fs::{
+    ContentFile, ContentKind, ContentMetadata, DataSensitivity, DataStructureKind,
+};
+// I/O types
+pub use crate::io::{AsyncContentRead, AsyncContentWrite, Content, ContentData, DataReference};
+// Path types
+pub use crate::path::ContentSource;
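A glob import of the prelude is enough to pull in the error, fs, io, and path types at once. A sketch, assuming the re-exports above compile as listed:

```rust
use nvisy_core::prelude::*;

fn describe(reference: &DataReference) -> String {
    format!(
        "{} ({} bytes) from source {}",
        reference.content_type_name(),
        reference.estimated_size(),
        reference.source()
    )
}

fn main() {
    let reference = DataReference::new(Content::text("Hello"));
    println!("{}", describe(&reference));
}
```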
diff --git a/crates/nvisy-document/Cargo.toml b/crates/nvisy-document/Cargo.toml
new file mode 100644
index 0000000..3d6ec4b
--- /dev/null
+++ b/crates/nvisy-document/Cargo.toml
@@ -0,0 +1,46 @@
+# https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[package]
+name = "nvisy-document"
+version = { workspace = true }
+rust-version = { workspace = true }
+edition = { workspace = true }
+license = { workspace = true }
+publish = { workspace = true }
+readme = "./README.md"
+
+authors = { workspace = true }
+repository = { workspace = true }
+homepage = { workspace = true }
+documentation = { workspace = true }
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
+
+[dependencies]
+# Core nvisy types
+nvisy-core = { workspace = true }
+
+# Async runtime
+tokio = { workspace = true, features = ["sync", "io-util"] }
+async-trait = { workspace = true }
+
+# Data types
+bytes = { workspace = true, features = ["serde"] }
+uuid = { workspace = true, features = ["v4", "v7", "serde"] }
+jiff = { workspace = true, features = ["std", "serde"] }
+
+# Serialization
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true, features = ["std"] }
+base64 = { workspace = true, features = ["std"] }
+
+# Error handling
+thiserror = { workspace = true, features = ["std"] }
+
+# Utilities
+derive_more = { workspace = true, features = ["display", "from", "into", "deref", "deref_mut", "as_ref", "constructor"] }
+
+[dev-dependencies]
+tokio = { workspace = true, features = ["rt", "macros"] }
diff --git a/crates/nvisy-document/README.md b/crates/nvisy-document/README.md
new file mode 100644
index 0000000..0a793b6
--- /dev/null
+++ b/crates/nvisy-document/README.md
@@ -0,0 +1,40 @@
+# nvisy-document
+
+Document manipulation traits and types for the Nvisy system.
+
+This crate provides a unified interface for working with different document
+formats, enabling semantic editing operations driven by VLM (Vision Language
+Model) understanding.
+
+## Features
+
+- **Document Format Trait**: Common interface for PDF, DOCX, and other formats
+- **Format Registry**: Register and look up formats by MIME type or extension
+- **Region-based Editing**: Reference and modify document regions with stable IDs
+- **Edit Operations**: Redaction, text replacement, structural changes
+- **Streaming Support**: Handle large documents with pagination
+
+## Architecture
+
+```text
+┌─────────────────────────────────────────────────────────────────┐
+│                          nvisy-engine                           │
+│          (Edit sessions, undo/redo, region caching)             │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+                                ▼
+┌─────────────────────────────────────────────────────────────────┐
+│                         nvisy-document                          │
+│     (DocumentFormat trait, EditOperation, Region, Registry)     │
+└─────────────────────────────────────────────────────────────────┘
+                                │
+              ┌─────────────────┼─────────────────┐
+              ▼                 ▼                 ▼
+        ┌──────────┐      ┌──────────┐      ┌──────────┐
+        │nvisy-pdf │      │nvisy-docx│      │nvisy-txt │
+        └──────────┘      └──────────┘      └──────────┘
+```
+
+## License
+
+MIT License - see [LICENSE.txt](../../LICENSE.txt) for details.
diff --git a/crates/nvisy-document/src/conversion/mod.rs b/crates/nvisy-document/src/conversion/mod.rs
new file mode 100644
index 0000000..14d7efd
--- /dev/null
+++ b/crates/nvisy-document/src/conversion/mod.rs
@@ -0,0 +1,43 @@
+//! Document format conversion traits and types.
+//!
+//! This module defines the [`Conversion`] trait for converting documents
+//! between formats.
+
+mod options;
+mod types;
+
+use async_trait::async_trait;
+pub use options::{ConversionOptions, HtmlOptions, PageMargins, PageOrientation, PdfOptions};
+pub use types::{ConversionPath, ConversionResult, ConversionStep, FormatPair, SkippedElement};
+
+use crate::error::Result;
+use crate::format::Document;
+
+/// Trait for document format conversion.
+///
+/// This trait is implemented by [`Document`] types that support conversion
+/// to other formats.
+#[async_trait]
+pub trait Conversion: Document {
+    /// Returns whether conversion is supported by this document.
+    fn supports_conversion(&self) -> bool;
+
+    /// Returns the available conversion paths from this document's format.
+    fn conversion_paths(&self) -> &[ConversionPath];
+
+    /// Converts this document to the target format.
+    ///
+    /// # Arguments
+    ///
+    /// * `target_format` - The target format name (e.g., "pdf", "html")
+    /// * `options` - Optional conversion options
+    ///
+    /// # Returns
+    ///
+    /// The conversion result containing the converted document data.
+    async fn convert(
+        &self,
+        target_format: &str,
+        options: Option<&ConversionOptions>,
+    ) -> Result<ConversionResult>;
+}
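A sketch of what a format crate might do with this trait. `TxtDocument` is a hypothetical type assumed to already implement `Document`, its `paths` and `text` fields are assumptions, and the crate-root module paths (`conversion`, `error`) are inferred from the file layout in this diff rather than confirmed by it:

```rust
use async_trait::async_trait;
use bytes::Bytes;
use nvisy_document::conversion::{Conversion, ConversionOptions, ConversionPath, ConversionResult};
use nvisy_document::error::{Error, Result};

#[async_trait]
impl Conversion for TxtDocument {
    fn supports_conversion(&self) -> bool {
        true
    }

    fn conversion_paths(&self) -> &[ConversionPath] {
        // e.g. a field holding vec![ConversionPath::direct("text/plain", "text/html")]
        &self.paths
    }

    async fn convert(
        &self,
        target_format: &str,
        _options: Option<&ConversionOptions>,
    ) -> Result<ConversionResult> {
        match target_format {
            "html" => {
                // Illustration only: no HTML escaping is performed here.
                let body = format!("<pre>{}</pre>", self.text);
                let path = ConversionPath::direct("text/plain", "text/html");
                Ok(ConversionResult::new(Bytes::from(body), "text/html", path))
            }
            other => Err(Error::conversion(format!("no conversion path to {other}"))),
        }
    }
}
```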
diff --git a/crates/nvisy-document/src/conversion/options.rs b/crates/nvisy-document/src/conversion/options.rs
new file mode 100644
index 0000000..826e15f
--- /dev/null
+++ b/crates/nvisy-document/src/conversion/options.rs
@@ -0,0 +1,433 @@
+//! Format conversion options.
+
+use serde::{Deserialize, Serialize};
+
+/// Options for format conversion.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ConversionOptions {
+    /// Whether to preserve the original document structure.
+    pub preserve_structure: bool,
+
+    /// Whether to preserve formatting (fonts, styles, etc.).
+    pub preserve_formatting: bool,
+
+    /// Whether to preserve images.
+    pub preserve_images: bool,
+
+    /// Whether to embed fonts (for PDF output).
+    pub embed_fonts: bool,
+
+    /// Image quality for lossy compression (1-100).
+    pub image_quality: u8,
+
+    /// Maximum image dimension (width or height) in pixels.
+    pub max_image_dimension: Option<u32>,
+
+    /// Whether to allow lossy conversion.
+    pub allow_lossy: bool,
+
+    /// Whether to fail on content that cannot be converted.
+    pub strict_mode: bool,
+
+    /// PDF-specific options.
+    pub pdf_options: Option<PdfOptions>,
+
+    /// HTML-specific options.
+    pub html_options: Option<HtmlOptions>,
+}
+
+impl ConversionOptions {
+    /// Creates options optimized for quality preservation.
+    #[must_use]
+    pub fn high_quality() -> Self {
+        Self {
+            preserve_structure: true,
+            preserve_formatting: true,
+            preserve_images: true,
+            embed_fonts: true,
+            image_quality: 95,
+            max_image_dimension: None,
+            allow_lossy: false,
+            strict_mode: false,
+            pdf_options: None,
+            html_options: None,
+        }
+    }
+
+    /// Creates options optimized for file size.
+    #[must_use]
+    pub fn compact() -> Self {
+        Self {
+            preserve_structure: true,
+            preserve_formatting: false,
+            preserve_images: true,
+            embed_fonts: false,
+            image_quality: 75,
+            max_image_dimension: Some(1920),
+            allow_lossy: true,
+            strict_mode: false,
+            pdf_options: None,
+            html_options: None,
+        }
+    }
+
+    /// Creates options for text-only extraction.
+    #[must_use]
+    pub fn text_only() -> Self {
+        Self {
+            preserve_structure: false,
+            preserve_formatting: false,
+            preserve_images: false,
+            embed_fonts: false,
+            image_quality: 0,
+            max_image_dimension: None,
+            allow_lossy: true,
+            strict_mode: false,
+            pdf_options: None,
+            html_options: None,
+        }
+    }
+
+    /// Enables structure preservation.
+    #[must_use]
+    pub fn with_structure(mut self) -> Self {
+        self.preserve_structure = true;
+        self
+    }
+
+    /// Enables formatting preservation.
+    #[must_use]
+    pub fn with_formatting(mut self) -> Self {
+        self.preserve_formatting = true;
+        self
+    }
+
+    /// Enables image preservation.
+    #[must_use]
+    pub fn with_images(mut self) -> Self {
+        self.preserve_images = true;
+        self
+    }
+
+    /// Enables font embedding.
+    #[must_use]
+    pub fn with_embedded_fonts(mut self) -> Self {
+        self.embed_fonts = true;
+        self
+    }
+
+    /// Sets the image quality.
+    #[must_use]
+    pub fn with_image_quality(mut self, quality: u8) -> Self {
+        self.image_quality = quality.min(100);
+        self
+    }
+
+    /// Sets the maximum image dimension.
+    #[must_use]
+    pub fn with_max_image_dimension(mut self, dimension: u32) -> Self {
+        self.max_image_dimension = Some(dimension);
+        self
+    }
+
+    /// Allows lossy conversion.
+    #[must_use]
+    pub fn allow_lossy_conversion(mut self) -> Self {
+        self.allow_lossy = true;
+        self
+    }
+
+    /// Enables strict mode.
+    #[must_use]
+    pub fn strict(mut self) -> Self {
+        self.strict_mode = true;
+        self
+    }
+
+    /// Sets PDF-specific options.
+    #[must_use]
+    pub fn with_pdf_options(mut self, options: PdfOptions) -> Self {
+        self.pdf_options = Some(options);
+        self
+    }
+
+    /// Sets HTML-specific options.
+    #[must_use]
+    pub fn with_html_options(mut self, options: HtmlOptions) -> Self {
+        self.html_options = Some(options);
+        self
+    }
+
+    /// Validates the options.
+    pub fn validate(&self) -> Result<(), String> {
+        if self.image_quality > 100 {
+            return Err("image_quality must be between 0 and 100".to_string());
+        }
+
+        if let Some(dim) = self.max_image_dimension {
+            if dim == 0 {
+                return Err("max_image_dimension must be greater than 0".to_string());
+            }
+            if dim > 16384 {
+                return Err("max_image_dimension exceeds maximum of 16384".to_string());
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// PDF-specific conversion options.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct PdfOptions {
+    /// PDF version to target (e.g., "1.4", "1.7", "2.0").
+    pub pdf_version: Option<String>,
+
+    /// Whether to create a PDF/A compliant document.
+    pub pdf_a: bool,
+
+    /// PDF/A conformance level (if pdf_a is true).
+    pub pdf_a_level: Option<String>,
+
+    /// Whether to linearize for fast web viewing.
+    pub linearize: bool,
+
+    /// Whether to include document outline/bookmarks.
+    pub include_outline: bool,
+
+    /// Page size (e.g., "A4", "Letter").
+    pub page_size: Option<String>,
+
+    /// Page orientation.
+    pub orientation: Option<PageOrientation>,
+
+    /// Page margins in points.
+    pub margins: Option<PageMargins>,
+}
+
+impl PdfOptions {
+    /// Creates options for PDF/A-1b compliance.
+    #[must_use]
+    pub fn pdf_a_1b() -> Self {
+        Self {
+            pdf_version: Some("1.4".to_string()),
+            pdf_a: true,
+            pdf_a_level: Some("1b".to_string()),
+            linearize: false,
+            include_outline: true,
+            page_size: None,
+            orientation: None,
+            margins: None,
+        }
+    }
+
+    /// Creates options for web-optimized PDF.
+    #[must_use]
+    pub fn web_optimized() -> Self {
+        Self {
+            pdf_version: Some("1.7".to_string()),
+            pdf_a: false,
+            pdf_a_level: None,
+            linearize: true,
+            include_outline: true,
+            page_size: None,
+            orientation: None,
+            margins: None,
+        }
+    }
+}
+
+/// HTML-specific conversion options.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct HtmlOptions {
+    /// Whether to generate a complete HTML document (vs. fragment).
+    pub full_document: bool,
+
+    /// Whether to inline CSS styles.
+    pub inline_styles: bool,
+
+    /// Whether to embed images as data URIs.
+    pub embed_images: bool,
+
+    /// Character encoding (default: UTF-8).
+    pub encoding: Option<String>,
+
+    /// Whether to include a CSS reset.
+    pub css_reset: bool,
+
+    /// Custom CSS to include.
+    pub custom_css: Option<String>,
+}
+
+impl HtmlOptions {
+    /// Creates options for self-contained HTML.
+    #[must_use]
+    pub fn self_contained() -> Self {
+        Self {
+            full_document: true,
+            inline_styles: true,
+            embed_images: true,
+            encoding: Some("UTF-8".to_string()),
+            css_reset: true,
+            custom_css: None,
+        }
+    }
+
+    /// Creates options for HTML fragment.
+    #[must_use]
+    pub fn fragment() -> Self {
+        Self {
+            full_document: false,
+            inline_styles: false,
+            embed_images: false,
+            encoding: None,
+            css_reset: false,
+            custom_css: None,
+        }
+    }
+}
+
+/// Page orientation.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum PageOrientation {
+    /// Portrait orientation.
+    #[default]
+    Portrait,
+    /// Landscape orientation.
+    Landscape,
+}
+
+/// Page margins in points.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub struct PageMargins {
+    /// Top margin.
+    pub top: f32,
+    /// Right margin.
+    pub right: f32,
+    /// Bottom margin.
+    pub bottom: f32,
+    /// Left margin.
+    pub left: f32,
+}
+
+impl Default for PageMargins {
+    fn default() -> Self {
+        Self {
+            top: 72.0, // 1 inch
+            right: 72.0,
+            bottom: 72.0,
+            left: 72.0,
+        }
+    }
+}
+
+impl PageMargins {
+    /// Creates uniform margins.
+    #[must_use]
+    pub fn uniform(margin: f32) -> Self {
+        Self {
+            top: margin,
+            right: margin,
+            bottom: margin,
+            left: margin,
+        }
+    }
+
+    /// Creates zero margins.
+    #[must_use]
+    pub fn zero() -> Self {
+        Self::uniform(0.0)
+    }
+
+    /// Creates narrow margins (0.5 inch).
+    #[must_use]
+    pub fn narrow() -> Self {
+        Self::uniform(36.0)
+    }
+
+    /// Creates wide margins (1.5 inch).
+    #[must_use]
+    pub fn wide() -> Self {
+        Self::uniform(108.0)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_default_options() {
+        let opts = ConversionOptions::default();
+        assert!(!opts.preserve_structure);
+        assert!(!opts.strict_mode);
+    }
+
+    #[test]
+    fn test_high_quality_options() {
+        let opts = ConversionOptions::high_quality();
+        assert!(opts.preserve_structure);
+        assert!(opts.preserve_formatting);
+        assert!(opts.embed_fonts);
+        assert_eq!(opts.image_quality, 95);
+    }
+
+    #[test]
+    fn test_compact_options() {
+        let opts = ConversionOptions::compact();
+        assert!(!opts.preserve_formatting);
+        assert!(opts.allow_lossy);
+        assert!(opts.max_image_dimension.is_some());
+    }
+
+    #[test]
+    fn test_builder() {
+        let opts = ConversionOptions::default()
+            .with_structure()
+            .with_formatting()
+            .with_image_quality(90)
+            .strict();
+
+        assert!(opts.preserve_structure);
+        assert!(opts.preserve_formatting);
+        assert_eq!(opts.image_quality, 90);
+        assert!(opts.strict_mode);
+    }
+
+    #[test]
+    fn test_validation() {
+        let valid = ConversionOptions::default();
+        assert!(valid.validate().is_ok());
+
+        let invalid = ConversionOptions {
+            max_image_dimension: Some(0),
+            ..Default::default()
+        };
+        assert!(invalid.validate().is_err());
+    }
+
+    #[test]
+    fn test_pdf_options() {
+        let pdf_a = PdfOptions::pdf_a_1b();
+        assert!(pdf_a.pdf_a);
+        assert_eq!(pdf_a.pdf_a_level, Some("1b".to_string()));
+    }
+
+    #[test]
+    fn test_html_options() {
+        let self_contained = HtmlOptions::self_contained();
+        assert!(self_contained.full_document);
+        assert!(self_contained.inline_styles);
+        assert!(self_contained.embed_images);
+    }
+
+    #[test]
+    fn test_page_margins() {
+        let default = PageMargins::default();
+        assert_eq!(default.top, 72.0);
+
+        let narrow = PageMargins::narrow();
+        assert_eq!(narrow.top, 36.0);
+    }
+}
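The presets compose with the builder methods, so callers can start from `compact()` or `high_quality()` and override individual knobs. A sketch, assuming the module is re-exported as `nvisy_document::conversion` as the mod.rs above suggests:

```rust
use nvisy_document::conversion::{ConversionOptions, PageMargins, PdfOptions};

fn main() {
    // Size-optimized preset, with a stricter image budget and custom PDF layout.
    let opts = ConversionOptions::compact()
        .with_image_quality(60)
        .with_pdf_options(PdfOptions {
            linearize: true,
            margins: Some(PageMargins::narrow()),
            ..PdfOptions::default()
        });

    assert!(opts.validate().is_ok());
    assert_eq!(opts.image_quality, 60);
}
```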
diff --git a/crates/nvisy-document/src/conversion/types.rs b/crates/nvisy-document/src/conversion/types.rs
new file mode 100644
index 0000000..85011c6
--- /dev/null
+++ b/crates/nvisy-document/src/conversion/types.rs
@@ -0,0 +1,308 @@
+//! Conversion types and structures.
+
+use bytes::Bytes;
+use serde::{Deserialize, Serialize};
+
+/// A pair of source and target formats.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub struct FormatPair {
+    /// Source MIME type.
+    pub source: String,
+
+    /// Target MIME type.
+    pub target: String,
+}
+
+impl FormatPair {
+    /// Creates a new format pair.
+    #[must_use]
+    pub fn new(source: impl Into<String>, target: impl Into<String>) -> Self {
+        Self {
+            source: source.into(),
+            target: target.into(),
+        }
+    }
+
+    /// Creates a format pair for PDF to DOCX conversion.
+    #[must_use]
+    pub fn pdf_to_docx() -> Self {
+        Self::new(
+            "application/pdf",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        )
+    }
+
+    /// Creates a format pair for DOCX to PDF conversion.
+    #[must_use]
+    pub fn docx_to_pdf() -> Self {
+        Self::new(
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            "application/pdf",
+        )
+    }
+
+    /// Creates a format pair for HTML to PDF conversion.
+    #[must_use]
+    pub fn html_to_pdf() -> Self {
+        Self::new("text/html", "application/pdf")
+    }
+
+    /// Creates a format pair for Markdown to HTML conversion.
+    #[must_use]
+    pub fn markdown_to_html() -> Self {
+        Self::new("text/markdown", "text/html")
+    }
+}
+
+/// Describes a path for converting between formats.
+///
+/// Some conversions may require intermediate formats (e.g., DOCX -> HTML -> PDF).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ConversionPath {
+    /// The steps in the conversion path.
+    pub steps: Vec<ConversionStep>,
+
+    /// Whether this path may result in content loss.
+    pub lossy: bool,
+
+    /// Estimated quality of the conversion (0.0 - 1.0).
+    pub quality_estimate: f32,
+}
+
+impl ConversionPath {
+    /// Creates a direct conversion path (single step).
+    #[must_use]
+    pub fn direct(source: impl Into<String>, target: impl Into<String>) -> Self {
+        Self {
+            steps: vec![ConversionStep::new(source, target)],
+            lossy: false,
+            quality_estimate: 1.0,
+        }
+    }
+
+    /// Creates a multi-step conversion path.
+    #[must_use]
+    pub fn multi_step(steps: Vec<ConversionStep>) -> Self {
+        let quality = if steps.is_empty() {
+            1.0
+        } else {
+            // Quality degrades with each step
+            0.95_f32.powi(steps.len() as i32)
+        };
+
+        Self {
+            steps,
+            lossy: false,
+            quality_estimate: quality,
+        }
+    }
+
+    /// Marks the path as lossy.
+    #[must_use]
+    pub fn as_lossy(mut self) -> Self {
+        self.lossy = true;
+        self
+    }
+
+    /// Sets the quality estimate.
+    #[must_use]
+    pub fn with_quality(mut self, quality: f32) -> Self {
+        self.quality_estimate = quality.clamp(0.0, 1.0);
+        self
+    }
+
+    /// Returns whether this is a direct (single-step) conversion.
+    #[must_use]
+    pub fn is_direct(&self) -> bool {
+        self.steps.len() == 1
+    }
+
+    /// Returns the number of conversion steps.
+    #[must_use]
+    pub fn step_count(&self) -> usize {
+        self.steps.len()
+    }
+
+    /// Returns the source format.
+    #[must_use]
+    pub fn source(&self) -> Option<&str> {
+        self.steps.first().map(|s| s.source.as_str())
+    }
+
+    /// Returns the target format.
+    #[must_use]
+    pub fn target(&self) -> Option<&str> {
+        self.steps.last().map(|s| s.target.as_str())
+    }
+}
+
+/// A single step in a conversion path.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ConversionStep {
+    /// Source format for this step.
+    pub source: String,
+
+    /// Target format for this step.
+    pub target: String,
+}
+
+impl ConversionStep {
+    /// Creates a new conversion step.
+    #[must_use]
+    pub fn new(source: impl Into<String>, target: impl Into<String>) -> Self {
+        Self {
+            source: source.into(),
+            target: target.into(),
+        }
+    }
+}
+
+/// Result of a format conversion operation.
+#[derive(Debug, Clone)]
+pub struct ConversionResult {
+    /// The converted document bytes.
+    pub data: Bytes,
+
+    /// The MIME type of the output.
+    pub mime_type: String,
+
+    /// The conversion path that was used.
+    pub path: ConversionPath,
+
+    /// Warnings generated during conversion.
+    pub warnings: Vec<String>,
+
+    /// Elements that could not be converted.
+    pub skipped_elements: Vec<SkippedElement>,
+}
+
+impl ConversionResult {
+    /// Creates a new conversion result.
+    #[must_use]
+    pub fn new(data: Bytes, mime_type: impl Into<String>, path: ConversionPath) -> Self {
+        Self {
+            data,
+            mime_type: mime_type.into(),
+            path,
+            warnings: vec![],
+            skipped_elements: vec![],
+        }
+    }
+
+    /// Adds a warning.
+    #[must_use]
+    pub fn with_warning(mut self, warning: impl Into<String>) -> Self {
+        self.warnings.push(warning.into());
+        self
+    }
+
+    /// Adds a skipped element.
+    #[must_use]
+    pub fn with_skipped(mut self, element: SkippedElement) -> Self {
+        self.skipped_elements.push(element);
+        self
+    }
+
+    /// Returns whether the conversion was lossless.
+    #[must_use]
+    pub fn is_lossless(&self) -> bool {
+        !self.path.lossy && self.skipped_elements.is_empty()
+    }
+
+    /// Returns the size of the output in bytes.
+    #[must_use]
+    pub fn size_bytes(&self) -> usize {
+        self.data.len()
+    }
+}
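Each hop multiplies the estimate by 0.95, so a two-step path scores 0.95^2 = 0.9025. A worked check of that formula against the constructor above, using the DOCX -> HTML -> PDF route mentioned in the doc comment:

```rust
use nvisy_document::conversion::{ConversionPath, ConversionStep};

fn main() {
    let docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
    let path = ConversionPath::multi_step(vec![
        ConversionStep::new(docx, "text/html"),
        ConversionStep::new("text/html", "application/pdf"),
    ]);

    // 0.95 ^ 2 = 0.9025: quality decays multiplicatively with each step.
    assert!((path.quality_estimate - 0.9025_f32).abs() < 1e-6);
    assert_eq!(path.source(), Some(docx));
    assert_eq!(path.target(), Some("application/pdf"));
    assert!(!path.is_direct());
}
```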
+
+/// An element that was skipped during conversion.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SkippedElement {
+    /// Type of element (e.g., "image", "table", "formula").
+    pub element_type: String,
+
+    /// Reason for skipping.
+    pub reason: String,
+
+    /// Page or location (if applicable).
+    pub location: Option<String>,
+}
+
+impl SkippedElement {
+    /// Creates a new skipped element.
+    #[must_use]
+    pub fn new(element_type: impl Into<String>, reason: impl Into<String>) -> Self {
+        Self {
+            element_type: element_type.into(),
+            reason: reason.into(),
+            location: None,
+        }
+    }
+
+    /// Sets the location.
+    #[must_use]
+    pub fn with_location(mut self, location: impl Into<String>) -> Self {
+        self.location = Some(location.into());
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_format_pair() {
+        let pair = FormatPair::new("application/pdf", "text/html");
+        assert_eq!(pair.source, "application/pdf");
+        assert_eq!(pair.target, "text/html");
+    }
+
+    #[test]
+    fn test_format_pair_presets() {
+        let docx_to_pdf = FormatPair::docx_to_pdf();
+        assert!(docx_to_pdf.source.contains("wordprocessingml"));
+        assert_eq!(docx_to_pdf.target, "application/pdf");
+    }
+
+    #[test]
+    fn test_conversion_path_direct() {
+        let path = ConversionPath::direct("application/pdf", "text/html");
+        assert!(path.is_direct());
+        assert_eq!(path.step_count(), 1);
+        assert_eq!(path.source(), Some("application/pdf"));
+        assert_eq!(path.target(), Some("text/html"));
+    }
+
+    #[test]
+    fn test_conversion_path_multi_step() {
+        let path = ConversionPath::multi_step(vec![
+            ConversionStep::new("application/pdf", "text/html"),
+            ConversionStep::new("text/html", "text/markdown"),
+        ]);
+
+        assert!(!path.is_direct());
+        assert_eq!(path.step_count(), 2);
+        assert!(path.quality_estimate < 1.0);
+    }
+
+    #[test]
+    fn test_conversion_result() {
+        let path = ConversionPath::direct("a", "b");
+        let result = ConversionResult::new(Bytes::from_static(b"test"), "text/html", path)
+            .with_warning("Minor formatting changes");
+
+        assert_eq!(result.mime_type, "text/html");
+        assert!(!result.warnings.is_empty());
+        assert!(result.is_lossless());
+    }
+
+    #[test]
+    fn test_skipped_element() {
+        let element = SkippedElement::new("image", "Unsupported format").with_location("Page 3");
+
+        assert_eq!(element.element_type, "image");
+        assert_eq!(element.location, Some("Page 3".to_string()));
+    }
+}
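Downstream code can treat `is_lossless()` as the fast path and only walk warnings and skipped elements otherwise. A hedged reporting sketch over the types above (the output format is illustrative):

```rust
use nvisy_document::conversion::ConversionResult;

// Hypothetical post-conversion report; uses only fields defined above.
fn report(result: &ConversionResult) {
    if result.is_lossless() {
        println!("{} bytes, lossless", result.size_bytes());
        return;
    }
    for warning in &result.warnings {
        eprintln!("warning: {warning}");
    }
    for skipped in &result.skipped_elements {
        let location = skipped.location.as_deref().unwrap_or("unknown location");
        eprintln!("skipped {}: {} ({location})", skipped.element_type, skipped.reason);
    }
}
```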
diff --git a/crates/nvisy-document/src/error.rs b/crates/nvisy-document/src/error.rs
new file mode 100644
index 0000000..b6b5788
--- /dev/null
+++ b/crates/nvisy-document/src/error.rs
@@ -0,0 +1,396 @@
+//! Error types for document operations.
+
+use std::fmt;
+
+use crate::format::region::RegionId;
+
+/// A boxed error type for wrapping source errors.
+pub type BoxError = Box<dyn std::error::Error + Send + Sync>;
+
+/// Result type for document operations.
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// The error type for document operations.
+#[derive(Debug)]
+pub struct Error {
+    kind: ErrorKind,
+    source: Option<BoxError>,
+}
+
+/// The kind of error that occurred during a document operation.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum ErrorKind {
+    /// The document format is not supported.
+    UnsupportedFormat { format: String },
+
+    /// The document could not be parsed.
+    Parse { message: String },
+
+    /// The requested operation is not supported by this format.
+    OperationNotSupported { operation: String },
+
+    /// A referenced region was not found.
+    RegionNotFound { id: RegionId },
+
+    /// A referenced page was not found.
+    PageNotFound { page: u32 },
+
+    /// An operation would result in invalid document state.
+    InvalidOperation { message: String },
+
+    /// An I/O error occurred.
+    Io { message: String },
+
+    /// Serialization/deserialization error.
+    Serialization { message: String },
+
+    /// The operation was cancelled.
+    Cancelled,
+
+    /// A timeout occurred.
+    Timeout { duration_ms: u64 },
+
+    /// Resource limit exceeded.
+    ResourceLimit { resource: String },
+
+    /// Session error (e.g., invalid session state).
+    Session { message: String },
+
+    /// Conversion error.
+    Conversion { message: String },
+
+    /// Metadata error.
+    Metadata { message: String },
+
+    /// Thumbnail generation error.
+    Thumbnail { message: String },
+
+    /// Protected or encrypted document.
+    Protected { message: String },
+}
+
+impl Error {
+    /// Creates a new error with the given kind.
+    pub fn new(kind: ErrorKind) -> Self {
+        Self { kind, source: None }
+    }
+
+    /// Creates a new error with the given kind and source.
+    pub fn with_source(
+        kind: ErrorKind,
+        source: impl std::error::Error + Send + Sync + 'static,
+    ) -> Self {
+        Self {
+            kind,
+            source: Some(Box::new(source)),
+        }
+    }
+
+    /// Returns the kind of error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+
+    /// Consumes the error and returns the kind.
+    pub fn into_kind(self) -> ErrorKind {
+        self.kind
+    }
+
+    /// Returns true if this error is retriable.
+    pub fn is_retriable(&self) -> bool {
+        matches!(
+            self.kind,
+            ErrorKind::Timeout { .. } | ErrorKind::Io { .. } | ErrorKind::ResourceLimit { .. }
+        )
+    }
+
+    /// Returns true if this error indicates invalid user input.
+    pub fn is_user_error(&self) -> bool {
+        matches!(
+            self.kind,
+            ErrorKind::RegionNotFound { .. }
+                | ErrorKind::PageNotFound { .. }
+                | ErrorKind::InvalidOperation { .. }
+                | ErrorKind::OperationNotSupported { .. }
+        )
+    }
+
+    // Convenience constructors
+
+    /// Creates a parse error.
+    pub fn parse(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::Parse {
+            message: message.into(),
+        })
+    }
+
+    /// Creates a parse error with a source.
+    pub fn parse_with_source(
+        message: impl Into<String>,
+        source: impl std::error::Error + Send + Sync + 'static,
+    ) -> Self {
+        Self::with_source(
+            ErrorKind::Parse {
+                message: message.into(),
+            },
+            source,
+        )
+    }
+
+    /// Creates an unsupported format error.
+    pub fn unsupported_format(format: impl Into<String>) -> Self {
+        Self::new(ErrorKind::UnsupportedFormat {
+            format: format.into(),
+        })
+    }
+
+    /// Creates an operation not supported error.
+    pub fn operation_not_supported(operation: impl Into<String>) -> Self {
+        Self::new(ErrorKind::OperationNotSupported {
+            operation: operation.into(),
+        })
+    }
+
+    /// Creates a region not found error.
+    pub fn region_not_found(id: RegionId) -> Self {
+        Self::new(ErrorKind::RegionNotFound { id })
+    }
+
+    /// Creates a page not found error.
+    pub fn page_not_found(page: u32) -> Self {
+        Self::new(ErrorKind::PageNotFound { page })
+    }
+
+    /// Creates an invalid operation error.
+    pub fn invalid_operation(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::InvalidOperation {
+            message: message.into(),
+        })
+    }
+
+    /// Creates an I/O error.
+    pub fn io(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::Io {
+            message: message.into(),
+        })
+    }
+
+    /// Creates an I/O error with a source.
+    pub fn io_with_source(
+        message: impl Into<String>,
+        source: impl std::error::Error + Send + Sync + 'static,
+    ) -> Self {
+        Self::with_source(
+            ErrorKind::Io {
+                message: message.into(),
+            },
+            source,
+        )
+    }
+
+    /// Creates a serialization error.
+    pub fn serialization(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::Serialization {
+            message: message.into(),
+        })
+    }
+
+    /// Creates a session error.
+    pub fn session(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::Session {
+            message: message.into(),
+        })
+    }
+
+    /// Creates a timeout error.
+    pub fn timeout(duration_ms: u64) -> Self {
+        Self::new(ErrorKind::Timeout { duration_ms })
+    }
+
+    /// Creates a resource limit error.
+    pub fn resource_limit(resource: impl Into<String>) -> Self {
+        Self::new(ErrorKind::ResourceLimit {
+            resource: resource.into(),
+        })
+    }
+
+    /// Creates a cancelled error.
+    pub fn cancelled() -> Self {
+        Self::new(ErrorKind::Cancelled)
+    }
+
+    /// Creates a conversion error.
+    pub fn conversion(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::Conversion {
+            message: message.into(),
+        })
+    }
+
+    /// Creates a conversion error with a source.
+    pub fn conversion_with_source(
+        message: impl Into<String>,
+        source: impl std::error::Error + Send + Sync + 'static,
+    ) -> Self {
+        Self::with_source(
+            ErrorKind::Conversion {
+                message: message.into(),
+            },
+            source,
+        )
+    }
+
+    /// Creates a metadata error.
+    pub fn metadata(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::Metadata {
+            message: message.into(),
+        })
+    }
+
+    /// Creates a metadata error with a source.
+    pub fn metadata_with_source(
+        message: impl Into<String>,
+        source: impl std::error::Error + Send + Sync + 'static,
+    ) -> Self {
+        Self::with_source(
+            ErrorKind::Metadata {
+                message: message.into(),
+            },
+            source,
+        )
+    }
+
+    /// Creates a thumbnail error.
+    pub fn thumbnail(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::Thumbnail {
+            message: message.into(),
+        })
+    }
+
+    /// Creates a thumbnail error with a source.
+    pub fn thumbnail_with_source(
+        message: impl Into<String>,
+        source: impl std::error::Error + Send + Sync + 'static,
+    ) -> Self {
+        Self::with_source(
+            ErrorKind::Thumbnail {
+                message: message.into(),
+            },
+            source,
+        )
+    }
+
+    /// Creates a protected document error.
+    pub fn protected(message: impl Into<String>) -> Self {
+        Self::new(ErrorKind::Protected {
+            message: message.into(),
+        })
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.kind {
+            ErrorKind::UnsupportedFormat { format } => write!(f, "unsupported format: {format}"),
+            ErrorKind::Parse { message } => write!(f, "parse error: {message}"),
+            ErrorKind::OperationNotSupported { operation } => {
+                write!(f, "operation not supported: {operation}")
+            }
+            ErrorKind::RegionNotFound { id } => write!(f, "region not found: {id}"),
+            ErrorKind::PageNotFound { page } => write!(f, "page not found: {page}"),
+            ErrorKind::InvalidOperation { message } => write!(f, "invalid operation: {message}"),
+            ErrorKind::Io { message } => write!(f, "I/O error: {message}"),
+            ErrorKind::Serialization { message } => write!(f, "serialization error: {message}"),
+            ErrorKind::Cancelled => write!(f, "operation cancelled"),
+            ErrorKind::Timeout { duration_ms } => {
+                write!(f, "operation timed out after {duration_ms}ms")
+            }
+            ErrorKind::ResourceLimit { resource } => {
+                write!(f, "resource limit exceeded: {resource}")
+            }
+            ErrorKind::Session { message } => write!(f, "session error: {message}"),
+            ErrorKind::Conversion { message } => write!(f, "conversion error: {message}"),
+            ErrorKind::Metadata { message } => write!(f, "metadata error: {message}"),
+            ErrorKind::Thumbnail { message } => write!(f, "thumbnail error: {message}"),
+            ErrorKind::Protected { message } => write!(f, "protected document: {message}"),
+        }
+    }
+}
+
+impl std::error::Error for Error {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        self.source
+            .as_ref()
+            .map(|e| e.as_ref() as &(dyn std::error::Error + 'static))
+    }
+}
+
+impl From<std::io::Error> for Error {
+    fn from(error: std::io::Error) -> Self {
+        Self::io_with_source(error.to_string(), error)
+    }
+}
+
+impl From<ErrorKind> for Error {
+    fn from(kind: ErrorKind) -> Self {
+        Self::new(kind)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::error::Error as StdError;
+
+    use super::*;
+
+    #[test]
+    fn test_error_display() {
+        let err = Error::region_not_found(RegionId::new());
+        let msg = err.to_string();
+        assert!(msg.contains("region not found"));
+    }
+
+    #[test]
+    fn test_error_kind() {
+        let err = Error::timeout(1000);
+        assert!(matches!(
+            err.kind(),
+            ErrorKind::Timeout { duration_ms: 1000 }
+        ));
+    }
+
+    #[test]
+    fn test_error_is_retriable() {
+        assert!(Error::timeout(1000).is_retriable());
+        assert!(Error::io("failed").is_retriable());
+        assert!(!Error::region_not_found(RegionId::new()).is_retriable());
+    }
+
+    #[test]
+    fn test_error_is_user_error() {
+        assert!(Error::region_not_found(RegionId::new()).is_user_error());
+        assert!(Error::page_not_found(5).is_user_error());
+        assert!(!Error::timeout(1000).is_user_error());
+    }
+
+    #[test]
+    fn test_from_io_error() {
+        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found");
+        let err: Error = io_err.into();
+        assert!(matches!(err.kind(), ErrorKind::Io { .. }));
+        assert!(StdError::source(&err).is_some());
+    }
+
+    #[test]
+    fn test_error_with_source() {
+        let source = std::io::Error::other("underlying error");
+        let err = Error::parse_with_source("failed to parse", source);
+        assert!(StdError::source(&err).is_some());
+    }
+
+    #[test]
+    fn test_from_error_kind() {
+        let kind = ErrorKind::Cancelled;
+        let err: Error = kind.into();
+        assert!(matches!(err.kind(), ErrorKind::Cancelled));
+    }
+}
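The `is_retriable()`/`is_user_error()` split suggests a retry wrapper that only loops on transient failures. A sketch under that assumption (the backoff policy is illustrative; `tokio` is already a dependency of this crate):

```rust
use std::time::Duration;

use nvisy_document::error::Result;

// Retry transient failures with simple exponential backoff; surface the rest as-is.
async fn with_retries<F, Fut, T>(mut op: F, max_attempts: u32) -> Result<T>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T>>,
{
    let mut attempt = 0;
    loop {
        match op().await {
            Ok(value) => return Ok(value),
            Err(err) if err.is_retriable() && attempt + 1 < max_attempts => {
                attempt += 1;
                tokio::time::sleep(Duration::from_millis(50u64 << attempt)).await;
            }
            Err(err) => return Err(err),
        }
    }
}
```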
diff --git a/crates/nvisy-document/src/format/capabilities.rs b/crates/nvisy-document/src/format/capabilities.rs
new file mode 100644
index 0000000..a8983e9
--- /dev/null
+++ b/crates/nvisy-document/src/format/capabilities.rs
@@ -0,0 +1,486 @@
+//! Document format capabilities.
+//!
+//! Different document formats support different operations. This module
+//! defines a capability matrix that allows querying what operations
+//! are supported by a given format.
+
+use serde::{Deserialize, Serialize};
+
+use crate::operation::{
+    ContentOperation, DocumentOperation, EditOperation, InsertOperation, MetadataOperation,
+    PageOperation, RedactStyle, StructuralOperation,
+};
+
+/// Describes the capabilities of a document format.
+#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct Capabilities {
+    /// Text editing capabilities.
+    pub text: TextCapabilities,
+
+    /// Image handling capabilities.
+    pub image: ImageCapabilities,
+
+    /// Structural capabilities.
+    pub structure: StructureCapabilities,
+
+    /// Page-level capabilities.
+    pub page: PageCapabilities,
+
+    /// Metadata capabilities.
+    pub metadata: MetadataCapabilities,
+}
+
+/// Text editing capabilities.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TextCapabilities {
+    /// Can read/extract text content.
+    pub can_read: bool,
+
+    /// Can replace text while preserving formatting.
+    pub can_replace_preserving_format: bool,
+
+    /// Can replace text (may lose formatting).
+    pub can_replace: bool,
+
+    /// Can insert new text.
+    pub can_insert: bool,
+
+    /// Can delete text regions.
+    pub can_delete: bool,
+
+    /// Supports rich text formatting.
+    pub supports_rich_text: bool,
+
+    /// Supports font embedding.
+    pub supports_font_embedding: bool,
+}
+
+/// Image handling capabilities.
+#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct ImageCapabilities {
+    /// Can extract images.
+    pub can_extract: bool,
+
+    /// Can replace images.
+    pub can_replace: bool,
+
+    /// Can insert new images.
+    pub can_insert: bool,
+
+    /// Can redact images with blur.
+    pub can_blur: bool,
+
+    /// Can redact images with pixelation.
+    pub can_pixelate: bool,
+
+    /// Supported image formats for insertion.
+    pub supported_formats: Vec<String>,
+}
+
+/// Structural capabilities.
+#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct StructureCapabilities {
+    /// Can detect document structure (headings, paragraphs, etc.).
+    pub can_detect_structure: bool,
+
+    /// Can detect tables.
+    pub can_detect_tables: bool,
+
+    /// Can modify table structure.
+    pub can_modify_tables: bool,
+
+    /// Can merge regions.
+    pub can_merge: bool,
+
+    /// Can split regions.
+    pub can_split: bool,
+
+    /// Can move regions.
+    pub can_move: bool,
+
+    /// Can copy regions.
+    pub can_copy: bool,
+}
+
+/// Page-level capabilities.
+#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct PageCapabilities {
+    /// Document has pages (vs. flowing text).
+    pub has_pages: bool,
+
+    /// Can delete pages.
+    pub can_delete: bool,
+
+    /// Can reorder pages.
+    pub can_reorder: bool,
+
+    /// Can rotate pages.
+    pub can_rotate: bool,
+
+    /// Can extract pages to new document.
+    pub can_extract: bool,
+
+    /// Can split document at page boundaries.
+    pub can_split: bool,
+
+    /// Can merge multiple documents.
+    pub can_merge: bool,
+}
+
+/// Metadata capabilities.
+#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct MetadataCapabilities {
+    /// Can read document metadata.
+    pub can_read: bool,
+
+    /// Can modify document metadata.
+    pub can_modify: bool,
+
+    /// Can add annotations/comments.
+    pub can_annotate: bool,
+
+    /// Annotations are preserved in output.
+    pub annotations_preserved: bool,
+}
+
+impl Capabilities {
+    /// Returns capabilities for a format that supports everything.
+    #[must_use]
+    pub fn full() -> Self {
+        Self {
+            text: TextCapabilities {
+                can_read: true,
+                can_replace_preserving_format: true,
+                can_replace: true,
+                can_insert: true,
+                can_delete: true,
+                supports_rich_text: true,
+                supports_font_embedding: true,
+            },
+            image: ImageCapabilities {
+                can_extract: true,
+                can_replace: true,
+                can_insert: true,
+                can_blur: true,
+                can_pixelate: true,
+                supported_formats: vec![
+                    "image/png".to_string(),
+                    "image/jpeg".to_string(),
+                    "image/gif".to_string(),
+                ],
+            },
+            structure: StructureCapabilities {
+                can_detect_structure: true,
+                can_detect_tables: true,
+                can_modify_tables: true,
+                can_merge: true,
+                can_split: true,
+                can_move: true,
+                can_copy: true,
+            },
+            page: PageCapabilities {
+                has_pages: true,
+                can_delete: true,
+                can_reorder: true,
+                can_rotate: true,
+                can_extract: true,
+                can_split: true,
+                can_merge: true,
+            },
+            metadata: MetadataCapabilities {
+                can_read: true,
+                can_modify: true,
+                can_annotate: true,
+                annotations_preserved: true,
+            },
+        }
+    }
+
+    /// Returns capabilities for a read-only format.
+    #[must_use]
+    pub fn read_only() -> Self {
+        Self {
+            text: TextCapabilities {
+                can_read: true,
+                can_replace_preserving_format: false,
+                can_replace: false,
+                can_insert: false,
+                can_delete: false,
+                supports_rich_text: false,
+                supports_font_embedding: false,
+            },
+            image: ImageCapabilities {
+                can_extract: true,
+                ..Default::default()
+            },
+            structure: StructureCapabilities {
+                can_detect_structure: true,
+                can_detect_tables: true,
+                ..Default::default()
+            },
+            page: PageCapabilities {
+                has_pages: true,
+                ..Default::default()
+            },
+            metadata: MetadataCapabilities {
+                can_read: true,
+                ..Default::default()
+            },
+        }
+    }
+
+    /// Checks if the format supports a specific operation.
+    #[must_use]
+    pub fn supports(&self, operation: &EditOperation) -> OperationSupport {
+        match operation {
+            EditOperation::Content(op) => self.supports_content(op),
+            EditOperation::Insert(op) => self.supports_insert(op),
+            EditOperation::Structural(op) => self.supports_structural(op),
+            EditOperation::Page(op) => self.supports_page(op),
+            EditOperation::Document(op) => self.supports_document(op),
+            EditOperation::Metadata(op) => self.supports_metadata(op),
+        }
+    }
+
+    fn supports_content(&self, op: &ContentOperation) -> OperationSupport {
+        match op {
+            ContentOperation::Redact { style, .. } => {
+                if !self.text.can_delete && !self.text.can_replace {
+                    return OperationSupport::NotSupported;
+                }
+                match style {
+                    RedactStyle::Blur { .. } if !self.image.can_blur => {
+                        OperationSupport::Degraded("Blur not supported, will use black box")
+                    }
+                    RedactStyle::Pixelate { .. } if !self.image.can_pixelate => {
+                        OperationSupport::Degraded("Pixelate not supported, will use black box")
+                    }
+                    _ => OperationSupport::Full,
+                }
+            }
+
+            ContentOperation::ReplaceText {
+                preserve_formatting,
+                ..
+            } => {
+                if !self.text.can_replace {
+                    OperationSupport::NotSupported
+                } else if *preserve_formatting && !self.text.can_replace_preserving_format {
+                    OperationSupport::Degraded("Formatting may not be fully preserved")
+                } else {
+                    OperationSupport::Full
+                }
+            }
+
+            ContentOperation::ReplaceSubstring { .. } => {
+                if self.text.can_replace {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+
+            ContentOperation::Delete { .. } => {
+                if self.text.can_delete {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+        }
+    }
+
+    fn supports_insert(&self, _op: &InsertOperation) -> OperationSupport {
+        if self.text.can_insert {
+            OperationSupport::Full
+        } else {
+            OperationSupport::NotSupported
+        }
+    }
+
+    fn supports_structural(&self, op: &StructuralOperation) -> OperationSupport {
+        match op {
+            StructuralOperation::Move { .. } => {
+                if self.structure.can_move {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+
+            StructuralOperation::Copy { .. } => {
+                if self.structure.can_copy {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+
+            StructuralOperation::Merge { .. } => {
+                if self.structure.can_merge {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+
+            StructuralOperation::SplitRegion { .. } => {
+                if self.structure.can_split {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+        }
+    }
+
+    fn supports_page(&self, op: &PageOperation) -> OperationSupport {
+        match op {
+            PageOperation::DeletePages { .. } => {
+                if self.page.has_pages && self.page.can_delete {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+
+            PageOperation::ReorderPages { .. } => {
+                if self.page.has_pages && self.page.can_reorder {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+
+            PageOperation::RotatePages { .. } => {
+                if self.page.has_pages && self.page.can_rotate {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+
+            PageOperation::ExtractPages { .. } => {
+                if self.page.has_pages && self.page.can_extract {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+        }
+    }
+
+    fn supports_document(&self, op: &DocumentOperation) -> OperationSupport {
+        match op {
+            DocumentOperation::Split { .. } => {
+                if self.page.can_split {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::NotSupported
+                }
+            }
+        }
+    }
+
+    fn supports_metadata(&self, op: &MetadataOperation) -> OperationSupport {
+        match op {
+            MetadataOperation::Reclassify { .. } | MetadataOperation::UpdateBounds { .. } => {
+                OperationSupport::Full
+            }
+
+            MetadataOperation::Annotate { .. } => {
+                if self.metadata.can_annotate {
+                    OperationSupport::Full
+                } else {
+                    OperationSupport::Degraded("Annotations won't be persisted in output")
+                }
+            }
+        }
+    }
+}
+
+/// Result of checking operation support.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum OperationSupport {
+    /// Operation is fully supported.
+    Full,
+
+    /// Operation is supported but may not work perfectly.
+    Degraded(&'static str),
+
+    /// Operation is not supported.
+    NotSupported,
+}
+
+impl OperationSupport {
+    /// Returns true if the operation can be attempted.
+    #[must_use]
+    pub const fn is_supported(&self) -> bool {
+        !matches!(self, Self::NotSupported)
+    }
+
+    /// Returns true if the operation is fully supported.
+    #[must_use]
+    pub const fn is_full(&self) -> bool {
+        matches!(self, Self::Full)
+    }
+}
+
+impl Default for TextCapabilities {
+    fn default() -> Self {
+        Self {
+            can_read: true,
+            can_replace_preserving_format: false,
+            can_replace: false,
+            can_insert: false,
+            can_delete: false,
+            supports_rich_text: false,
+            supports_font_embedding: false,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::format::region::RegionId;
+
+    #[test]
+    fn test_full_capabilities() {
+        let caps = Capabilities::full();
+        let region = RegionId::new();
+
+        assert!(caps.supports(&EditOperation::redact(region)).is_full());
+        assert!(caps.supports(&EditOperation::delete(region)).is_full());
+    }
+
+    #[test]
+    fn test_read_only_capabilities() {
+        let caps = Capabilities::read_only();
+        let region = RegionId::new();
+
+        assert!(!caps.supports(&EditOperation::delete(region)).is_supported());
+        assert!(!caps
+            .supports(&EditOperation::replace_text(region, "test"))
+            .is_supported());
+    }
+
+    #[test]
+    fn test_degraded_support() {
+        let mut caps = Capabilities::full();
+        caps.text.can_replace_preserving_format = false;
+
+        let region = RegionId::new();
+        let op: EditOperation = ContentOperation::ReplaceText {
+            target: region,
+            new_text: "test".to_string(),
+            preserve_formatting: true,
+        }
+        .into();
+
+        let support = caps.supports(&op);
+        assert!(support.is_supported());
+        assert!(!support.is_full());
+        assert!(matches!(support, OperationSupport::Degraded(_)));
+    }
+}
diff --git a/crates/nvisy-document/src/format/info.rs b/crates/nvisy-document/src/format/info.rs
new file mode 100644
index 0000000..2b984bd
--- /dev/null
+++ b/crates/nvisy-document/src/format/info.rs
@@ -0,0 +1,106 @@
+//! Document information types.
+
+use jiff::Timestamp;
+
+/// Information about a loaded document.
+#[derive(Debug, Clone, Default)]
+pub struct DocumentInfo {
+    /// Number of pages (if applicable).
+    pub page_count: Option<u32>,
+
+    /// Document title (from metadata).
+    pub title: Option<String>,
+
+    /// Document author (from metadata).
+    pub author: Option<String>,
+
+    /// Creation timestamp.
+    pub created: Option<Timestamp>,
+
+    /// Last modified timestamp.
+    pub modified: Option<Timestamp>,
+
+    /// File size in bytes.
+    pub size_bytes: u64,
+
+    /// MIME type.
+    pub mime_type: String,
+}
+
+impl DocumentInfo {
+    /// Creates a new document info with minimal required fields.
+    #[must_use]
+    pub fn new(mime_type: impl Into<String>, size_bytes: u64) -> Self {
+        Self {
+            page_count: None,
+            title: None,
+            author: None,
+            created: None,
+            modified: None,
+            size_bytes,
+            mime_type: mime_type.into(),
+        }
+    }
+
+    /// Sets the page count.
+    #[must_use]
+    pub fn with_page_count(mut self, count: u32) -> Self {
+        self.page_count = Some(count);
+        self
+    }
+
+    /// Sets the title.
+    #[must_use]
+    pub fn with_title(mut self, title: impl Into<String>) -> Self {
+        self.title = Some(title.into());
+        self
+    }
+
+    /// Sets the author.
+    #[must_use]
+    pub fn with_author(mut self, author: impl Into<String>) -> Self {
+        self.author = Some(author.into());
+        self
+    }
+
+    /// Sets the creation timestamp.
+    #[must_use]
+    pub fn with_created(mut self, created: Timestamp) -> Self {
+        self.created = Some(created);
+        self
+    }
+
+    /// Sets the modified timestamp.
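+    ///
+    /// A small sketch of the builder chain (assumes `jiff::Timestamp::now()`
+    /// as an example timestamp source):
+    ///
+    /// ```ignore
+    /// let now = jiff::Timestamp::now();
+    /// let info = DocumentInfo::new("application/pdf", 1024)
+    ///     .with_created(now)
+    ///     .with_modified(now);
+    /// assert_eq!(info.modified, Some(now));
+    /// ```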
+    #[must_use]
+    pub fn with_modified(mut self, modified: Timestamp) -> Self {
+        self.modified = Some(modified);
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_document_info_builder() {
+        let info = DocumentInfo::new("application/pdf", 1024)
+            .with_page_count(10)
+            .with_title("Test Document")
+            .with_author("Test Author");
+
+        assert_eq!(info.mime_type, "application/pdf");
+        assert_eq!(info.size_bytes, 1024);
+        assert_eq!(info.page_count, Some(10));
+        assert_eq!(info.title, Some("Test Document".to_string()));
+        assert_eq!(info.author, Some("Test Author".to_string()));
+    }
+
+    #[test]
+    fn test_document_info_default() {
+        let info = DocumentInfo::default();
+        assert!(info.page_count.is_none());
+        assert!(info.title.is_none());
+        assert_eq!(info.size_bytes, 0);
+    }
+}
diff --git a/crates/nvisy-document/src/format/mod.rs b/crates/nvisy-document/src/format/mod.rs
new file mode 100644
index 0000000..58d6296
--- /dev/null
+++ b/crates/nvisy-document/src/format/mod.rs
@@ -0,0 +1,97 @@
+//! Document format abstraction.
+//!
+//! This module defines the core traits for document handling:
+//!
+//! - [`DocumentFormat`]: A format handler (class/factory) that can load and create documents
+//! - [`Document`]: A loaded document instance for reading document content
+//! - [`EditableDocument`]: Extension trait for documents that support editing
+//!
+//! Think of `DocumentFormat` as a class and `Document` as an instance of that class.
+
+mod capabilities;
+mod info;
+mod page;
+
+pub mod region;
+
+use std::future::Future;
+
+use async_trait::async_trait;
+use bytes::Bytes;
+pub use capabilities::{
+    Capabilities, ImageCapabilities, MetadataCapabilities, OperationSupport, PageCapabilities,
+    StructureCapabilities, TextCapabilities,
+};
+pub use info::DocumentInfo;
+pub use page::PageOptions;
+pub use region::{BoundingBox, Point, Region, RegionId, RegionKind, RegionSource, RegionStatus};
+
+use crate::error::Result;
+use crate::operation::{EditOperation, EditResult};
+
+/// Trait for document format handlers with an associated Document type.
+///
+/// A `DocumentFormat` is like a class that knows how to load and create
+/// documents of a specific format. Each format implementation provides
+/// a concrete `Document` type.
+pub trait DocumentFormat: Send + Sync {
+    /// The concrete document type produced by this format.
+    type Document: EditableDocument;
+
+    /// Returns the format name (e.g., "pdf", "docx").
+    fn name(&self) -> &'static str;
+
+    /// Returns the MIME types this format handles.
+    fn mime_types(&self) -> &'static [&'static str];
+
+    /// Returns the file extensions this format handles (without dots).
+    fn extensions(&self) -> &'static [&'static str];
+
+    /// Returns the capabilities of this format.
+    fn capabilities(&self) -> &Capabilities;
+
+    /// Loads a document from bytes.
+    fn load(&self, data: Bytes) -> impl Future<Output = Result<Self::Document>> + Send;
+
+    /// Creates a new empty document.
+    fn create_empty(&self) -> impl Future<Output = Result<Self::Document>> + Send;
+}
+
+/// A loaded document instance (read-only access).
+///
+/// Documents provide read access to document content and structure.
+/// For editing capabilities, see [`EditableDocument`].
+#[async_trait]
+pub trait Document: Send + Sync {
+    /// Returns document information.
+    fn info(&self) -> &DocumentInfo;
+
+    /// Returns all regions in the document.
+    fn regions(&self) -> &[Region];
+
+    /// Returns regions for a specific page.
+    fn regions_for_page(&self, page: u32) -> Vec<&Region>;
+
+    /// Finds a region by ID.
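+    ///
+    /// A hedged sketch of a lookup against any `Document` implementation
+    /// (`doc` and `id` are hypothetical values captured earlier):
+    ///
+    /// ```ignore
+    /// if let Some(region) = doc.find_region(id) {
+    ///     println!("region {} is on page {:?}", region.id, region.page);
+    /// }
+    /// ```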
+    fn find_region(&self, id: RegionId) -> Option<&Region>;
+
+    /// Serializes the document to bytes.
+    async fn serialize(&self) -> Result<Bytes>;
+}
+
+/// Extension trait for documents that support editing.
+///
+/// This trait extends [`Document`] with mutation capabilities.
+/// Not all document formats support editing; check the format's
+/// [`Capabilities`] to determine what operations are supported.
+#[async_trait]
+pub trait EditableDocument: Document {
+    /// Applies an edit operation to the document.
+    async fn apply(&mut self, operation: &EditOperation) -> Result<EditResult>;
+
+    /// Returns whether the document has unsaved changes.
+    fn is_modified(&self) -> bool;
+
+    /// Extracts regions for specific pages (for streaming/pagination).
+    async fn extract_page_regions(&mut self, options: &PageOptions) -> Result<Vec<Region>>;
+}
diff --git a/crates/nvisy-document/src/format/page.rs b/crates/nvisy-document/src/format/page.rs
new file mode 100644
index 0000000..ac11c74
--- /dev/null
+++ b/crates/nvisy-document/src/format/page.rs
@@ -0,0 +1,81 @@
+//! Page-related types for document operations.
+
+/// Page extraction options.
+#[derive(Debug, Clone, Default)]
+pub struct PageOptions {
+    /// Starting page (0-indexed).
+    pub start_page: u32,
+
+    /// Number of pages to extract (None = all remaining).
+    pub page_count: Option<u32>,
+
+    /// Whether to include detailed region extraction.
+    pub extract_regions: bool,
+}
+
+impl PageOptions {
+    /// Creates options for a single page.
+    #[must_use]
+    pub fn single(page: u32) -> Self {
+        Self {
+            start_page: page,
+            page_count: Some(1),
+            extract_regions: true,
+        }
+    }
+
+    /// Creates options for a range of pages.
+    #[must_use]
+    pub fn range(start: u32, count: u32) -> Self {
+        Self {
+            start_page: start,
+            page_count: Some(count),
+            extract_regions: true,
+        }
+    }
+
+    /// Creates options for all pages starting from the given page.
+    #[must_use]
+    pub fn from_page(start: u32) -> Self {
+        Self {
+            start_page: start,
+            page_count: None,
+            extract_regions: true,
+        }
+    }
+
+    /// Sets whether to extract regions.
+    #[must_use]
+    pub fn with_regions(mut self, extract: bool) -> Self {
+        self.extract_regions = extract;
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_page_options_default() {
+        let opts = PageOptions::default();
+        assert_eq!(opts.start_page, 0);
+        assert!(opts.page_count.is_none());
+        assert!(!opts.extract_regions);
+    }
+
+    #[test]
+    fn test_page_options_single() {
+        let opts = PageOptions::single(5);
+        assert_eq!(opts.start_page, 5);
+        assert_eq!(opts.page_count, Some(1));
+        assert!(opts.extract_regions);
+    }
+
+    #[test]
+    fn test_page_options_range() {
+        let opts = PageOptions::range(2, 10);
+        assert_eq!(opts.start_page, 2);
+        assert_eq!(opts.page_count, Some(10));
+    }
+}
diff --git a/crates/nvisy-document/src/format/region/bounds.rs b/crates/nvisy-document/src/format/region/bounds.rs
new file mode 100644
index 0000000..38cb9fc
--- /dev/null
+++ b/crates/nvisy-document/src/format/region/bounds.rs
@@ -0,0 +1,339 @@
+//! Bounding box for document regions.
+
+use serde::{Deserialize, Serialize};
+
+/// A 2D point with floating-point coordinates.
+#[derive(Debug, Clone, Copy, PartialEq, Default, Serialize, Deserialize)]
+pub struct Point {
+    /// X coordinate.
+    pub x: f64,
+    /// Y coordinate.
+    pub y: f64,
+}
+
+impl Point {
+    /// Creates a new point.
+    #[must_use]
+    pub const fn new(x: f64, y: f64) -> Self {
+        Self { x, y }
+    }
+
+    /// Creates a point at the origin (0, 0).
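+    ///
+    /// A minimal sketch:
+    ///
+    /// ```ignore
+    /// let p = Point::origin();
+    /// assert_eq!(p, Point::new(0.0, 0.0));
+    /// ```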
+    #[must_use]
+    pub const fn origin() -> Self {
+        Self::new(0.0, 0.0)
+    }
+
+    /// Calculates the Euclidean distance to another point.
+    #[must_use]
+    pub fn distance_to(&self, other: &Point) -> f64 {
+        let dx = self.x - other.x;
+        let dy = self.y - other.y;
+        (dx * dx + dy * dy).sqrt()
+    }
+
+    /// Calculates the midpoint between this point and another.
+    #[must_use]
+    pub fn midpoint(&self, other: &Point) -> Point {
+        Point::new((self.x + other.x) / 2.0, (self.y + other.y) / 2.0)
+    }
+
+    /// Translates the point by the given offset.
+    #[must_use]
+    pub fn translate(&self, dx: f64, dy: f64) -> Point {
+        Point::new(self.x + dx, self.y + dy)
+    }
+}
+
+impl From<[f64; 2]> for Point {
+    fn from(coords: [f64; 2]) -> Self {
+        Self::new(coords[0], coords[1])
+    }
+}
+
+impl From<Point> for [f64; 2] {
+    fn from(point: Point) -> Self {
+        [point.x, point.y]
+    }
+}
+
+impl From<(f64, f64)> for Point {
+    fn from((x, y): (f64, f64)) -> Self {
+        Self::new(x, y)
+    }
+}
+
+impl From<Point> for (f64, f64) {
+    fn from(point: Point) -> Self {
+        (point.x, point.y)
+    }
+}
+
+/// A bounding box in normalized coordinates (0.0 to 1.0).
+///
+/// Coordinates are relative to the page or container dimensions,
+/// making them resolution-independent.
+///
+/// The coordinate system uses top-left as origin:
+/// - `x` increases from left to right
+/// - `y` increases from top to bottom
+#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
+pub struct BoundingBox {
+    /// Left edge (0.0 = left edge of page).
+    pub x: f64,
+
+    /// Top edge (0.0 = top edge of page).
+    pub y: f64,
+
+    /// Width as a fraction of page width.
+    pub width: f64,
+
+    /// Height as a fraction of page height.
+    pub height: f64,
+}
+
+impl BoundingBox {
+    /// Creates a new bounding box.
+    #[must_use]
+    pub const fn new(x: f64, y: f64, width: f64, height: f64) -> Self {
+        Self {
+            x,
+            y,
+            width,
+            height,
+        }
+    }
+
+    /// Creates a bounding box covering the entire page.
+    #[must_use]
+    pub const fn full_page() -> Self {
+        Self {
+            x: 0.0,
+            y: 0.0,
+            width: 1.0,
+            height: 1.0,
+        }
+    }
+
+    /// Creates a bounding box from absolute pixel coordinates.
+    #[must_use]
+    pub fn from_pixels(
+        x: u32,
+        y: u32,
+        width: u32,
+        height: u32,
+        page_width: u32,
+        page_height: u32,
+    ) -> Self {
+        Self {
+            x: f64::from(x) / f64::from(page_width),
+            y: f64::from(y) / f64::from(page_height),
+            width: f64::from(width) / f64::from(page_width),
+            height: f64::from(height) / f64::from(page_height),
+        }
+    }
+
+    /// Converts to absolute pixel coordinates.
+    #[must_use]
+    pub fn to_pixels(&self, page_width: u32, page_height: u32) -> (u32, u32, u32, u32) {
+        let x = (self.x * f64::from(page_width)).round() as u32;
+        let y = (self.y * f64::from(page_height)).round() as u32;
+        let w = (self.width * f64::from(page_width)).round() as u32;
+        let h = (self.height * f64::from(page_height)).round() as u32;
+        (x, y, w, h)
+    }
+
+    /// Returns the right edge x-coordinate.
+    #[must_use]
+    pub fn right(&self) -> f64 {
+        self.x + self.width
+    }
+
+    /// Returns the bottom edge y-coordinate.
+    #[must_use]
+    pub fn bottom(&self) -> f64 {
+        self.y + self.height
+    }
+
+    /// Returns the center point.
+    #[must_use]
+    pub fn center(&self) -> (f64, f64) {
+        (self.x + self.width / 2.0, self.y + self.height / 2.0)
+    }
+
+    /// Returns the area of the bounding box.
+    #[must_use]
+    pub fn area(&self) -> f64 {
+        self.width * self.height
+    }
+
+    /// Checks if this bounding box contains a point.
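+    ///
+    /// A short sketch in normalized page coordinates (hypothetical values):
+    ///
+    /// ```ignore
+    /// let bbox = BoundingBox::new(0.25, 0.25, 0.5, 0.5);
+    /// assert!(bbox.contains_point(0.5, 0.5));
+    /// assert!(!bbox.contains_point(0.1, 0.1));
+    /// ```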
+    #[must_use]
+    pub fn contains_point(&self, x: f64, y: f64) -> bool {
+        x >= self.x && x <= self.right() && y >= self.y && y <= self.bottom()
+    }
+
+    /// Checks if this bounding box intersects with another.
+    #[must_use]
+    pub fn intersects(&self, other: &Self) -> bool {
+        self.x < other.right()
+            && self.right() > other.x
+            && self.y < other.bottom()
+            && self.bottom() > other.y
+    }
+
+    /// Returns the intersection of two bounding boxes, if any.
+    #[must_use]
+    pub fn intersection(&self, other: &Self) -> Option<Self> {
+        if !self.intersects(other) {
+            return None;
+        }
+
+        let x = self.x.max(other.x);
+        let y = self.y.max(other.y);
+        let right = self.right().min(other.right());
+        let bottom = self.bottom().min(other.bottom());
+
+        Some(Self {
+            x,
+            y,
+            width: right - x,
+            height: bottom - y,
+        })
+    }
+
+    /// Returns the union (bounding box containing both).
+    #[must_use]
+    pub fn union(&self, other: &Self) -> Self {
+        let x = self.x.min(other.x);
+        let y = self.y.min(other.y);
+        let right = self.right().max(other.right());
+        let bottom = self.bottom().max(other.bottom());
+
+        Self {
+            x,
+            y,
+            width: right - x,
+            height: bottom - y,
+        }
+    }
+
+    /// Calculates the Intersection over Union (IoU) with another bounding box.
+    ///
+    /// Returns a value between 0.0 (no overlap) and 1.0 (identical boxes).
+    #[must_use]
+    pub fn iou(&self, other: &Self) -> f64 {
+        let intersection_area = self.intersection(other).map_or(0.0, |b| b.area());
+        let union_area = self.area() + other.area() - intersection_area;
+
+        if union_area == 0.0 {
+            0.0
+        } else {
+            intersection_area / union_area
+        }
+    }
+
+    /// Expands the bounding box by a margin.
+    #[must_use]
+    pub fn expand(&self, margin: f64) -> Self {
+        Self {
+            x: (self.x - margin).max(0.0),
+            y: (self.y - margin).max(0.0),
+            width: (self.width + 2.0 * margin).min(1.0 - self.x + margin),
+            height: (self.height + 2.0 * margin).min(1.0 - self.y + margin),
+        }
+    }
+}
+
+impl Default for BoundingBox {
+    fn default() -> Self {
+        Self::full_page()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_point() {
+        let p1 = Point::new(0.0, 0.0);
+        let p2 = Point::new(3.0, 4.0);
+        assert!((p1.distance_to(&p2) - 5.0).abs() < f64::EPSILON);
+
+        let mid = p1.midpoint(&p2);
+        assert!((mid.x - 1.5).abs() < f64::EPSILON);
+        assert!((mid.y - 2.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_new() {
+        let bbox = BoundingBox::new(0.1, 0.2, 0.3, 0.4);
+        assert!((bbox.x - 0.1).abs() < f64::EPSILON);
+        assert!((bbox.y - 0.2).abs() < f64::EPSILON);
+        assert!((bbox.width - 0.3).abs() < f64::EPSILON);
+        assert!((bbox.height - 0.4).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_from_pixels() {
+        let bbox = BoundingBox::from_pixels(100, 200, 300, 400, 1000, 1000);
+        assert!((bbox.x - 0.1).abs() < f64::EPSILON);
+        assert!((bbox.y - 0.2).abs() < f64::EPSILON);
+        assert!((bbox.width - 0.3).abs() < f64::EPSILON);
+        assert!((bbox.height - 0.4).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_to_pixels() {
+        let bbox = BoundingBox::new(0.1, 0.2, 0.3, 0.4);
+        let (x, y, w, h) = bbox.to_pixels(1000, 1000);
+        assert_eq!(x, 100);
+        assert_eq!(y, 200);
+        assert_eq!(w, 300);
+        assert_eq!(h, 400);
+    }
+
+    #[test]
+    fn test_intersection() {
+        let a = BoundingBox::new(0.0, 0.0, 0.5, 0.5);
+        let b = BoundingBox::new(0.25, 0.25, 0.5, 0.5);
+
+        assert!(a.intersects(&b));
+
+        let intersection = a.intersection(&b).unwrap();
+        assert!((intersection.x - 0.25).abs() < f64::EPSILON);
+        assert!((intersection.y - 0.25).abs() < f64::EPSILON);
+        assert!((intersection.width - 0.25).abs() < f64::EPSILON);
+        assert!((intersection.height - 0.25).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_no_intersection() {
+        let a = BoundingBox::new(0.0, 0.0, 0.2, 0.2);
+        let b = BoundingBox::new(0.5, 0.5, 0.2, 0.2);
+
+        assert!(!a.intersects(&b));
+        assert!(a.intersection(&b).is_none());
+    }
+
+    #[test]
+    fn test_iou() {
+        let a = BoundingBox::new(0.0, 0.0, 0.5, 0.5);
+        let b = BoundingBox::new(0.0, 0.0, 0.5, 0.5);
+
+        assert!((a.iou(&b) - 1.0).abs() < f64::EPSILON);
+
+        let c = BoundingBox::new(0.6, 0.6, 0.2, 0.2);
+        assert!(a.iou(&c).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_serde() {
+        let bbox = BoundingBox::new(0.1, 0.2, 0.3, 0.4);
+        let json = serde_json::to_string(&bbox).unwrap();
+        let parsed: BoundingBox = serde_json::from_str(&json).unwrap();
+        assert!((bbox.x - parsed.x).abs() < f64::EPSILON);
+        assert!((bbox.y - parsed.y).abs() < f64::EPSILON);
+    }
+}
diff --git a/crates/nvisy-document/src/format/region/core.rs b/crates/nvisy-document/src/format/region/core.rs
new file mode 100644
index 0000000..2e10c4e
--- /dev/null
+++ b/crates/nvisy-document/src/format/region/core.rs
@@ -0,0 +1,253 @@
+//! Core Region type.
+
+use std::num::NonZeroU32;
+
+use serde::{Deserialize, Serialize};
+
+use super::{BoundingBox, RegionId, RegionKind, RegionSource, RegionStatus};
+
+/// A region within a document that can be referenced and modified.
+///
+/// Regions are the fundamental unit for VLM-driven document editing.
+/// Each region has a stable ID, spatial bounds, and optional text content.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Region {
+    /// Unique identifier for this region.
+    pub id: RegionId,
+
+    /// Page number (1-indexed), if applicable.
+    pub page: Option<NonZeroU32>,
+
+    /// Bounding box in normalized coordinates (0.0-1.0).
+    pub bounds: BoundingBox,
+
+    /// Text content within this region, if extractable.
+    pub text: Option<String>,
+
+    /// Semantic type of this region.
+    pub kind: RegionKind,
+
+    /// Current status within the edit session (None means Active).
+    pub status: Option<RegionStatus>,
+
+    /// How this region was identified/created.
+    pub source: RegionSource,
+
+    /// Parent region ID, if this is a nested region.
+    pub parent: Option<RegionId>,
+
+    /// Child region IDs, if this is a container.
+    pub children: Vec<RegionId>,
+}
+
+impl Region {
+    /// Creates a new region with the given bounds.
+    #[must_use]
+    pub fn new(bounds: BoundingBox) -> Self {
+        Self {
+            id: RegionId::new(),
+            page: None,
+            bounds,
+            text: None,
+            kind: RegionKind::Unknown,
+            status: None,
+            source: RegionSource::Parser,
+            parent: None,
+            children: Vec::new(),
+        }
+    }
+
+    /// Creates a simple text region with default bounds.
+    #[must_use]
+    pub fn text(content: impl Into<String>) -> Self {
+        Self {
+            id: RegionId::new(),
+            page: None,
+            bounds: BoundingBox::default(),
+            text: Some(content.into()),
+            kind: RegionKind::Text,
+            status: None,
+            source: RegionSource::Parser,
+            parent: None,
+            children: Vec::new(),
+        }
+    }
+
+    /// Creates a new region on a specific page.
+    #[must_use]
+    pub fn on_page(page: NonZeroU32, bounds: BoundingBox) -> Self {
+        Self {
+            page: Some(page),
+            ..Self::new(bounds)
+        }
+    }
+
+    /// Sets the text content.
+    #[must_use]
+    pub fn with_text(mut self, text: impl Into<String>) -> Self {
+        self.text = Some(text.into());
+        self
+    }
+
+    /// Sets the region kind.
+    #[must_use]
+    pub fn with_kind(mut self, kind: RegionKind) -> Self {
+        self.kind = kind;
+        self
+    }
+
+    /// Sets the region source.
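+    ///
+    /// A minimal builder sketch (hypothetical region content):
+    ///
+    /// ```ignore
+    /// let region = Region::text("Account number").with_source(RegionSource::Worker);
+    /// assert_eq!(region.source, RegionSource::Worker);
+    /// ```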
+ #[must_use] + pub fn with_source(mut self, source: RegionSource) -> Self { + self.source = source; + self + } + + /// Sets the parent region. + #[must_use] + pub fn with_parent(mut self, parent: RegionId) -> Self { + self.parent = Some(parent); + self + } + + /// Sets the region status. + #[must_use] + pub fn with_status(mut self, status: RegionStatus) -> Self { + self.status = Some(status); + self + } + + /// Adds a child region ID. + pub fn add_child(&mut self, child: RegionId) { + self.children.push(child); + } + + /// Returns the effective status (defaults to Active if None). + #[must_use] + pub fn effective_status(&self) -> RegionStatus { + self.status.unwrap_or(RegionStatus::Active) + } + + /// Returns true if this region is still valid for operations. + #[must_use] + pub fn is_valid(&self) -> bool { + self.effective_status().is_valid() + } + + /// Returns true if this region has text content. + #[must_use] + pub fn has_text(&self) -> bool { + self.text.as_ref().is_some_and(|t| !t.is_empty()) + } + + /// Returns true if this region is a container for other regions. + #[must_use] + pub fn is_container(&self) -> bool { + self.kind.is_container() || !self.children.is_empty() + } + + /// Returns true if this region can have its text edited. + #[must_use] + pub fn is_text_editable(&self) -> bool { + self.kind.is_text_editable() && self.is_valid() + } + + /// Marks the region as modified. + pub fn mark_modified(&mut self) { + if self.effective_status() == RegionStatus::Active { + self.status = Some(RegionStatus::Modified); + } + } + + /// Marks the region as deleted. + pub fn mark_deleted(&mut self) { + self.status = Some(RegionStatus::Deleted); + } + + /// Updates the text content and marks as modified. + pub fn update_text(&mut self, new_text: String) { + self.text = Some(new_text); + self.mark_modified(); + } + + /// Updates the bounds and marks as modified. 
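+    ///
+    /// A small sketch of the resulting status transition:
+    ///
+    /// ```ignore
+    /// let mut region = Region::new(BoundingBox::default());
+    /// region.update_bounds(BoundingBox::new(0.0, 0.0, 0.5, 0.5));
+    /// assert_eq!(region.effective_status(), RegionStatus::Modified);
+    /// ```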
+ pub fn update_bounds(&mut self, new_bounds: BoundingBox) { + self.bounds = new_bounds; + self.mark_modified(); + } +} + +impl Default for Region { + fn default() -> Self { + Self::new(BoundingBox::default()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_region_creation() { + let bounds = BoundingBox::new(0.1, 0.2, 0.3, 0.4); + let region = Region::new(bounds); + + assert!(region.is_valid()); + assert!(!region.has_text()); + assert_eq!(region.kind, RegionKind::Unknown); + assert_eq!(region.effective_status(), RegionStatus::Active); + assert!(region.status.is_none()); + } + + #[test] + fn test_region_builder() { + let page = NonZeroU32::new(1).unwrap(); + let region = Region::on_page(page, BoundingBox::new(0.0, 0.0, 0.5, 0.5)) + .with_text("Hello, world!") + .with_kind(RegionKind::Text); + + assert_eq!(region.page, Some(page)); + assert_eq!(region.text.as_deref(), Some("Hello, world!")); + assert_eq!(region.kind, RegionKind::Text); + } + + #[test] + fn test_region_modification() { + let mut region = Region::new(BoundingBox::default()).with_text("Original"); + + assert!(region.status.is_none()); + assert_eq!(region.effective_status(), RegionStatus::Active); + + region.update_text("Modified".to_string()); + + assert_eq!(region.status, Some(RegionStatus::Modified)); + assert_eq!(region.text.as_deref(), Some("Modified")); + } + + #[test] + fn test_region_deletion() { + let mut region = Region::new(BoundingBox::default()); + assert!(region.is_valid()); + + region.mark_deleted(); + + assert!(!region.is_valid()); + assert_eq!(region.status, Some(RegionStatus::Deleted)); + } + + #[test] + fn test_region_serde() { + let page = NonZeroU32::new(2).unwrap(); + let region = Region::on_page(page, BoundingBox::new(0.1, 0.2, 0.3, 0.4)) + .with_text("Test") + .with_kind(RegionKind::Heading); + + let json = serde_json::to_string(®ion).unwrap(); + let parsed: Region = serde_json::from_str(&json).unwrap(); + + assert_eq!(region.id, parsed.id); + assert_eq!(region.page, parsed.page); + assert_eq!(region.text, parsed.text); + assert_eq!(region.kind, parsed.kind); + } +} diff --git a/crates/nvisy-document/src/format/region/id.rs b/crates/nvisy-document/src/format/region/id.rs new file mode 100644 index 0000000..ae04131 --- /dev/null +++ b/crates/nvisy-document/src/format/region/id.rs @@ -0,0 +1,85 @@ +//! Region identifier types. + +use std::fmt; + +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// Unique identifier for a region within a document session. +/// +/// Region IDs are stable across edits within the same session, allowing +/// VLM-driven workflows to reference regions across multiple turns. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct RegionId(Uuid); + +impl RegionId { + /// Creates a new unique region ID. + #[must_use] + pub fn new() -> Self { + Self(Uuid::new_v4()) + } + + /// Creates a region ID from an existing UUID. + #[must_use] + pub fn from_uuid(uuid: Uuid) -> Self { + Self(uuid) + } + + /// Returns the underlying UUID. 
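+    ///
+    /// A minimal round-trip sketch:
+    ///
+    /// ```ignore
+    /// let id = RegionId::new();
+    /// assert_eq!(RegionId::from_uuid(id.as_uuid()), id);
+    /// ```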
+    #[must_use]
+    pub fn as_uuid(&self) -> Uuid {
+        self.0
+    }
+}
+
+impl Default for RegionId {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl fmt::Display for RegionId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "region_{}", &self.0.to_string()[..8])
+    }
+}
+
+impl From<Uuid> for RegionId {
+    fn from(uuid: Uuid) -> Self {
+        Self(uuid)
+    }
+}
+
+impl From<RegionId> for Uuid {
+    fn from(id: RegionId) -> Self {
+        id.0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_region_id_uniqueness() {
+        let id1 = RegionId::new();
+        let id2 = RegionId::new();
+        assert_ne!(id1, id2);
+    }
+
+    #[test]
+    fn test_region_id_display() {
+        let id = RegionId::new();
+        let display = format!("{}", id);
+        assert!(display.starts_with("region_"));
+        assert_eq!(display.len(), 15); // "region_" + 8 chars
+    }
+
+    #[test]
+    fn test_region_id_serde() {
+        let id = RegionId::new();
+        let json = serde_json::to_string(&id).unwrap();
+        let parsed: RegionId = serde_json::from_str(&json).unwrap();
+        assert_eq!(id, parsed);
+    }
+}
diff --git a/crates/nvisy-document/src/format/region/kind.rs b/crates/nvisy-document/src/format/region/kind.rs
new file mode 100644
index 0000000..2d5182d
--- /dev/null
+++ b/crates/nvisy-document/src/format/region/kind.rs
@@ -0,0 +1,150 @@
+//! Region kind classification.
+
+use serde::{Deserialize, Serialize};
+
+/// Classification of a document region by its semantic type.
+///
+/// This helps VLMs understand the context of each region and
+/// guides appropriate editing operations.
+#[derive(
+    Debug,
+    Default,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    Hash,
+    Serialize,
+    Deserialize
+)]
+#[serde(rename_all = "snake_case")]
+pub enum RegionKind {
+    /// Plain text content (paragraphs, sentences).
+    Text,
+
+    /// Heading or title text.
+    Heading,
+
+    /// Tabular data structure.
+    Table,
+
+    /// Table row (child of Table).
+    TableRow,
+
+    /// Table cell (child of TableRow).
+    TableCell,
+
+    /// Embedded image or graphic.
+    Image,
+
+    /// Bulleted or numbered list.
+    List,
+
+    /// Individual list item.
+    ListItem,
+
+    /// Page header region.
+    Header,
+
+    /// Page footer region.
+    Footer,
+
+    /// Footnote or endnote.
+    Footnote,
+
+    /// Form field or interactive element.
+    FormField,
+
+    /// Code block or preformatted text.
+    Code,
+
+    /// Block quote or citation.
+    Quote,
+
+    /// Mathematical formula or equation.
+    Formula,
+
+    /// Hyperlink or reference.
+    Link,
+
+    /// Annotation or comment.
+    Annotation,
+
+    /// Unknown or unclassified content.
+    #[default]
+    Unknown,
+}
+
+impl RegionKind {
+    /// Returns true if this region typically contains editable text.
+    #[must_use]
+    pub const fn is_text_editable(&self) -> bool {
+        matches!(
+            self,
+            Self::Text
+                | Self::Heading
+                | Self::TableCell
+                | Self::ListItem
+                | Self::Header
+                | Self::Footer
+                | Self::Footnote
+                | Self::Code
+                | Self::Quote
+        )
+    }
+
+    /// Returns true if this region is a container for other regions.
+    #[must_use]
+    pub const fn is_container(&self) -> bool {
+        matches!(self, Self::Table | Self::TableRow | Self::List)
+    }
+
+    /// Returns true if this region can be redacted.
+    #[must_use]
+    pub const fn is_redactable(&self) -> bool {
+        !matches!(self, Self::Unknown)
+    }
+
+    /// Returns true if this region can be deleted.
+    #[must_use]
+    pub const fn is_deletable(&self) -> bool {
+        true
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_text_editable() {
+        assert!(RegionKind::Text.is_text_editable());
+        assert!(RegionKind::Heading.is_text_editable());
+        assert!(RegionKind::TableCell.is_text_editable());
+        assert!(!RegionKind::Image.is_text_editable());
+        assert!(!RegionKind::Table.is_text_editable());
+    }
+
+    #[test]
+    fn test_container() {
+        assert!(RegionKind::Table.is_container());
+        assert!(RegionKind::List.is_container());
+        assert!(!RegionKind::Text.is_container());
+        assert!(!RegionKind::TableCell.is_container());
+    }
+
+    #[test]
+    fn test_default() {
+        assert_eq!(RegionKind::default(), RegionKind::Unknown);
+    }
+
+    #[test]
+    fn test_serde() {
+        let kind = RegionKind::TableCell;
+        let json = serde_json::to_string(&kind).unwrap();
+        assert_eq!(json, "\"table_cell\"");
+
+        let parsed: RegionKind = serde_json::from_str(&json).unwrap();
+        assert_eq!(kind, parsed);
+    }
+}
diff --git a/crates/nvisy-document/src/format/region/mod.rs b/crates/nvisy-document/src/format/region/mod.rs
new file mode 100644
index 0000000..adf896e
--- /dev/null
+++ b/crates/nvisy-document/src/format/region/mod.rs
@@ -0,0 +1,20 @@
+//! Region types for document manipulation.
+//!
+//! Regions are the fundamental unit for VLM-driven document editing.
+//! Each region represents a semantically meaningful part of a document
+//! (paragraph, table, image, etc.) that can be referenced and modified.
+
+mod bounds;
+mod core;
+mod id;
+mod kind;
+mod source;
+mod status;
+
+pub use self::core::Region;
+
+pub use bounds::{BoundingBox, Point};
+pub use id::RegionId;
+pub use kind::RegionKind;
+pub use source::RegionSource;
+pub use status::RegionStatus;
diff --git a/crates/nvisy-document/src/format/region/source.rs b/crates/nvisy-document/src/format/region/source.rs
new file mode 100644
index 0000000..0774575
--- /dev/null
+++ b/crates/nvisy-document/src/format/region/source.rs
@@ -0,0 +1,63 @@
+//! Region source tracking.
+
+use derive_more::Display;
+use serde::{Deserialize, Serialize};
+
+/// How a region was identified/created.
+///
+/// Tracks the origin of each region for debugging and
+/// to handle different region types appropriately.
+#[derive(
+    Debug,
+    Display,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    Hash,
+    Default,
+    Serialize,
+    Deserialize
+)]
+#[serde(rename_all = "snake_case")]
+pub enum RegionSource {
+    /// Region was extracted by the format parser.
+    #[default]
+    #[display("parser")]
+    Parser,
+
+    /// Region was identified by LLM analysis.
+    #[display("worker")]
+    Worker,
+
+    /// Region was created by the user.
+    #[display("user")]
+    User,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_default() {
+        assert_eq!(RegionSource::default(), RegionSource::Parser);
+    }
+
+    #[test]
+    fn test_display() {
+        assert_eq!(RegionSource::Parser.to_string(), "parser");
+        assert_eq!(RegionSource::Worker.to_string(), "worker");
+        assert_eq!(RegionSource::User.to_string(), "user");
+    }
+
+    #[test]
+    fn test_serde() {
+        let source = RegionSource::Worker;
+        let json = serde_json::to_string(&source).unwrap();
+        assert_eq!(json, "\"worker\"");
+
+        let parsed: RegionSource = serde_json::from_str(&json).unwrap();
+        assert_eq!(source, parsed);
+    }
+}
diff --git a/crates/nvisy-document/src/format/region/status.rs b/crates/nvisy-document/src/format/region/status.rs
new file mode 100644
index 0000000..7402926
--- /dev/null
+++ b/crates/nvisy-document/src/format/region/status.rs
@@ -0,0 +1,86 @@
+//!
Region status tracking. + +use serde::{Deserialize, Serialize}; + +/// Status of a region within an edit session. +/// +/// Tracks the lifecycle of regions as edits are applied, +/// enabling stable references across multi-turn VLM interactions. +#[derive( + Debug, + Default, + Clone, + Copy, + PartialEq, + Eq, + Hash, + Serialize, + Deserialize +)] +#[serde(rename_all = "snake_case")] +pub enum RegionStatus { + /// Region is active and unchanged from its original state. + #[default] + Active, + + /// Region content has been modified. + Modified, + + /// Region has been deleted. + Deleted, + + /// Region was split into multiple regions. + Split, + + /// Region was merged with another region. + Merged, + + /// Region was created during this session (not in original document). + Created, +} + +impl RegionStatus { + /// Returns true if the region is still valid for operations. + #[must_use] + pub const fn is_valid(&self) -> bool { + matches!(self, Self::Active | Self::Modified | Self::Created) + } + + /// Returns true if the region has been removed. + #[must_use] + pub const fn is_removed(&self) -> bool { + matches!(self, Self::Deleted | Self::Merged) + } + + /// Returns true if the region was changed from its original state. + #[must_use] + pub const fn is_changed(&self) -> bool { + !matches!(self, Self::Active) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_status_validity() { + assert!(RegionStatus::Active.is_valid()); + assert!(RegionStatus::Modified.is_valid()); + assert!(RegionStatus::Created.is_valid()); + assert!(!RegionStatus::Deleted.is_valid()); + assert!(!RegionStatus::Merged.is_valid()); + } + + #[test] + fn test_status_removed() { + assert!(!RegionStatus::Active.is_removed()); + assert!(RegionStatus::Deleted.is_removed()); + assert!(RegionStatus::Merged.is_removed()); + } + + #[test] + fn test_default() { + assert_eq!(RegionStatus::default(), RegionStatus::Active); + } +} diff --git a/crates/nvisy-document/src/lib.rs b/crates/nvisy-document/src/lib.rs new file mode 100644 index 0000000..f85af03 --- /dev/null +++ b/crates/nvisy-document/src/lib.rs @@ -0,0 +1,77 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +//! # nvisy-document +//! +//! Document manipulation library for VLM-driven editing workflows. +//! +//! This crate provides a format-agnostic abstraction for document editing, +//! designed to support Vision Language Model (VLM) function calls for +//! operations like redaction, text replacement, splitting, and merging. +//! +//! ## Core Concepts +//! +//! - **[`DocumentFormat`]**: A format handler (like a class) that can load +//! and create documents. Implementations know about format capabilities +//! and how to parse/serialize documents. +//! +//! - **[`Document`]**: A loaded document instance for reading document content. +//! Think of this as an instance of a DocumentFormat. +//! +//! - **[`EditableDocument`]**: Extension trait for documents that support editing. +//! +//! - **[`Region`]**: Semantic units within a document (text blocks, images, +//! tables) with stable IDs that persist across edit sessions. +//! +//! - **[`EditOperation`]**: Edit commands that target regions by ID, +//! supporting undo/redo and batch operations. +//! +//! ## Extension Traits +//! +//! Document implementations can optionally implement these extension traits: +//! +//! - [`Conversion`]: Convert documents to other formats +//! - [`Metadata`]: Extract and modify document metadata +//! 
- [`ThumbnailGenerator`]: Generate thumbnail images
+
+// Core modules
+pub mod error;
+pub mod format;
+pub mod operation;
+
+// Extension trait modules
+pub mod conversion;
+pub mod metadata;
+pub mod thumbnail;
+
+// Conversion re-exports
+pub use conversion::{
+    Conversion, ConversionOptions, ConversionPath, ConversionResult, ConversionStep, FormatPair,
+    HtmlOptions, PageMargins, PageOrientation, PdfOptions, SkippedElement,
+};
+// Error re-exports
+pub use error::{BoxError, Error, ErrorKind, Result};
+// Region re-exports (from format::region)
+pub use format::region::{
+    BoundingBox, Point, Region, RegionId, RegionKind, RegionSource, RegionStatus,
+};
+// Format re-exports
+pub use format::{
+    Capabilities, Document, DocumentFormat, DocumentInfo, EditableDocument, ImageCapabilities,
+    MetadataCapabilities, OperationSupport, PageCapabilities, PageOptions, StructureCapabilities,
+    TextCapabilities,
+};
+// Metadata re-exports
+pub use metadata::{
+    CustomProperty, DocumentMetadata, Metadata, MetadataExtractOptions, MetadataField,
+    PropertyValue,
+};
+// Operation re-exports
+pub use operation::{
+    ContentOperation, DocumentOperation, EditOperation, EditResult, InsertContent, InsertOperation,
+    MergeOrder, MetadataOperation, PageOperation, RedactStyle, SplitBoundary, StructuralOperation,
+    TextStyle,
+};
+// Thumbnail re-exports
+pub use thumbnail::{ImageFormat, Thumbnail, ThumbnailGenerator, ThumbnailOptions, ThumbnailSize};
diff --git a/crates/nvisy-document/src/metadata/extract.rs b/crates/nvisy-document/src/metadata/extract.rs
new file mode 100644
index 0000000..5ba2243
--- /dev/null
+++ b/crates/nvisy-document/src/metadata/extract.rs
@@ -0,0 +1,118 @@
+//! Metadata extraction options and utilities.
+
+use serde::{Deserialize, Serialize};
+
+/// Options for metadata extraction.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct MetadataExtractOptions {
+    /// Whether to include custom/extended properties.
+    pub include_custom: bool,
+
+    /// Whether to include raw format-specific metadata.
+    pub include_raw: bool,
+
+    /// Whether to compute word/character counts (may be slow for large documents).
+    pub compute_counts: bool,
+
+    /// Maximum depth for nested metadata structures.
+    pub max_depth: Option<u32>,
+
+    /// Specific fields to extract (empty = all fields).
+    pub fields: Vec<String>,
+}
+
+impl MetadataExtractOptions {
+    /// Creates options for basic metadata extraction.
+    #[must_use]
+    pub fn basic() -> Self {
+        Self {
+            include_custom: false,
+            include_raw: false,
+            compute_counts: false,
+            max_depth: Some(1),
+            fields: vec![],
+        }
+    }
+
+    /// Creates options for full metadata extraction.
+    #[must_use]
+    pub fn full() -> Self {
+        Self {
+            include_custom: true,
+            include_raw: true,
+            compute_counts: true,
+            max_depth: None,
+            fields: vec![],
+        }
+    }
+
+    /// Enables custom property extraction.
+    #[must_use]
+    pub fn with_custom(mut self) -> Self {
+        self.include_custom = true;
+        self
+    }
+
+    /// Enables raw metadata extraction.
+    #[must_use]
+    pub fn with_raw(mut self) -> Self {
+        self.include_raw = true;
+        self
+    }
+
+    /// Enables word/character count computation.
+    #[must_use]
+    pub fn with_counts(mut self) -> Self {
+        self.compute_counts = true;
+        self
+    }
+
+    /// Sets the maximum depth for nested metadata.
+    #[must_use]
+    pub fn with_max_depth(mut self, depth: u32) -> Self {
+        self.max_depth = Some(depth);
+        self
+    }
+
+    /// Limits extraction to specific fields.
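+    ///
+    /// A short sketch restricting extraction to two fields (field names are
+    /// illustrative):
+    ///
+    /// ```ignore
+    /// let opts = MetadataExtractOptions::basic().with_fields(["title", "author"]);
+    /// assert_eq!(opts.fields, vec!["title".to_string(), "author".to_string()]);
+    /// ```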
+    #[must_use]
+    pub fn with_fields(mut self, fields: impl IntoIterator<Item = impl Into<String>>) -> Self {
+        self.fields = fields.into_iter().map(Into::into).collect();
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_basic_options() {
+        let opts = MetadataExtractOptions::basic();
+        assert!(!opts.include_custom);
+        assert!(!opts.include_raw);
+        assert!(!opts.compute_counts);
+        assert_eq!(opts.max_depth, Some(1));
+    }
+
+    #[test]
+    fn test_full_options() {
+        let opts = MetadataExtractOptions::full();
+        assert!(opts.include_custom);
+        assert!(opts.include_raw);
+        assert!(opts.compute_counts);
+        assert!(opts.max_depth.is_none());
+    }
+
+    #[test]
+    fn test_builder() {
+        let opts = MetadataExtractOptions::basic()
+            .with_custom()
+            .with_counts()
+            .with_fields(["title", "author"]);
+
+        assert!(opts.include_custom);
+        assert!(opts.compute_counts);
+        assert_eq!(opts.fields.len(), 2);
+    }
+}
diff --git a/crates/nvisy-document/src/metadata/mod.rs b/crates/nvisy-document/src/metadata/mod.rs
new file mode 100644
index 0000000..80e7568
--- /dev/null
+++ b/crates/nvisy-document/src/metadata/mod.rs
@@ -0,0 +1,52 @@
+//! Document metadata extraction and manipulation traits and types.
+//!
+//! This module defines the [`Metadata`] trait for extracting and modifying
+//! document metadata.
+
+mod extract;
+mod types;
+
+use async_trait::async_trait;
+pub use extract::MetadataExtractOptions;
+pub use types::{CustomProperty, DocumentMetadata, MetadataField, PropertyValue};
+
+use crate::error::Result;
+use crate::format::Document;
+
+/// Trait for document metadata extraction and manipulation.
+///
+/// This trait is implemented by [`Document`] types that support reading
+/// and modifying document metadata.
+#[async_trait]
+pub trait Metadata: Document {
+    /// Returns the metadata fields supported by this document's format.
+    fn supported_fields(&self) -> &[MetadataField];
+
+    /// Returns whether metadata modification is supported.
+    fn supports_modification(&self) -> bool;
+
+    /// Extracts metadata from this document.
+    ///
+    /// # Arguments
+    ///
+    /// * `options` - Optional extraction options
+    async fn extract(&self, options: Option<&MetadataExtractOptions>) -> Result<DocumentMetadata>;
+
+    /// Sets a metadata field value.
+    ///
+    /// # Arguments
+    ///
+    /// * `field` - The metadata field to set
+    /// * `value` - The value to set
+    async fn set_field(&mut self, field: MetadataField, value: PropertyValue) -> Result<()>;
+
+    /// Removes a metadata field.
+    ///
+    /// # Arguments
+    ///
+    /// * `field` - The metadata field to remove
+    async fn remove_field(&mut self, field: MetadataField) -> Result<()>;
+
+    /// Strips all metadata from this document.
+    async fn strip_all(&mut self) -> Result<()>;
+}
diff --git a/crates/nvisy-document/src/metadata/types.rs b/crates/nvisy-document/src/metadata/types.rs
new file mode 100644
index 0000000..9d2e16e
--- /dev/null
+++ b/crates/nvisy-document/src/metadata/types.rs
@@ -0,0 +1,365 @@
+//! Metadata types and structures.
+
+use jiff::Timestamp;
+use serde::{Deserialize, Serialize};
+
+/// Document metadata container.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct DocumentMetadata {
+    /// Document title.
+    pub title: Option<String>,
+
+    /// Document author(s).
+    pub author: Option<String>,
+
+    /// Document subject or description.
+    pub subject: Option<String>,
+
+    /// Keywords associated with the document.
+    pub keywords: Vec<String>,
+
+    /// Application that created the document.
+    pub creator: Option<String>,
+
+    /// Application that produced the document (e.g., PDF producer).
+    pub producer: Option<String>,
+
+    /// Document creation timestamp.
+    pub created: Option<Timestamp>,
+
+    /// Document last modification timestamp.
+    pub modified: Option<Timestamp>,
+
+    /// Language of the document (ISO 639-1 code).
+    pub language: Option<String>,
+
+    /// Number of pages (if applicable).
+    pub page_count: Option<u32>,
+
+    /// Word count (if available).
+    pub word_count: Option<u64>,
+
+    /// Character count (if available).
+    pub character_count: Option<u64>,
+
+    /// Document revision number.
+    pub revision: Option<u32>,
+
+    /// Custom/extended properties.
+    pub custom: Vec<CustomProperty>,
+
+    /// Raw metadata for format-specific access.
+    #[serde(skip)]
+    pub raw: Option<serde_json::Value>,
+}
+
+impl DocumentMetadata {
+    /// Creates a new empty metadata container.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Sets the title.
+    #[must_use]
+    pub fn with_title(mut self, title: impl Into<String>) -> Self {
+        self.title = Some(title.into());
+        self
+    }
+
+    /// Sets the author.
+    #[must_use]
+    pub fn with_author(mut self, author: impl Into<String>) -> Self {
+        self.author = Some(author.into());
+        self
+    }
+
+    /// Sets the creation timestamp.
+    #[must_use]
+    pub fn with_created(mut self, created: Timestamp) -> Self {
+        self.created = Some(created);
+        self
+    }
+
+    /// Sets the modification timestamp.
+    #[must_use]
+    pub fn with_modified(mut self, modified: Timestamp) -> Self {
+        self.modified = Some(modified);
+        self
+    }
+
+    /// Adds a keyword.
+    #[must_use]
+    pub fn with_keyword(mut self, keyword: impl Into<String>) -> Self {
+        self.keywords.push(keyword.into());
+        self
+    }
+
+    /// Adds a custom property.
+    #[must_use]
+    pub fn with_custom(mut self, property: CustomProperty) -> Self {
+        self.custom.push(property);
+        self
+    }
+
+    /// Gets a standard field value by name.
+    #[must_use]
+    pub fn get_field(&self, field: MetadataField) -> Option<PropertyValue> {
+        match field {
+            MetadataField::Title => self.title.clone().map(PropertyValue::String),
+            MetadataField::Author => self.author.clone().map(PropertyValue::String),
+            MetadataField::Subject => self.subject.clone().map(PropertyValue::String),
+            MetadataField::Creator => self.creator.clone().map(PropertyValue::String),
+            MetadataField::Producer => self.producer.clone().map(PropertyValue::String),
+            MetadataField::Language => self.language.clone().map(PropertyValue::String),
+            MetadataField::Created => self.created.map(PropertyValue::Timestamp),
+            MetadataField::Modified => self.modified.map(PropertyValue::Timestamp),
+            MetadataField::PageCount => self.page_count.map(|v| PropertyValue::Integer(v as i64)),
+            MetadataField::WordCount => self.word_count.map(|v| PropertyValue::Integer(v as i64)),
+            MetadataField::Revision => self.revision.map(|v| PropertyValue::Integer(v as i64)),
+            MetadataField::Keywords => Some(PropertyValue::StringList(self.keywords.clone())),
+        }
+    }
+
+    /// Gets a custom property by name.
+    #[must_use]
+    pub fn get_custom(&self, name: &str) -> Option<&CustomProperty> {
+        self.custom.iter().find(|p| p.name == name)
+    }
+
+    /// Returns whether any metadata is present.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.title.is_none()
+            && self.author.is_none()
+            && self.subject.is_none()
+            && self.keywords.is_empty()
+            && self.creator.is_none()
+            && self.producer.is_none()
+            && self.created.is_none()
+            && self.modified.is_none()
+            && self.custom.is_empty()
+    }
+}
+
+/// Standard metadata fields.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum MetadataField {
+    /// Document title.
+    Title,
+    /// Document author.
+    Author,
+    /// Document subject/description.
+    Subject,
+    /// Application that created the document.
+    Creator,
+    /// Application that produced the document.
+    Producer,
+    /// Document language.
+    Language,
+    /// Creation timestamp.
+    Created,
+    /// Last modification timestamp.
+    Modified,
+    /// Page count.
+    PageCount,
+    /// Word count.
+    WordCount,
+    /// Revision number.
+    Revision,
+    /// Keywords list.
+    Keywords,
+}
+
+impl MetadataField {
+    /// Returns the field name as a string.
+    #[must_use]
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Title => "title",
+            Self::Author => "author",
+            Self::Subject => "subject",
+            Self::Creator => "creator",
+            Self::Producer => "producer",
+            Self::Language => "language",
+            Self::Created => "created",
+            Self::Modified => "modified",
+            Self::PageCount => "page_count",
+            Self::WordCount => "word_count",
+            Self::Revision => "revision",
+            Self::Keywords => "keywords",
+        }
+    }
+}
+
+/// A custom metadata property.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CustomProperty {
+    /// Property name.
+    pub name: String,
+
+    /// Property value.
+    pub value: PropertyValue,
+
+    /// Property namespace (for XML-based formats).
+    pub namespace: Option<String>,
+}
+
+impl CustomProperty {
+    /// Creates a new custom property.
+    #[must_use]
+    pub fn new(name: impl Into<String>, value: PropertyValue) -> Self {
+        Self {
+            name: name.into(),
+            value,
+            namespace: None,
+        }
+    }
+
+    /// Creates a string property.
+    #[must_use]
+    pub fn string(name: impl Into<String>, value: impl Into<String>) -> Self {
+        Self::new(name, PropertyValue::String(value.into()))
+    }
+
+    /// Creates an integer property.
+    #[must_use]
+    pub fn integer(name: impl Into<String>, value: i64) -> Self {
+        Self::new(name, PropertyValue::Integer(value))
+    }
+
+    /// Creates a boolean property.
+    #[must_use]
+    pub fn boolean(name: impl Into<String>, value: bool) -> Self {
+        Self::new(name, PropertyValue::Boolean(value))
+    }
+
+    /// Sets the namespace.
+    #[must_use]
+    pub fn with_namespace(mut self, namespace: impl Into<String>) -> Self {
+        self.namespace = Some(namespace.into());
+        self
+    }
+}
+
+/// Property value types.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum PropertyValue {
+    /// String value.
+    String(String),
+
+    /// Integer value.
+    Integer(i64),
+
+    /// Floating-point value.
+    Float(f64),
+
+    /// Boolean value.
+    Boolean(bool),
+
+    /// Timestamp value.
+    Timestamp(Timestamp),
+
+    /// List of strings.
+    StringList(Vec<String>),
+}
+
+impl PropertyValue {
+    /// Returns the value as a string, if it is one.
+    #[must_use]
+    pub fn as_str(&self) -> Option<&str> {
+        match self {
+            Self::String(s) => Some(s),
+            _ => None,
+        }
+    }
+
+    /// Returns the value as an integer, if it is one.
+    #[must_use]
+    pub fn as_integer(&self) -> Option<i64> {
+        match self {
+            Self::Integer(i) => Some(*i),
+            _ => None,
+        }
+    }
+
+    /// Returns the value as a boolean, if it is one.
+    #[must_use]
+    pub fn as_boolean(&self) -> Option<bool> {
+        match self {
+            Self::Boolean(b) => Some(*b),
+            _ => None,
+        }
+    }
+
+    /// Returns the value as a timestamp, if it is one.
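+    ///
+    /// A minimal sketch (hypothetical value):
+    ///
+    /// ```ignore
+    /// let value = PropertyValue::String("not a timestamp".into());
+    /// assert!(value.as_timestamp().is_none());
+    /// ```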
+    #[must_use]
+    pub fn as_timestamp(&self) -> Option<&Timestamp> {
+        match self {
+            Self::Timestamp(t) => Some(t),
+            _ => None,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_metadata_builder() {
+        let metadata = DocumentMetadata::new()
+            .with_title("Test Document")
+            .with_author("Test Author")
+            .with_keyword("test")
+            .with_keyword("example");
+
+        assert_eq!(metadata.title, Some("Test Document".to_string()));
+        assert_eq!(metadata.author, Some("Test Author".to_string()));
+        assert_eq!(metadata.keywords.len(), 2);
+    }
+
+    #[test]
+    fn test_custom_property() {
+        let prop = CustomProperty::string("custom_field", "custom_value")
+            .with_namespace("http://example.com/ns");
+
+        assert_eq!(prop.name, "custom_field");
+        assert_eq!(prop.value.as_str(), Some("custom_value"));
+        assert_eq!(prop.namespace, Some("http://example.com/ns".to_string()));
+    }
+
+    #[test]
+    fn test_metadata_is_empty() {
+        let empty = DocumentMetadata::new();
+        assert!(empty.is_empty());
+
+        let with_title = DocumentMetadata::new().with_title("Title");
+        assert!(!with_title.is_empty());
+    }
+
+    #[test]
+    fn test_get_field() {
+        let metadata = DocumentMetadata::new()
+            .with_title("Test")
+            .with_keyword("kw1")
+            .with_keyword("kw2");
+
+        assert_eq!(
+            metadata
+                .get_field(MetadataField::Title)
+                .and_then(|v| v.as_str().map(String::from)),
+            Some("Test".to_string())
+        );
+
+        if let Some(PropertyValue::StringList(keywords)) =
+            metadata.get_field(MetadataField::Keywords)
+        {
+            assert_eq!(keywords.len(), 2);
+        } else {
+            panic!("Expected StringList");
+        }
+    }
+}
diff --git a/crates/nvisy-document/src/operation/insert.rs b/crates/nvisy-document/src/operation/insert.rs
new file mode 100644
index 0000000..40636b5
--- /dev/null
+++ b/crates/nvisy-document/src/operation/insert.rs
@@ -0,0 +1,160 @@
+//! Insert content types.
+
+use bytes::Bytes;
+use serde::{Deserialize, Serialize};
+
+use crate::format::region::RegionKind;
+
+/// Content to insert into a document.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", tag = "type")]
+pub enum InsertContent {
+    /// Plain text content.
+    Text {
+        /// The text to insert.
+        content: String,
+
+        /// Optional style hint.
+        style: Option<TextStyle>,
+    },
+
+    /// Image content.
+    Image {
+        /// Image data.
+        #[serde(with = "bytes_serde")]
+        data: Bytes,
+
+        /// MIME type (e.g., "image/png").
+        mime_type: String,
+
+        /// Optional alt text.
+        alt_text: Option<String>,
+    },
+
+    /// Page break.
+    PageBreak,
+
+    /// Section break.
+    SectionBreak,
+
+    /// Horizontal rule/divider.
+    HorizontalRule,
+}
+
+/// Text style hints for insertion.
+#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum TextStyle {
+    /// Normal paragraph text.
+    #[default]
+    Normal,
+
+    /// Heading level 1-6.
+    Heading(u8),
+
+    /// Bold text.
+    Bold,
+
+    /// Italic text.
+    Italic,
+
+    /// Code/monospace text.
+    Code,
+
+    /// Block quote.
+    Quote,
+}
+
+impl InsertContent {
+    /// Creates a text insert with the given content.
+    #[must_use]
+    pub fn text(content: impl Into<String>) -> Self {
+        Self::Text {
+            content: content.into(),
+            style: None,
+        }
+    }
+
+    /// Creates a text insert with style.
+    #[must_use]
+    pub fn styled_text(content: impl Into<String>, style: TextStyle) -> Self {
+        Self::Text {
+            content: content.into(),
+            style: Some(style),
+        }
+    }
+
+    /// Creates an image insert.
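+    ///
+    /// A small sketch with dummy bytes (placeholder data, not a real PNG):
+    ///
+    /// ```ignore
+    /// let content = InsertContent::image(Bytes::from_static(&[0u8; 4]), "image/png");
+    /// assert_eq!(content.region_kind(), RegionKind::Image);
+    /// ```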
+    #[must_use]
+    pub fn image(data: Bytes, mime_type: impl Into<String>) -> Self {
+        Self::Image {
+            data,
+            mime_type: mime_type.into(),
+            alt_text: None,
+        }
+    }
+
+    /// Returns the region kind this content would create.
+    #[must_use]
+    pub fn region_kind(&self) -> RegionKind {
+        match self {
+            Self::Text { style, .. } => match style {
+                Some(TextStyle::Heading(_)) => RegionKind::Heading,
+                Some(TextStyle::Code) => RegionKind::Code,
+                Some(TextStyle::Quote) => RegionKind::Quote,
+                _ => RegionKind::Text,
+            },
+            Self::Image { .. } => RegionKind::Image,
+            Self::PageBreak | Self::SectionBreak | Self::HorizontalRule => RegionKind::Unknown,
+        }
+    }
+}
+
+/// Serde helper for Bytes.
+mod bytes_serde {
+    use bytes::Bytes;
+    use serde::{Deserialize, Deserializer, Serialize, Serializer};
+
+    pub fn serialize<S>(bytes: &Bytes, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        base64::Engine::encode(&base64::engine::general_purpose::STANDARD, bytes)
+            .serialize(serializer)
+    }
+
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Bytes, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let s = String::deserialize(deserializer)?;
+        base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &s)
+            .map(Bytes::from)
+            .map_err(serde::de::Error::custom)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_text_insert() {
+        let content = InsertContent::text("Hello, world!");
+        assert!(matches!(content, InsertContent::Text { .. }));
+        assert_eq!(content.region_kind(), RegionKind::Text);
+    }
+
+    #[test]
+    fn test_styled_text() {
+        let content = InsertContent::styled_text("Title", TextStyle::Heading(1));
+        assert_eq!(content.region_kind(), RegionKind::Heading);
+    }
+
+    #[test]
+    fn test_image_insert() {
+        let data = Bytes::from(vec![0u8; 10]);
+        let content = InsertContent::image(data, "image/png");
+        assert_eq!(content.region_kind(), RegionKind::Image);
+    }
+}
diff --git a/crates/nvisy-document/src/operation/mod.rs b/crates/nvisy-document/src/operation/mod.rs
new file mode 100644
index 0000000..b523ecd
--- /dev/null
+++ b/crates/nvisy-document/src/operation/mod.rs
@@ -0,0 +1,542 @@
+//! Document edit operations.
+//!
+//! This module defines all the operations that can be performed on a document.
+//! Operations are designed to be:
+//! - Reversible (for undo/redo support)
+//! - Serializable (for persistence and VLM communication)
+//! - Format-agnostic (implementations handle format-specific details)
+
+mod insert;
+mod redact;
+mod result;
+mod split;
+
+use derive_more::From;
+pub use insert::{InsertContent, TextStyle};
+pub use redact::RedactStyle;
+pub use result::EditResult;
+use serde::{Deserialize, Serialize};
+pub use split::{MergeOrder, SplitBoundary};
+
+use crate::format::region::{BoundingBox, RegionId, RegionKind};
+
+/// Content modification operations.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", tag = "operation")]
+pub enum ContentOperation {
+    /// Redact content within a region.
+    Redact {
+        /// Target region to redact.
+        target: RegionId,
+
+        /// Redaction style.
+        #[serde(default)]
+        style: RedactStyle,
+    },
+
+    /// Replace text content in a region.
+    ReplaceText {
+        /// Target region.
+        target: RegionId,
+
+        /// New text content.
+        new_text: String,
+
+        /// Whether to preserve original formatting.
+        #[serde(default = "default_true")]
+        preserve_formatting: bool,
+    },
+
+    /// Replace a substring within a region's text.
+    ReplaceSubstring {
+        /// Target region.
+        target: RegionId,
+
+        /// Text to find (first occurrence).
+        find: String,
+
+        /// Text to replace with.
+        replace: String,
+
+        /// Replace all occurrences vs just the first.
+        #[serde(default)]
+        replace_all: bool,
+    },
+
+    /// Delete a region entirely.
+    Delete {
+        /// Target region to delete.
+        target: RegionId,
+
+        /// Whether to collapse space left by deletion.
+        #[serde(default = "default_true")]
+        collapse_space: bool,
+    },
+}
+
+/// Insertion operations.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", tag = "operation")]
+pub enum InsertOperation {
+    /// Insert content before a region.
+    InsertBefore {
+        /// Region to insert before.
+        target: RegionId,
+
+        /// Content to insert.
+        content: InsertContent,
+    },
+
+    /// Insert content after a region.
+    InsertAfter {
+        /// Region to insert after.
+        target: RegionId,
+
+        /// Content to insert.
+        content: InsertContent,
+    },
+
+    /// Insert content at the start of a region (for containers).
+    InsertStart {
+        /// Container region.
+        target: RegionId,
+
+        /// Content to insert.
+        content: InsertContent,
+    },
+
+    /// Insert content at the end of a region (for containers).
+    InsertEnd {
+        /// Container region.
+        target: RegionId,
+
+        /// Content to insert.
+        content: InsertContent,
+    },
+}
+
+/// Structural operations for moving, copying, merging, and splitting.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", tag = "operation")]
+pub enum StructuralOperation {
+    /// Move a region to a new location.
+    Move {
+        /// Region to move.
+        source: RegionId,
+
+        /// Target location (insert after this region).
+        target: RegionId,
+    },
+
+    /// Copy a region to a new location.
+    Copy {
+        /// Region to copy.
+        source: RegionId,
+
+        /// Target location (insert after this region).
+        target: RegionId,
+    },
+
+    /// Merge multiple regions into one.
+    Merge {
+        /// Regions to merge (in order).
+        regions: Vec<RegionId>,
+
+        /// Separator between merged content.
+        separator: Option<String>,
+    },
+
+    /// Split a region at a specific point.
+    SplitRegion {
+        /// Region to split.
+        target: RegionId,
+
+        /// Character offset to split at.
+        at_offset: usize,
+    },
+}
+
+/// Page-level operations.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", tag = "operation")]
+pub enum PageOperation {
+    /// Delete specific pages.
+    DeletePages {
+        /// Page numbers to delete (0-indexed).
+        pages: Vec<u32>,
+    },
+
+    /// Reorder pages.
+    ReorderPages {
+        /// New page order (each value is the old page index).
+        new_order: Vec<u32>,
+    },
+
+    /// Rotate pages.
+    RotatePages {
+        /// Page numbers to rotate (0-indexed).
+        pages: Vec<u32>,
+
+        /// Rotation in degrees (90, 180, 270).
+        degrees: i16,
+    },
+
+    /// Extract pages to a new document.
+    ExtractPages {
+        /// Page numbers to extract (0-indexed).
+        pages: Vec<u32>,
+    },
+}
+
+/// Document-level operations.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", tag = "operation")]
+pub enum DocumentOperation {
+    /// Split document at specified boundaries.
+    Split {
+        /// Split boundary definitions.
+        boundaries: Vec<SplitBoundary>,
+    },
+}
+
+/// Metadata operations for classification, bounds, and annotations.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", tag = "operation")]
+pub enum MetadataOperation {
+    /// Change region kind/classification.
+    Reclassify {
+        /// Target region.
+        target: RegionId,
+
+        /// New region kind.
+        new_kind: RegionKind,
+    },
+
+    /// Update region bounds (for layout adjustments).
+    UpdateBounds {
+        /// Target region.
+        target: RegionId,
+
+        /// New bounding box.
+        new_bounds: BoundingBox,
+    },
+
+    /// Add annotation/comment to a region.
+    Annotate {
+        /// Target region.
+        target: RegionId,
+
+        /// Annotation text.
+        annotation: String,
+
+        /// Annotation author (optional).
+        author: Option<String>,
+    },
+}
+
+/// An edit operation to be applied to a document.
+///
+/// Operations target specific regions by their stable IDs, allowing
+/// VLM-driven workflows to reference regions across multiple turns.
+#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case", untagged)]
+pub enum EditOperation {
+    /// Content modification operations.
+    Content(ContentOperation),
+
+    /// Insertion operations.
+    Insert(InsertOperation),
+
+    /// Structural operations.
+    Structural(StructuralOperation),
+
+    /// Page-level operations.
+    Page(PageOperation),
+
+    /// Document-level operations.
+    Document(DocumentOperation),
+
+    /// Metadata operations.
+    Metadata(MetadataOperation),
+}
+
+fn default_true() -> bool {
+    true
+}
+
+impl EditOperation {
+    /// Returns the primary target region of this operation, if any.
+    #[must_use]
+    pub fn target(&self) -> Option<RegionId> {
+        match self {
+            Self::Content(op) => op.target(),
+            Self::Insert(op) => op.target(),
+            Self::Structural(op) => op.target(),
+            Self::Page(_) => None,
+            Self::Document(_) => None,
+            Self::Metadata(op) => op.target(),
+        }
+    }
+
+    /// Returns all region IDs referenced by this operation.
+    #[must_use]
+    pub fn referenced_regions(&self) -> Vec<RegionId> {
+        match self {
+            Self::Content(op) => op.referenced_regions(),
+            Self::Insert(op) => op.referenced_regions(),
+            Self::Structural(op) => op.referenced_regions(),
+            Self::Page(_) => vec![],
+            Self::Document(op) => op.referenced_regions(),
+            Self::Metadata(op) => op.referenced_regions(),
+        }
+    }
+
+    /// Returns true if this operation modifies content (vs. metadata only).
+    #[must_use]
+    pub const fn modifies_content(&self) -> bool {
+        match self {
+            Self::Content(_)
+            | Self::Insert(_)
+            | Self::Structural(_)
+            | Self::Page(_)
+            | Self::Document(_) => true,
+            Self::Metadata(_) => false,
+        }
+    }
+
+    /// Returns true if this operation is reversible.
+    #[must_use]
+    pub const fn is_reversible(&self) -> bool {
+        // Every operation defined today can be inverted; format
+        // implementations report the concrete reverse via `EditResult`.
+        true
+    }
+
+    /// Creates a redact operation with default style.
+    #[must_use]
+    pub fn redact(target: RegionId) -> Self {
+        ContentOperation::Redact {
+            target,
+            style: RedactStyle::default(),
+        }
+        .into()
+    }
+
+    /// Creates a redact operation with custom style.
+    #[must_use]
+    pub fn redact_with_style(target: RegionId, style: RedactStyle) -> Self {
+        ContentOperation::Redact { target, style }.into()
+    }
+
+    /// Creates a replace text operation.
+    #[must_use]
+    pub fn replace_text(target: RegionId, new_text: impl Into<String>) -> Self {
+        ContentOperation::ReplaceText {
+            target,
+            new_text: new_text.into(),
+            preserve_formatting: true,
+        }
+        .into()
+    }
+
+    /// Creates a delete operation.
+    #[must_use]
+    pub fn delete(target: RegionId) -> Self {
+        ContentOperation::Delete {
+            target,
+            collapse_space: true,
+        }
+        .into()
+    }
+
+    /// Creates an insert after operation.
+    #[must_use]
+    pub fn insert_after(target: RegionId, content: InsertContent) -> Self {
+        InsertOperation::InsertAfter { target, content }.into()
+    }
+
+    /// Creates an insert before operation.
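+    ///
+    /// Sketch, for some known `target` region:
+    ///
+    /// ```ignore
+    /// let op = EditOperation::insert_before(target, InsertContent::text("Preamble"));
+    /// assert_eq!(op.target(), Some(target));
+    /// ```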
+    #[must_use]
+    pub fn insert_before(target: RegionId, content: InsertContent) -> Self {
+        InsertOperation::InsertBefore { target, content }.into()
+    }
+}
+
+impl ContentOperation {
+    /// Returns the target region of this operation.
+    #[must_use]
+    pub fn target(&self) -> Option<RegionId> {
+        match self {
+            Self::Redact { target, .. }
+            | Self::ReplaceText { target, .. }
+            | Self::ReplaceSubstring { target, .. }
+            | Self::Delete { target, .. } => Some(*target),
+        }
+    }
+
+    /// Returns all region IDs referenced by this operation.
+    #[must_use]
+    pub fn referenced_regions(&self) -> Vec<RegionId> {
+        self.target().into_iter().collect()
+    }
+}
+
+impl InsertOperation {
+    /// Returns the target region of this operation.
+    #[must_use]
+    pub fn target(&self) -> Option<RegionId> {
+        match self {
+            Self::InsertBefore { target, .. }
+            | Self::InsertAfter { target, .. }
+            | Self::InsertStart { target, .. }
+            | Self::InsertEnd { target, .. } => Some(*target),
+        }
+    }
+
+    /// Returns all region IDs referenced by this operation.
+    #[must_use]
+    pub fn referenced_regions(&self) -> Vec<RegionId> {
+        self.target().into_iter().collect()
+    }
+}
+
+impl StructuralOperation {
+    /// Returns the primary target region of this operation.
+    #[must_use]
+    pub fn target(&self) -> Option<RegionId> {
+        match self {
+            Self::Move { source, .. } | Self::Copy { source, .. } => Some(*source),
+            Self::Merge { regions, .. } => regions.first().copied(),
+            Self::SplitRegion { target, .. } => Some(*target),
+        }
+    }
+
+    /// Returns all region IDs referenced by this operation.
+    #[must_use]
+    pub fn referenced_regions(&self) -> Vec<RegionId> {
+        match self {
+            Self::Move { source, target } | Self::Copy { source, target } => vec![*source, *target],
+            Self::Merge { regions, .. } => regions.clone(),
+            Self::SplitRegion { target, .. } => vec![*target],
+        }
+    }
+}
+
+impl DocumentOperation {
+    /// Returns all region IDs referenced by this operation.
+    #[must_use]
+    pub fn referenced_regions(&self) -> Vec<RegionId> {
+        match self {
+            Self::Split { boundaries } => boundaries
+                .iter()
+                .filter_map(|b| match b {
+                    SplitBoundary::AfterRegion { region } => Some(*region),
+                    _ => None,
+                })
+                .collect(),
+        }
+    }
+}
+
+impl MetadataOperation {
+    /// Returns the target region of this operation.
+    #[must_use]
+    pub fn target(&self) -> Option<RegionId> {
+        match self {
+            Self::Reclassify { target, .. }
+            | Self::UpdateBounds { target, .. }
+            | Self::Annotate { target, .. } => Some(*target),
+        }
+    }
+
+    /// Returns all region IDs referenced by this operation.
+    #[must_use]
+    pub fn referenced_regions(&self) -> Vec<RegionId> {
+        self.target().into_iter().collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_target_extraction() {
+        let region = RegionId::new();
+        let op = EditOperation::redact(region);
+        assert_eq!(op.target(), Some(region));
+    }
+
+    #[test]
+    fn test_referenced_regions() {
+        let r1 = RegionId::new();
+        let r2 = RegionId::new();
+
+        let op: EditOperation = StructuralOperation::Move {
+            source: r1,
+            target: r2,
+        }
+        .into();
+        let refs = op.referenced_regions();
+        assert_eq!(refs.len(), 2);
+        assert!(refs.contains(&r1));
+        assert!(refs.contains(&r2));
+    }
+
+    #[test]
+    fn test_modifies_content() {
+        let region = RegionId::new();
+
+        assert!(EditOperation::redact(region).modifies_content());
+        assert!(EditOperation::delete(region).modifies_content());
+
+        let annotate: EditOperation = MetadataOperation::Annotate {
+            target: region,
+            annotation: "test".to_string(),
+            author: None,
+        }
+        .into();
+        assert!(!annotate.modifies_content());
+    }
+
+    #[test]
+    fn test_from_impls() {
+        let region = RegionId::new();
+
+        let _: EditOperation = ContentOperation::Delete {
+            target: region,
+            collapse_space: true,
+        }
+        .into();
+
+        let _: EditOperation = InsertOperation::InsertAfter {
+            target: region,
+            content: InsertContent::text("test"),
+        }
+        .into();
+
+        let _: EditOperation = StructuralOperation::SplitRegion {
+            target: region,
+            at_offset: 10,
+        }
+        .into();
+
+        let _: EditOperation = PageOperation::DeletePages { pages: vec![0] }.into();
+
+        let _: EditOperation = DocumentOperation::Split { boundaries: vec![] }.into();
+
+        let _: EditOperation = MetadataOperation::Reclassify {
+            target: region,
+            new_kind: RegionKind::Text,
+        }
+        .into();
+    }
+
+    #[test]
+    fn test_serde() {
+        let region = RegionId::new();
+        let op = EditOperation::replace_text(region, "Hello, world!");
+
+        let json = serde_json::to_string_pretty(&op).unwrap();
+        let parsed: EditOperation = serde_json::from_str(&json).unwrap();
+        assert_eq!(op, parsed);
+    }
+}
diff --git a/crates/nvisy-document/src/operation/redact.rs b/crates/nvisy-document/src/operation/redact.rs
new file mode 100644
index 0000000..9776971
--- /dev/null
+++ b/crates/nvisy-document/src/operation/redact.rs
@@ -0,0 +1,103 @@
+//! Redaction styles and options.
+
+use serde::{Deserialize, Serialize};
+
+/// Style for redacting content.
+#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum RedactStyle {
+    /// Black box overlay (content hidden but space preserved).
+    #[default]
+    BlackBox,
+
+    /// White box overlay (content hidden, blends with background).
+    WhiteBox,
+
+    /// Replace with placeholder text.
+    Placeholder {
+        /// The placeholder text to show.
+        text: String,
+    },
+
+    /// Blur effect (for images, if supported).
+    Blur {
+        /// Blur intensity (1-10).
+        intensity: u8,
+    },
+
+    /// Pixelate effect (for images, if supported).
+    Pixelate {
+        /// Block size in pixels.
+        block_size: u8,
+    },
+
+    /// Complete removal (content and space removed).
+    Remove,
+}
+
+impl RedactStyle {
+    /// Creates a placeholder redaction with the given text.
+    #[must_use]
+    pub fn placeholder(text: impl Into<String>) -> Self {
+        Self::Placeholder { text: text.into() }
+    }
+
+    /// Creates a blur redaction with the given intensity, clamped to 1-10.
+    #[must_use]
+    pub fn blur(intensity: u8) -> Self {
+        Self::Blur {
+            intensity: intensity.clamp(1, 10),
+        }
+    }
+
+    /// Creates a pixelate redaction with the given block size.
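+    ///
+    /// A zero block size is raised to the minimum of 1 (sketch):
+    ///
+    /// ```ignore
+    /// assert_eq!(RedactStyle::pixelate(0), RedactStyle::Pixelate { block_size: 1 });
+    /// ```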
+    #[must_use]
+    pub fn pixelate(block_size: u8) -> Self {
+        Self::Pixelate {
+            block_size: block_size.max(1),
+        }
+    }
+
+    /// Returns true if this style preserves the original space.
+    #[must_use]
+    pub const fn preserves_space(&self) -> bool {
+        !matches!(self, Self::Remove)
+    }
+
+    /// Returns true if this style is suitable for images.
+    #[must_use]
+    pub const fn is_image_style(&self) -> bool {
+        matches!(self, Self::Blur { .. } | Self::Pixelate { .. })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_redact_style_default() {
+        assert_eq!(RedactStyle::default(), RedactStyle::BlackBox);
+    }
+
+    #[test]
+    fn test_placeholder() {
+        let style = RedactStyle::placeholder("[REDACTED]");
+        assert!(matches!(style, RedactStyle::Placeholder { text } if text == "[REDACTED]"));
+    }
+
+    #[test]
+    fn test_preserves_space() {
+        assert!(RedactStyle::BlackBox.preserves_space());
+        assert!(RedactStyle::placeholder("X").preserves_space());
+        assert!(!RedactStyle::Remove.preserves_space());
+    }
+
+    #[test]
+    fn test_serde() {
+        let style = RedactStyle::Blur { intensity: 5 };
+        let json = serde_json::to_string(&style).unwrap();
+        let parsed: RedactStyle = serde_json::from_str(&json).unwrap();
+        assert_eq!(style, parsed);
+    }
+}
diff --git a/crates/nvisy-document/src/operation/result.rs b/crates/nvisy-document/src/operation/result.rs
new file mode 100644
index 0000000..92d083e
--- /dev/null
+++ b/crates/nvisy-document/src/operation/result.rs
@@ -0,0 +1,136 @@
+//! Edit operation result types.
+
+use super::EditOperation;
+use crate::format::region::{Region, RegionId};
+
+/// Result of applying an edit operation.
+#[derive(Debug, Clone)]
+pub struct EditResult {
+    /// Whether the operation succeeded.
+    pub success: bool,
+
+    /// New regions created by the operation.
+    pub created_regions: Vec<Region>,
+
+    /// Regions modified by the operation.
+    pub modified_regions: Vec<Region>,
+
+    /// Regions deleted by the operation.
+    pub deleted_region_ids: Vec<RegionId>,
+
+    /// Reverse operation for undo support.
+    pub reverse_operation: Option<EditOperation>,
+
+    /// Warnings generated during the operation.
+    pub warnings: Vec<String>,
+}
+
+impl EditResult {
+    /// Creates a successful edit result with no changes.
+    #[must_use]
+    pub fn success() -> Self {
+        Self {
+            success: true,
+            created_regions: vec![],
+            modified_regions: vec![],
+            deleted_region_ids: vec![],
+            reverse_operation: None,
+            warnings: vec![],
+        }
+    }
+
+    /// Creates a failed edit result.
+    #[must_use]
+    pub fn failed() -> Self {
+        Self {
+            success: false,
+            created_regions: vec![],
+            modified_regions: vec![],
+            deleted_region_ids: vec![],
+            reverse_operation: None,
+            warnings: vec![],
+        }
+    }
+
+    /// Adds a created region.
+    #[must_use]
+    pub fn with_created(mut self, region: Region) -> Self {
+        self.created_regions.push(region);
+        self
+    }
+
+    /// Adds a modified region.
+    #[must_use]
+    pub fn with_modified(mut self, region: Region) -> Self {
+        self.modified_regions.push(region);
+        self
+    }
+
+    /// Adds a deleted region ID.
+    #[must_use]
+    pub fn with_deleted(mut self, id: RegionId) -> Self {
+        self.deleted_region_ids.push(id);
+        self
+    }
+
+    /// Sets the reverse operation.
+    #[must_use]
+    pub fn with_reverse(mut self, op: EditOperation) -> Self {
+        self.reverse_operation = Some(op);
+        self
+    }
+
+    /// Adds a warning.
+    #[must_use]
+    pub fn with_warning(mut self, warning: impl Into<String>) -> Self {
+        self.warnings.push(warning.into());
+        self
+    }
+
+    /// Returns true if any regions were affected.
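+    ///
+    /// Sketch, for some `region_id`:
+    ///
+    /// ```ignore
+    /// assert!(!EditResult::success().has_changes());
+    /// assert!(EditResult::success().with_deleted(region_id).has_changes());
+    /// ```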
+ #[must_use] + pub fn has_changes(&self) -> bool { + !self.created_regions.is_empty() + || !self.modified_regions.is_empty() + || !self.deleted_region_ids.is_empty() + } + + /// Returns the total number of affected regions. + #[must_use] + pub fn affected_count(&self) -> usize { + self.created_regions.len() + self.modified_regions.len() + self.deleted_region_ids.len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_edit_result_success() { + let result = EditResult::success(); + assert!(result.success); + assert!(!result.has_changes()); + assert_eq!(result.affected_count(), 0); + } + + #[test] + fn test_edit_result_failed() { + let result = EditResult::failed(); + assert!(!result.success); + } + + #[test] + fn test_edit_result_builder() { + let region = Region::text("test"); + let result = EditResult::success() + .with_created(region) + .with_warning("Minor issue"); + + assert!(result.success); + assert_eq!(result.created_regions.len(), 1); + assert_eq!(result.warnings.len(), 1); + assert!(result.has_changes()); + assert_eq!(result.affected_count(), 1); + } +} diff --git a/crates/nvisy-document/src/operation/split.rs b/crates/nvisy-document/src/operation/split.rs new file mode 100644 index 0000000..db7eb29 --- /dev/null +++ b/crates/nvisy-document/src/operation/split.rs @@ -0,0 +1,105 @@ +//! Split operation types. + +use serde::{Deserialize, Serialize}; + +use crate::format::region::RegionId; + +/// Defines where to split a document. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum SplitBoundary { + /// Split after a specific page. + AfterPage { + /// Page number (0-indexed). + page: u32, + }, + + /// Split after a specific region. + AfterRegion { + /// Region ID to split after. + region: RegionId, + }, + + /// Split at page intervals. + EveryNPages { + /// Number of pages per split. + n: u32, + }, + + /// Split by heading level (each heading starts a new document). + ByHeading { + /// Heading level to split on (1-6). + level: u8, + }, +} + +impl SplitBoundary { + /// Creates a split after a specific page. + #[must_use] + pub fn after_page(page: u32) -> Self { + Self::AfterPage { page } + } + + /// Creates a split after a specific region. + #[must_use] + pub fn after_region(region: RegionId) -> Self { + Self::AfterRegion { region } + } + + /// Creates splits every N pages. + #[must_use] + pub fn every_n_pages(n: u32) -> Self { + Self::EveryNPages { n: n.max(1) } + } + + /// Creates splits at heading level. + #[must_use] + pub fn by_heading(level: u8) -> Self { + Self::ByHeading { + level: level.clamp(1, 6), + } + } +} + +/// Order for merging documents. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum MergeOrder { + /// Merge in the order provided. + #[default] + Sequential, + + /// Interleave pages from each document. 
+    Interleaved,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_split_boundary() {
+        let split = SplitBoundary::after_page(5);
+        assert!(matches!(split, SplitBoundary::AfterPage { page: 5 }));
+    }
+
+    #[test]
+    fn test_every_n_pages_minimum() {
+        let split = SplitBoundary::every_n_pages(0);
+        assert!(matches!(split, SplitBoundary::EveryNPages { n: 1 }));
+    }
+
+    #[test]
+    fn test_heading_level_clamped() {
+        let split = SplitBoundary::by_heading(10);
+        assert!(matches!(split, SplitBoundary::ByHeading { level: 6 }));
+    }
+
+    #[test]
+    fn test_serde() {
+        let split = SplitBoundary::after_page(3);
+        let json = serde_json::to_string(&split).unwrap();
+        let parsed: SplitBoundary = serde_json::from_str(&json).unwrap();
+        assert_eq!(split, parsed);
+    }
+}
diff --git a/crates/nvisy-document/src/thumbnail/mod.rs b/crates/nvisy-document/src/thumbnail/mod.rs
new file mode 100644
index 0000000..7db8f4a
--- /dev/null
+++ b/crates/nvisy-document/src/thumbnail/mod.rs
@@ -0,0 +1,53 @@
+//! Document thumbnail generation traits and types.
+//!
+//! This module defines the [`ThumbnailGenerator`] trait for generating
+//! thumbnail images from documents.
+
+mod options;
+mod types;
+
+use async_trait::async_trait;
+pub use options::ThumbnailOptions;
+pub use types::{ImageFormat, Thumbnail, ThumbnailSize};
+
+use crate::error::Result;
+use crate::format::Document;
+
+/// Trait for document thumbnail generation.
+///
+/// This trait is implemented by [`Document`] types that support generating
+/// thumbnail images.
+#[async_trait]
+pub trait ThumbnailGenerator: Document {
+    /// Returns whether thumbnail generation is supported by this document.
+    fn supports_thumbnails(&self) -> bool;
+
+    /// Returns the supported output image formats.
+    fn supported_image_formats(&self) -> &[ImageFormat];
+
+    /// Generates a thumbnail for the first page (or entire document for images).
+    ///
+    /// # Arguments
+    ///
+    /// * `options` - Optional thumbnail generation options
+    async fn generate(&self, options: Option<&ThumbnailOptions>) -> Result<Thumbnail>;
+
+    /// Generates a thumbnail for a specific page.
+    ///
+    /// # Arguments
+    ///
+    /// * `page` - The page number (0-indexed)
+    /// * `options` - Optional thumbnail generation options
+    async fn generate_for_page(
+        &self,
+        page: u32,
+        options: Option<&ThumbnailOptions>,
+    ) -> Result<Thumbnail>;
+
+    /// Generates thumbnails for all pages.
+    ///
+    /// # Arguments
+    ///
+    /// * `options` - Optional thumbnail generation options
+    async fn generate_all(&self, options: Option<&ThumbnailOptions>) -> Result<Vec<Thumbnail>>;
+}
diff --git a/crates/nvisy-document/src/thumbnail/options.rs b/crates/nvisy-document/src/thumbnail/options.rs
new file mode 100644
index 0000000..5ee44dd
--- /dev/null
+++ b/crates/nvisy-document/src/thumbnail/options.rs
@@ -0,0 +1,245 @@
+//! Thumbnail generation options.
+
+use serde::{Deserialize, Serialize};
+
+use super::types::{ImageFormat, ThumbnailSize};
+
+/// Options for thumbnail generation.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ThumbnailOptions {
+    /// The desired thumbnail size.
+    pub size: ThumbnailSize,
+
+    /// The output image format.
+    pub format: ImageFormat,
+
+    /// JPEG/WebP quality (1-100). Only applicable for lossy formats.
+    pub quality: u8,
+
+    /// Background color for transparent images (hex RGB, e.g., "FFFFFF").
+    /// If None, transparency is preserved (for formats that support it).
+    pub background: Option<String>,
+
+    /// Whether to crop to fit the exact dimensions (vs. fitting within bounds).
+    pub crop_to_fit: bool,
+
+    /// Page to generate thumbnail from (0-indexed). None means first page.
+    pub page: Option<u32>,
+
+    /// DPI for rendering vector content (PDF, SVG).
+    pub render_dpi: u32,
+}
+
+impl Default for ThumbnailOptions {
+    fn default() -> Self {
+        Self {
+            size: ThumbnailSize::Medium,
+            format: ImageFormat::Png,
+            quality: 85,
+            background: None,
+            crop_to_fit: false,
+            page: None,
+            render_dpi: 150,
+        }
+    }
+}
+
+impl ThumbnailOptions {
+    /// Creates options for a small PNG thumbnail.
+    #[must_use]
+    pub fn small() -> Self {
+        Self {
+            size: ThumbnailSize::Small,
+            ..Default::default()
+        }
+    }
+
+    /// Creates options for a medium PNG thumbnail.
+    #[must_use]
+    pub fn medium() -> Self {
+        Self::default()
+    }
+
+    /// Creates options for a large PNG thumbnail.
+    #[must_use]
+    pub fn large() -> Self {
+        Self {
+            size: ThumbnailSize::Large,
+            ..Default::default()
+        }
+    }
+
+    /// Creates options for a high-quality JPEG thumbnail.
+    #[must_use]
+    pub fn jpeg_high_quality() -> Self {
+        Self {
+            format: ImageFormat::Jpeg,
+            quality: 95,
+            background: Some("FFFFFF".to_string()),
+            ..Default::default()
+        }
+    }
+
+    /// Creates options for a web-optimized WebP thumbnail.
+    #[must_use]
+    pub fn webp_optimized() -> Self {
+        Self {
+            format: ImageFormat::WebP,
+            quality: 80,
+            ..Default::default()
+        }
+    }
+
+    /// Sets the thumbnail size.
+    #[must_use]
+    pub fn with_size(mut self, size: ThumbnailSize) -> Self {
+        self.size = size;
+        self
+    }
+
+    /// Sets custom dimensions.
+    #[must_use]
+    pub fn with_dimensions(mut self, width: u32, height: u32) -> Self {
+        self.size = ThumbnailSize::custom(width, height);
+        self
+    }
+
+    /// Sets the output format.
+    #[must_use]
+    pub fn with_format(mut self, format: ImageFormat) -> Self {
+        self.format = format;
+        self
+    }
+
+    /// Sets the quality for lossy formats, clamped to 1-100.
+    #[must_use]
+    pub fn with_quality(mut self, quality: u8) -> Self {
+        self.quality = quality.clamp(1, 100);
+        self
+    }
+
+    /// Sets the background color.
+    #[must_use]
+    pub fn with_background(mut self, color: impl Into<String>) -> Self {
+        self.background = Some(color.into());
+        self
+    }
+
+    /// Enables crop-to-fit mode.
+    #[must_use]
+    pub fn with_crop(mut self) -> Self {
+        self.crop_to_fit = true;
+        self
+    }
+
+    /// Sets the page to render.
+    #[must_use]
+    pub fn with_page(mut self, page: u32) -> Self {
+        self.page = Some(page);
+        self
+    }
+
+    /// Sets the rendering DPI.
+    #[must_use]
+    pub fn with_dpi(mut self, dpi: u32) -> Self {
+        self.render_dpi = dpi;
+        self
+    }
+
+    /// Returns the effective page number (0 if not specified).
+    #[must_use]
+    pub fn effective_page(&self) -> u32 {
+        self.page.unwrap_or(0)
+    }
+
+    /// Validates the options.
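+    ///
+    /// Sketch:
+    ///
+    /// ```ignore
+    /// let opts = ThumbnailOptions::default()
+    ///     .with_dimensions(320, 240)
+    ///     .with_quality(80);
+    /// assert!(opts.validate().is_ok());
+    /// assert!(opts.with_dpi(0).validate().is_err());
+    /// ```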
+    pub fn validate(&self) -> Result<(), String> {
+        if self.quality == 0 || self.quality > 100 {
+            return Err("quality must be between 1 and 100".to_string());
+        }
+
+        if self.render_dpi == 0 {
+            return Err("render_dpi must be greater than 0".to_string());
+        }
+
+        if self.render_dpi > 600 {
+            return Err("render_dpi exceeds maximum of 600".to_string());
+        }
+
+        if let Some(ref bg) = self.background {
+            if bg.len() != 6 || !bg.chars().all(|c| c.is_ascii_hexdigit()) {
+                return Err("background must be a 6-character hex RGB value".to_string());
+            }
+        }
+
+        if self.size.max_width() > 4096 || self.size.max_height() > 4096 {
+            return Err("dimensions exceed maximum of 4096 pixels".to_string());
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_default_options() {
+        let opts = ThumbnailOptions::default();
+        assert_eq!(opts.size, ThumbnailSize::Medium);
+        assert_eq!(opts.format, ImageFormat::Png);
+        assert_eq!(opts.quality, 85);
+        assert!(!opts.crop_to_fit);
+    }
+
+    #[test]
+    fn test_builder() {
+        let opts = ThumbnailOptions::default()
+            .with_size(ThumbnailSize::Large)
+            .with_format(ImageFormat::Jpeg)
+            .with_quality(90)
+            .with_background("FFFFFF")
+            .with_page(2);
+
+        assert_eq!(opts.size, ThumbnailSize::Large);
+        assert_eq!(opts.format, ImageFormat::Jpeg);
+        assert_eq!(opts.quality, 90);
+        assert_eq!(opts.background, Some("FFFFFF".to_string()));
+        assert_eq!(opts.page, Some(2));
+    }
+
+    #[test]
+    fn test_quality_clamping() {
+        let opts = ThumbnailOptions::default().with_quality(150);
+        assert_eq!(opts.quality, 100);
+
+        let opts = ThumbnailOptions::default().with_quality(0);
+        assert_eq!(opts.quality, 1);
+    }
+
+    #[test]
+    fn test_validation() {
+        let valid = ThumbnailOptions::default();
+        assert!(valid.validate().is_ok());
+
+        let invalid_bg = ThumbnailOptions::default().with_background("invalid");
+        assert!(invalid_bg.validate().is_err());
+
+        let high_dpi = ThumbnailOptions::default().with_dpi(1000);
+        assert!(high_dpi.validate().is_err());
+    }
+
+    #[test]
+    fn test_preset_options() {
+        let small = ThumbnailOptions::small();
+        assert_eq!(small.size, ThumbnailSize::Small);
+
+        let jpeg = ThumbnailOptions::jpeg_high_quality();
+        assert_eq!(jpeg.format, ImageFormat::Jpeg);
+        assert_eq!(jpeg.quality, 95);
+
+        let webp = ThumbnailOptions::webp_optimized();
+        assert_eq!(webp.format, ImageFormat::WebP);
+    }
+}
diff --git a/crates/nvisy-document/src/thumbnail/types.rs b/crates/nvisy-document/src/thumbnail/types.rs
new file mode 100644
index 0000000..3a685d6
--- /dev/null
+++ b/crates/nvisy-document/src/thumbnail/types.rs
@@ -0,0 +1,288 @@
+//! Thumbnail types and structures.
+
+use bytes::Bytes;
+use serde::{Deserialize, Serialize};
+
+/// A generated thumbnail.
+#[derive(Debug, Clone)]
+pub struct Thumbnail {
+    /// The thumbnail image data.
+    pub data: Bytes,
+
+    /// The image format.
+    pub format: ImageFormat,
+
+    /// The actual width in pixels.
+    pub width: u32,
+
+    /// The actual height in pixels.
+    pub height: u32,
+
+    /// The page number this thumbnail represents (if applicable).
+    pub page: Option<u32>,
+}
+
+impl Thumbnail {
+    /// Creates a new thumbnail.
+    #[must_use]
+    pub fn new(data: Bytes, format: ImageFormat, width: u32, height: u32) -> Self {
+        Self {
+            data,
+            format,
+            width,
+            height,
+            page: None,
+        }
+    }
+
+    /// Sets the page number.
+    #[must_use]
+    pub fn with_page(mut self, page: u32) -> Self {
+        self.page = Some(page);
+        self
+    }
+
+    /// Returns the size of the thumbnail data in bytes.
+    #[must_use]
+    pub fn size_bytes(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Returns the MIME type of the thumbnail.
+    #[must_use]
+    pub fn mime_type(&self) -> &'static str {
+        self.format.mime_type()
+    }
+
+    /// Returns the file extension for this thumbnail format.
+    #[must_use]
+    pub fn extension(&self) -> &'static str {
+        self.format.extension()
+    }
+}
+
+/// Supported thumbnail image formats.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageFormat {
+    /// PNG format (lossless, supports transparency).
+    #[default]
+    Png,
+
+    /// JPEG format (lossy, smaller file size).
+    Jpeg,
+
+    /// WebP format (modern, efficient compression).
+    WebP,
+}
+
+impl ImageFormat {
+    /// Returns the MIME type for this format.
+    #[must_use]
+    pub fn mime_type(&self) -> &'static str {
+        match self {
+            Self::Png => "image/png",
+            Self::Jpeg => "image/jpeg",
+            Self::WebP => "image/webp",
+        }
+    }
+
+    /// Returns the file extension for this format.
+    #[must_use]
+    pub fn extension(&self) -> &'static str {
+        match self {
+            Self::Png => "png",
+            Self::Jpeg => "jpg",
+            Self::WebP => "webp",
+        }
+    }
+
+    /// Returns whether this format supports transparency.
+    #[must_use]
+    pub fn supports_transparency(&self) -> bool {
+        match self {
+            Self::Png | Self::WebP => true,
+            Self::Jpeg => false,
+        }
+    }
+
+    /// Parses a format from a string.
+    #[must_use]
+    pub fn parse(s: &str) -> Option<Self> {
+        match s.to_lowercase().as_str() {
+            "png" => Some(Self::Png),
+            "jpeg" | "jpg" => Some(Self::Jpeg),
+            "webp" => Some(Self::WebP),
+            _ => None,
+        }
+    }
+}
+
+impl std::str::FromStr for ImageFormat {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Self::parse(s).ok_or_else(|| format!("unknown image format: {s}"))
+    }
+}
+
+/// Predefined thumbnail sizes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum ThumbnailSize {
+    /// Small thumbnail (64x64).
+    Small,
+
+    /// Medium thumbnail (128x128).
+    #[default]
+    Medium,
+
+    /// Large thumbnail (256x256).
+    Large,
+
+    /// Extra large thumbnail (512x512).
+    ExtraLarge,
+
+    /// Custom size with explicit dimensions.
+    Custom {
+        /// Maximum width in pixels.
+        width: u32,
+        /// Maximum height in pixels.
+        height: u32,
+    },
+}
+
+impl ThumbnailSize {
+    /// Returns the maximum width for this size.
+    #[must_use]
+    pub fn max_width(&self) -> u32 {
+        match self {
+            Self::Small => 64,
+            Self::Medium => 128,
+            Self::Large => 256,
+            Self::ExtraLarge => 512,
+            Self::Custom { width, .. } => *width,
+        }
+    }
+
+    /// Returns the maximum height for this size.
+    #[must_use]
+    pub fn max_height(&self) -> u32 {
+        match self {
+            Self::Small => 64,
+            Self::Medium => 128,
+            Self::Large => 256,
+            Self::ExtraLarge => 512,
+            Self::Custom { height, .. } => *height,
+        }
+    }
+
+    /// Creates a custom size.
+    #[must_use]
+    pub fn custom(width: u32, height: u32) -> Self {
+        Self::Custom { width, height }
+    }
+
+    /// Calculates the scaled dimensions for the given source dimensions,
+    /// maintaining aspect ratio.
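+    ///
+    /// Sketch:
+    ///
+    /// ```ignore
+    /// let (w, h) = ThumbnailSize::Medium.scaled_dimensions(1920, 1080);
+    /// assert_eq!((w, h), (128, 72));
+    /// ```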
+ #[must_use] + pub fn scaled_dimensions(&self, source_width: u32, source_height: u32) -> (u32, u32) { + let max_width = self.max_width(); + let max_height = self.max_height(); + + if source_width == 0 || source_height == 0 { + return (max_width, max_height); + } + + let width_ratio = max_width as f64 / source_width as f64; + let height_ratio = max_height as f64 / source_height as f64; + let ratio = width_ratio.min(height_ratio); + + let new_width = (source_width as f64 * ratio).round() as u32; + let new_height = (source_height as f64 * ratio).round() as u32; + + (new_width.max(1), new_height.max(1)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_image_format_mime_types() { + assert_eq!(ImageFormat::Png.mime_type(), "image/png"); + assert_eq!(ImageFormat::Jpeg.mime_type(), "image/jpeg"); + assert_eq!(ImageFormat::WebP.mime_type(), "image/webp"); + } + + #[test] + fn test_image_format_transparency() { + assert!(ImageFormat::Png.supports_transparency()); + assert!(!ImageFormat::Jpeg.supports_transparency()); + assert!(ImageFormat::WebP.supports_transparency()); + } + + #[test] + fn test_thumbnail_size_dimensions() { + assert_eq!(ThumbnailSize::Small.max_width(), 64); + assert_eq!(ThumbnailSize::Medium.max_width(), 128); + assert_eq!(ThumbnailSize::Large.max_width(), 256); + assert_eq!(ThumbnailSize::ExtraLarge.max_width(), 512); + + let custom = ThumbnailSize::custom(800, 600); + assert_eq!(custom.max_width(), 800); + assert_eq!(custom.max_height(), 600); + } + + #[test] + fn test_scaled_dimensions() { + let size = ThumbnailSize::Medium; // 128x128 + + // Landscape image + let (w, h) = size.scaled_dimensions(1920, 1080); + assert_eq!(w, 128); + assert!(h < 128); + + // Portrait image + let (w, h) = size.scaled_dimensions(1080, 1920); + assert!(w < 128); + assert_eq!(h, 128); + + // Square image + let (w, h) = size.scaled_dimensions(1000, 1000); + assert_eq!(w, 128); + assert_eq!(h, 128); + } + + #[test] + fn test_thumbnail_creation() { + let thumb = + Thumbnail::new(Bytes::from_static(b"test"), ImageFormat::Png, 128, 96).with_page(1); + + assert_eq!(thumb.width, 128); + assert_eq!(thumb.height, 96); + assert_eq!(thumb.page, Some(1)); + assert_eq!(thumb.mime_type(), "image/png"); + } +} diff --git a/crates/nvisy-docx/Cargo.toml b/crates/nvisy-docx/Cargo.toml new file mode 100644 index 0000000..871f217 --- /dev/null +++ b/crates/nvisy-docx/Cargo.toml @@ -0,0 +1,30 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-docx" +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } +readme = "./README.md" + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +description = "DOCX document format support for nvisy" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +nvisy-document = { workspace = true } + +async-trait = { workspace = true } +bytes = { workspace = true } +thiserror = { workspace = true } + +[dev-dependencies] diff --git a/crates/nvisy-docx/README.md b/crates/nvisy-docx/README.md new file mode 100644 index 0000000..aaa6490 --- /dev/null +++ b/crates/nvisy-docx/README.md @@ -0,0 +1,13 @@ +# nvisy-docx + +DOCX document format support for nvisy. + +This crate provides a `DocumentFormat` implementation for Microsoft Word DOCX files (.docx). 
+
+## Status
+
+This crate is currently a stub. DOCX parsing and manipulation are not yet implemented.
+
+## License
+
+MIT
diff --git a/crates/nvisy-docx/src/document.rs b/crates/nvisy-docx/src/document.rs
new file mode 100644
index 0000000..d8ad4a4
--- /dev/null
+++ b/crates/nvisy-docx/src/document.rs
@@ -0,0 +1,79 @@
+//! DOCX document implementation.
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use nvisy_document::{
+    Document, DocumentInfo, EditOperation, EditResult, EditableDocument, Error, PageOptions,
+    Region, RegionId, Result,
+};
+
+/// A loaded DOCX document.
+#[derive(Debug)]
+pub struct DocxDocument {
+    info: DocumentInfo,
+    regions: Vec<Region>,
+    modified: bool,
+}
+
+impl DocxDocument {
+    /// Creates a new DOCX document (internal use).
+    #[must_use]
+    #[allow(dead_code)] // Will be used when load() is implemented
+    pub(crate) fn new(info: DocumentInfo) -> Self {
+        Self {
+            info,
+            regions: Vec::new(),
+            modified: false,
+        }
+    }
+}
+
+#[async_trait]
+impl Document for DocxDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        self.regions
+            .iter()
+            .filter(|r| r.page.map(|p| p.get()) == Some(page))
+            .collect()
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn serialize(&self) -> Result<Bytes> {
+        // TODO: Implement DOCX serialization
+        Err(Error::unsupported_format(
+            "DOCX serialization not yet implemented",
+        ))
+    }
+}
+
+#[async_trait]
+impl EditableDocument for DocxDocument {
+    async fn apply(&mut self, _operation: &EditOperation) -> Result<EditResult> {
+        // TODO: Implement DOCX editing
+        Err(Error::unsupported_format(
+            "DOCX editing not yet implemented",
+        ))
+    }
+
+    fn is_modified(&self) -> bool {
+        self.modified
+    }
+
+    async fn extract_page_regions(&mut self, _options: &PageOptions) -> Result<Vec<Region>> {
+        // TODO: Implement page region extraction
+        Err(Error::unsupported_format(
+            "DOCX page extraction not yet implemented",
+        ))
+    }
+}
diff --git a/crates/nvisy-docx/src/format.rs b/crates/nvisy-docx/src/format.rs
new file mode 100644
index 0000000..e378bcd
--- /dev/null
+++ b/crates/nvisy-docx/src/format.rs
@@ -0,0 +1,71 @@
+//! DOCX format handler implementation.
+
+use bytes::Bytes;
+use nvisy_document::{Capabilities, DocumentFormat, Error, Result};
+
+use crate::DocxDocument;
+
+/// DOCX document format handler.
+#[derive(Debug, Clone, Default)]
+pub struct DocxFormat {
+    capabilities: Capabilities,
+}
+
+impl DocxFormat {
+    /// Creates a new DOCX format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities::read_only(),
+        }
+    }
+}
+
+impl DocumentFormat for DocxFormat {
+    type Document = DocxDocument;
+
+    fn name(&self) -> &'static str {
+        "docx"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["docx"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, _data: Bytes) -> Result<Self::Document> {
+        // TODO: Implement DOCX loading
+        Err(Error::unsupported_format(
+            "DOCX loading not yet implemented",
+        ))
+    }
+
+    async fn create_empty(&self) -> Result<Self::Document> {
+        // TODO: Implement empty DOCX creation
+        Err(Error::unsupported_format(
+            "DOCX creation not yet implemented",
+        ))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = DocxFormat::new();
+        assert_eq!(format.name(), "docx");
+        assert!(format
+            .mime_types()
+            .contains(&"application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
+        assert!(format.extensions().contains(&"docx"));
+    }
+}
diff --git a/crates/nvisy-docx/src/lib.rs b/crates/nvisy-docx/src/lib.rs
new file mode 100644
index 0000000..40b31c4
--- /dev/null
+++ b/crates/nvisy-docx/src/lib.rs
@@ -0,0 +1,23 @@
+//! DOCX document format support for nvisy.
+//!
+//! This crate provides a `DocumentFormat` implementation for Microsoft Word
+//! DOCX files (.docx).
+//!
+//! # Example
+//!
+//! ```ignore
+//! use nvisy_docx::DocxFormat;
+//! use nvisy_engine::Engine;
+//!
+//! let engine = Engine::new();
+//! let doc = engine.load_docx(data).await?;
+//! ```
+
+#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+
+mod document;
+mod format;
+
+pub use document::DocxDocument;
+pub use format::DocxFormat;
diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml
new file mode 100644
index 0000000..2073535
--- /dev/null
+++ b/crates/nvisy-engine/Cargo.toml
@@ -0,0 +1,40 @@
+# https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[package]
+name = "nvisy-engine"
+version = { workspace = true }
+rust-version = { workspace = true }
+edition = { workspace = true }
+license = { workspace = true }
+publish = { workspace = true }
+readme = "./README.md"
+
+authors = { workspace = true }
+repository = { workspace = true }
+homepage = { workspace = true }
+documentation = { workspace = true }
+
+description = "Document editing session management for nvisy"
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
+
+[features]
+default = ["pdf", "docx", "text"]
+pdf = ["dep:nvisy-pdf"]
+docx = ["dep:nvisy-docx"]
+text = ["dep:nvisy-text"]
+
+[dependencies]
+nvisy-archive = { workspace = true }
+nvisy-document = { workspace = true }
+nvisy-docx = { workspace = true, optional = true }
+nvisy-pdf = { workspace = true, optional = true }
+nvisy-text = { workspace = true, optional = true }
+
+bytes = { workspace = true }
+jiff = { workspace = true, features = ["std"] }
+serde = { workspace = true, features = ["std", "derive"] }
+uuid = { workspace = true, features = ["v4"] }
+
+[dev-dependencies]
+serde_json = { workspace = true, features = ["std"] }
diff --git a/crates/nvisy-engine/README.md b/crates/nvisy-engine/README.md
new file mode 100644
index 0000000..6540af8
--- /dev/null
+++ b/crates/nvisy-engine/README.md
@@ -0,0 +1,21 @@
+# nvisy-engine
+
+Document editing session management for the Nvisy system.
+
+## Overview
+
+This crate provides session management for document editing workflows,
+including undo/redo support, region caching, and streaming for large documents.
+
+## Features
+
+- **Edit Sessions** - Wrap documents with stable region IDs and undo/redo
+- **Edit History** - Track operations for undo/redo support
+- **Region Caching** - Quick lookup of document regions
+- **Streaming Support** - Lazy loading for large multi-page documents
+
+## Dependencies
+
+- `nvisy-document` - Document manipulation types
+- `jiff` - Timestamps
+- `uuid` - Session identifiers
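+
+## Example
+
+A sketch of the intended workflow, assuming a hypothetical `report.pdf` on
+disk (error handling elided; APIs as defined in this workspace):
+
+```ignore
+use nvisy_engine::Engine;
+
+let engine = Engine::new();
+let (data, ext) = engine.read_file("report.pdf")?;
+assert!(engine.supports_extension(&ext));
+let doc = engine.load_pdf(data).await?;
+```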
diff --git a/crates/nvisy-engine/src/engine/config.rs b/crates/nvisy-engine/src/engine/config.rs
new file mode 100644
index 0000000..c8afbba
--- /dev/null
+++ b/crates/nvisy-engine/src/engine/config.rs
@@ -0,0 +1,111 @@
+//! Engine configuration.
+
+use serde::{Deserialize, Serialize};
+
+/// Configuration for the document processing engine.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EngineConfig {
+    /// Maximum file size in bytes that can be loaded.
+    ///
+    /// Files larger than this will be rejected. Set to `None` for no limit.
+    pub max_file_size: Option<u64>,
+
+    /// Whether to enable archive extraction.
+    ///
+    /// When enabled, the engine can extract and process documents from
+    /// archive files (ZIP, TAR, etc.).
+    pub enable_archives: bool,
+
+    /// Maximum depth for nested archives.
+    ///
+    /// Prevents zip bomb attacks by limiting how deep archive extraction
+    /// can go.
+    pub max_archive_depth: u32,
+
+    /// Whether to process documents in parallel when possible.
+    pub parallel_processing: bool,
+}
+
+impl EngineConfig {
+    /// Creates a new configuration with default values.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Sets the maximum file size.
+    #[must_use]
+    pub fn with_max_file_size(mut self, size: Option<u64>) -> Self {
+        self.max_file_size = size;
+        self
+    }
+
+    /// Enables or disables archive extraction.
+    #[must_use]
+    pub fn with_archives(mut self, enable: bool) -> Self {
+        self.enable_archives = enable;
+        self
+    }
+
+    /// Sets the maximum archive nesting depth.
+    #[must_use]
+    pub fn with_max_archive_depth(mut self, depth: u32) -> Self {
+        self.max_archive_depth = depth;
+        self
+    }
+
+    /// Enables or disables parallel processing.
+    #[must_use]
+    pub fn with_parallel_processing(mut self, enable: bool) -> Self {
+        self.parallel_processing = enable;
+        self
+    }
+}
+
+impl Default for EngineConfig {
+    fn default() -> Self {
+        Self {
+            // 100 MB default limit
+            max_file_size: Some(100 * 1024 * 1024),
+            enable_archives: true,
+            max_archive_depth: 3,
+            parallel_processing: true,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_default_config() {
+        let config = EngineConfig::default();
+        assert_eq!(config.max_file_size, Some(100 * 1024 * 1024));
+        assert!(config.enable_archives);
+        assert_eq!(config.max_archive_depth, 3);
+        assert!(config.parallel_processing);
+    }
+
+    #[test]
+    fn test_config_builder() {
+        let config = EngineConfig::new()
+            .with_max_file_size(Some(1024))
+            .with_archives(false)
+            .with_max_archive_depth(1)
+            .with_parallel_processing(false);
+
+        assert_eq!(config.max_file_size, Some(1024));
+        assert!(!config.enable_archives);
+        assert_eq!(config.max_archive_depth, 1);
+        assert!(!config.parallel_processing);
+    }
+
+    #[test]
+    fn test_config_serialization() {
+        let config = EngineConfig::default();
+        let json = serde_json::to_string(&config).unwrap();
+        let restored: EngineConfig = serde_json::from_str(&json).unwrap();
+        assert_eq!(config.max_file_size, restored.max_file_size);
+    }
+}
diff --git a/crates/nvisy-engine/src/engine/mod.rs b/crates/nvisy-engine/src/engine/mod.rs
new file mode 100644
index 0000000..4f27ddc
--- /dev/null
+++ b/crates/nvisy-engine/src/engine/mod.rs
@@ -0,0 +1,292 @@
+//! Central engine module for document processing.
+//!
+//! The [`Engine`] struct serves as the main entry point for loading,
+//! processing, and managing documents across different formats.
+
+mod config;
+
+use std::path::Path;
+
+use bytes::Bytes;
+pub use config::EngineConfig;
+use nvisy_document::{DocumentFormat, Error, Result};
+#[cfg(feature = "docx")]
+use nvisy_docx::{DocxDocument, DocxFormat};
+#[cfg(feature = "pdf")]
+use nvisy_pdf::{PdfDocument, PdfFormat};
+#[cfg(feature = "text")]
+use nvisy_text::{TextDocument, TextFormat};
+
+/// The central document processing engine.
+///
+/// `Engine` provides a unified interface for:
+/// - Loading documents from various formats (PDF, DOCX, plain text, etc.)
+/// - Managing format handlers
+/// - Processing archives containing documents
+///
+/// # Example
+///
+/// ```ignore
+/// use nvisy_engine::Engine;
+///
+/// let engine = Engine::new();
+/// let doc = engine.load_pdf(data).await?;
+/// ```
+#[derive(Debug, Clone)]
+pub struct Engine {
+    /// Configuration for the engine.
+    config: EngineConfig,
+
+    /// PDF format handler.
+    #[cfg(feature = "pdf")]
+    pdf: PdfFormat,
+
+    /// DOCX format handler.
+    #[cfg(feature = "docx")]
+    docx: DocxFormat,
+
+    /// Plain text format handler.
+    #[cfg(feature = "text")]
+    text: TextFormat,
+}
+
+impl Engine {
+    /// Creates a new engine with default configuration.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            config: EngineConfig::default(),
+            #[cfg(feature = "pdf")]
+            pdf: PdfFormat::new(),
+            #[cfg(feature = "docx")]
+            docx: DocxFormat::new(),
+            #[cfg(feature = "text")]
+            text: TextFormat::new(),
+        }
+    }
+
+    /// Creates a new engine with the specified configuration.
+    #[must_use]
+    pub fn with_config(config: EngineConfig) -> Self {
+        Self {
+            config,
+            #[cfg(feature = "pdf")]
+            pdf: PdfFormat::new(),
+            #[cfg(feature = "docx")]
+            docx: DocxFormat::new(),
+            #[cfg(feature = "text")]
+            text: TextFormat::new(),
+        }
+    }
+
+    /// Returns a reference to the engine configuration.
+    #[must_use]
+    pub fn config(&self) -> &EngineConfig {
+        &self.config
+    }
+
+    /// Returns the PDF format handler.
+    #[cfg(feature = "pdf")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "pdf")))]
+    #[must_use]
+    pub fn pdf(&self) -> &PdfFormat {
+        &self.pdf
+    }
+
+    /// Returns the DOCX format handler.
+    #[cfg(feature = "docx")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "docx")))]
+    #[must_use]
+    pub fn docx(&self) -> &DocxFormat {
+        &self.docx
+    }
+
+    /// Returns the text format handler.
+    #[cfg(feature = "text")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "text")))]
+    #[must_use]
+    pub fn text(&self) -> &TextFormat {
+        &self.text
+    }
+
+    /// Loads a PDF document from bytes.
+    #[cfg(feature = "pdf")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "pdf")))]
+    pub async fn load_pdf(&self, data: Bytes) -> Result<PdfDocument> {
+        self.pdf.load(data).await
+    }
+
+    /// Loads a DOCX document from bytes.
+    #[cfg(feature = "docx")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "docx")))]
+    pub async fn load_docx(&self, data: Bytes) -> Result<DocxDocument> {
+        self.docx.load(data).await
+    }
+
+    /// Loads a text document from bytes.
+    #[cfg(feature = "text")]
+    #[cfg_attr(docsrs, doc(cfg(feature = "text")))]
+    pub async fn load_text(&self, data: Bytes) -> Result<TextDocument> {
+        self.text.load(data).await
+    }
+
+    /// Reads a file and returns its contents along with the file extension.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The file cannot be read
+    /// - The file has no extension
+    pub fn read_file<P: AsRef<Path>>(&self, path: P) -> Result<(Bytes, String)> {
+        let path = path.as_ref();
+        let data = std::fs::read(path)
+            .map_err(|e| Error::io(format!("Failed to read file '{}': {}", path.display(), e)))?;
+
+        let ext = path
+            .extension()
+            .and_then(|e| e.to_str())
+            .ok_or_else(|| Error::unsupported_format("No file extension"))?
+            .to_owned();
+
+        Ok((Bytes::from(data), ext))
+    }
+
+    /// Checks if a file extension is supported.
+    #[must_use]
+    pub fn supports_extension(&self, ext: &str) -> bool {
+        let ext = ext.trim_start_matches('.').to_lowercase();
+
+        #[cfg(feature = "pdf")]
+        if self.pdf.extensions().contains(&ext.as_str()) {
+            return true;
+        }
+
+        #[cfg(feature = "docx")]
+        if self.docx.extensions().contains(&ext.as_str()) {
+            return true;
+        }
+
+        #[cfg(feature = "text")]
+        if self.text.extensions().contains(&ext.as_str()) {
+            return true;
+        }
+
+        false
+    }
+
+    /// Checks if a MIME type is supported.
+    #[must_use]
+    pub fn supports_mime(&self, mime_type: &str) -> bool {
+        let mime = mime_type.to_lowercase();
+
+        #[cfg(feature = "pdf")]
+        if self.pdf.mime_types().contains(&mime.as_str()) {
+            return true;
+        }
+
+        #[cfg(feature = "docx")]
+        if self.docx.mime_types().contains(&mime.as_str()) {
+            return true;
+        }
+
+        #[cfg(feature = "text")]
+        if self.text.mime_types().contains(&mime.as_str()) {
+            return true;
+        }
+
+        false
+    }
+
+    /// Returns all supported file extensions.
+    #[must_use]
+    pub fn supported_extensions(&self) -> Vec<&'static str> {
+        let mut exts = Vec::new();
+
+        #[cfg(feature = "pdf")]
+        exts.extend(self.pdf.extensions());
+
+        #[cfg(feature = "docx")]
+        exts.extend(self.docx.extensions());
+
+        #[cfg(feature = "text")]
+        exts.extend(self.text.extensions());
+
+        exts
+    }
+
+    /// Returns all supported MIME types.
+    #[must_use]
+    pub fn supported_mime_types(&self) -> Vec<&'static str> {
+        let mut mimes = Vec::new();
+
+        #[cfg(feature = "pdf")]
+        mimes.extend(self.pdf.mime_types());
+
+        #[cfg(feature = "docx")]
+        mimes.extend(self.docx.mime_types());
+
+        #[cfg(feature = "text")]
+        mimes.extend(self.text.mime_types());
+
+        mimes
+    }
+}
+
+impl Default for Engine {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_engine_creation() {
+        let engine = Engine::new();
+        assert_eq!(engine.config().max_file_size, Some(100 * 1024 * 1024));
+    }
+
+    #[test]
+    fn test_engine_with_config() {
+        let config = EngineConfig {
+            max_file_size: Some(50 * 1024 * 1024),
+            ..Default::default()
+        };
+        let engine = Engine::with_config(config);
+        assert_eq!(engine.config().max_file_size, Some(50 * 1024 * 1024));
+    }
+
+    #[test]
+    fn test_supported_extensions() {
+        let engine = Engine::new();
+
+        #[cfg(feature = "pdf")]
+        assert!(engine.supports_extension("pdf"));
+
+        #[cfg(feature = "docx")]
+        assert!(engine.supports_extension("docx"));
+
+        #[cfg(feature = "text")]
+        {
+            assert!(engine.supports_extension("txt"));
+            assert!(engine.supports_extension("md"));
+        }
+
+        assert!(!engine.supports_extension("xyz"));
+    }
+
+    #[test]
+    fn test_supported_mime_types() {
+        let engine = Engine::new();
+
+        #[cfg(feature = "pdf")]
+        assert!(engine.supports_mime("application/pdf"));
+
+        #[cfg(feature = "text")]
+        assert!(engine.supports_mime("text/plain"));
+
+        assert!(!engine.supports_mime("application/unknown"));
+    }
+}
diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs
new file mode 100644
index 0000000..1093cf1
--- /dev/null
+++ b/crates/nvisy-engine/src/lib.rs
@@ -0,0 +1,23 @@
+#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#![doc = include_str!("../README.md")]
+
+pub mod engine;
+pub mod session;
+
+pub use engine::{Engine, EngineConfig};
+pub use nvisy_document::{
+    self as doc, BoundingBox, Capabilities, DocumentFormat, EditOperation, Point, Region, RegionId,
+    RegionKind,
+};
+// Re-export format types for convenience
+#[cfg(feature = "docx")]
+#[cfg_attr(docsrs, doc(cfg(feature = "docx")))]
+pub use nvisy_docx::{DocxDocument, DocxFormat};
+#[cfg(feature = "pdf")]
+#[cfg_attr(docsrs, doc(cfg(feature = "pdf")))]
+pub use nvisy_pdf::{PdfDocument, PdfFormat};
+#[cfg(feature = "text")]
+#[cfg_attr(docsrs, doc(cfg(feature = "text")))]
+pub use nvisy_text::{TextDocument, TextFormat};
+pub use session::{EditHistory, EditSession, HistoryEntry, SessionConfig, SessionId};
diff --git a/crates/nvisy-engine/src/session/history.rs b/crates/nvisy-engine/src/session/history.rs
new file mode 100644
index 0000000..63bd15d
--- /dev/null
+++ b/crates/nvisy-engine/src/session/history.rs
@@ -0,0 +1,225 @@
+//! Edit history for undo/redo support.
+
+use jiff::Timestamp;
+use nvisy_document::EditOperation;
+
+/// A single entry in the edit history.
+#[derive(Debug, Clone)]
+pub struct HistoryEntry {
+    /// The operation that was applied.
+    pub operation: EditOperation,
+
+    /// The reverse operation for undoing.
+    pub reverse: EditOperation,
+
+    /// When the operation was applied.
+    pub timestamp: Timestamp,
+
+    /// Optional description of the operation.
+    pub description: Option<String>,
+}
+
+impl HistoryEntry {
+    /// Creates a new history entry.
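+    ///
+    /// A sketch pairing a delete with the insert that undoes it:
+    ///
+    /// ```ignore
+    /// let entry = HistoryEntry::new(
+    ///     EditOperation::delete(region),
+    ///     EditOperation::insert_after(region, InsertContent::text("original")),
+    /// );
+    /// ```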
+    #[must_use]
+    pub fn new(operation: EditOperation, reverse: EditOperation) -> Self {
+        Self {
+            operation,
+            reverse,
+            timestamp: Timestamp::now(),
+            description: None,
+        }
+    }
+
+    /// Creates a new history entry with a description.
+    #[must_use]
+    pub fn with_description(
+        operation: EditOperation,
+        reverse: EditOperation,
+        description: impl Into<String>,
+    ) -> Self {
+        Self {
+            operation,
+            reverse,
+            timestamp: Timestamp::now(),
+            description: Some(description.into()),
+        }
+    }
+}
+
+/// Manages edit history with undo/redo support.
+#[derive(Debug, Default)]
+pub struct EditHistory {
+    /// Stack of operations that can be undone.
+    undo_stack: Vec<HistoryEntry>,
+
+    /// Stack of operations that can be redone.
+    redo_stack: Vec<HistoryEntry>,
+}
+
+impl EditHistory {
+    /// Creates a new empty history.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Records a new operation in the history.
+    ///
+    /// This clears the redo stack, since a new edit invalidates any
+    /// previously undone operations.
+    pub fn record(&mut self, entry: HistoryEntry) {
+        self.redo_stack.clear();
+        self.undo_stack.push(entry);
+    }
+
+    /// Returns true if there are operations that can be undone.
+    #[must_use]
+    pub fn can_undo(&self) -> bool {
+        !self.undo_stack.is_empty()
+    }
+
+    /// Returns true if there are operations that can be redone.
+    #[must_use]
+    pub fn can_redo(&self) -> bool {
+        !self.redo_stack.is_empty()
+    }
+
+    /// Returns the number of operations that can be undone.
+    #[must_use]
+    pub fn undo_count(&self) -> usize {
+        self.undo_stack.len()
+    }
+
+    /// Returns the number of operations that can be redone.
+    #[must_use]
+    pub fn redo_count(&self) -> usize {
+        self.redo_stack.len()
+    }
+
+    /// Pops the most recent operation for undoing.
+    ///
+    /// Returns the entry that should be reversed.
+    pub fn pop_undo(&mut self) -> Option<HistoryEntry> {
+        self.undo_stack.pop().inspect(|entry| {
+            self.redo_stack.push(entry.clone());
+        })
+    }
+
+    /// Pops the most recently undone operation for redoing.
+    ///
+    /// Returns the entry that should be reapplied.
+    pub fn pop_redo(&mut self) -> Option<HistoryEntry> {
+        self.redo_stack.pop().inspect(|entry| {
+            self.undo_stack.push(entry.clone());
+        })
+    }
+
+    /// Peeks at the most recent undoable operation without removing it.
+    #[must_use]
+    pub fn peek_undo(&self) -> Option<&HistoryEntry> {
+        self.undo_stack.last()
+    }
+
+    /// Peeks at the most recent redoable operation without removing it.
+    #[must_use]
+    pub fn peek_redo(&self) -> Option<&HistoryEntry> {
+        self.redo_stack.last()
+    }
+
+    /// Returns all entries in the undo stack (oldest first).
+    #[must_use]
+    pub fn undo_entries(&self) -> &[HistoryEntry] {
+        &self.undo_stack
+    }
+
+    /// Returns all entries in the redo stack (oldest first).
+    #[must_use]
+    pub fn redo_entries(&self) -> &[HistoryEntry] {
+        &self.redo_stack
+    }
+
+    /// Clears all history.
+    pub fn clear(&mut self) {
+        self.undo_stack.clear();
+        self.redo_stack.clear();
+    }
+
+    /// Clears the redo stack only.
+    pub fn clear_redo(&mut self) {
+        self.redo_stack.clear();
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use nvisy_document::{InsertContent, RegionId};
+
+    use super::*;
+
+    fn make_entry() -> HistoryEntry {
+        let region = RegionId::new();
+        HistoryEntry::new(
+            EditOperation::delete(region),
+            EditOperation::insert_after(region, InsertContent::text("original")),
+        )
+    }
+
+    #[test]
+    fn test_empty_history() {
+        let history = EditHistory::new();
+        assert!(!history.can_undo());
+        assert!(!history.can_redo());
+    }
+
+    #[test]
+    fn test_record_and_undo() {
+        let mut history = EditHistory::new();
+
+        history.record(make_entry());
+        assert!(history.can_undo());
+        assert!(!history.can_redo());
+
+        let entry = history.pop_undo();
+        assert!(entry.is_some());
+        assert!(!history.can_undo());
+        assert!(history.can_redo());
+    }
+
+    #[test]
+    fn test_redo() {
+        let mut history = EditHistory::new();
+
+        history.record(make_entry());
+        history.pop_undo();
+
+        assert!(history.can_redo());
+
+        let entry = history.pop_redo();
+        assert!(entry.is_some());
+        assert!(history.can_undo());
+        assert!(!history.can_redo());
+    }
+
+    #[test]
+    fn test_new_record_clears_redo() {
+        let mut history = EditHistory::new();
+
+        history.record(make_entry());
+        history.pop_undo();
+        assert!(history.can_redo());
+
+        history.record(make_entry());
+        assert!(!history.can_redo());
+    }
+
+    #[test]
+    fn test_unlimited_entries() {
+        let mut history = EditHistory::new();
+
+        for _ in 0..1000 {
+            history.record(make_entry());
+        }
+
+        assert_eq!(history.undo_count(), 1000);
+    }
+}
diff --git a/crates/nvisy-engine/src/session/mod.rs b/crates/nvisy-engine/src/session/mod.rs
new file mode 100644
index 0000000..ccdb983
--- /dev/null
+++ b/crates/nvisy-engine/src/session/mod.rs
@@ -0,0 +1,405 @@
+//! Document editing sessions.
+//!
+//! An `EditSession` wraps a document and provides:
+//! - Stable region IDs across edits
+//! - Undo/redo support
+//! - Operation validation
+//! - Streaming/pagination for large documents
+
+mod history;
+
+use std::collections::HashMap;
+use std::num::NonZeroU32;
+
+use bytes::Bytes;
+pub use history::{EditHistory, HistoryEntry};
+use jiff::Timestamp;
+use nvisy_document::{
+    Capabilities, EditOperation, EditResult, EditableDocument, Error, PageOptions, Region,
+    RegionId, RegionStatus, Result,
+};
+use uuid::Uuid;
+
+/// Unique identifier for an edit session.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct SessionId(Uuid);
+
+impl SessionId {
+    /// Creates a new session ID.
+    #[must_use]
+    pub fn new() -> Self {
+        Self(Uuid::new_v4())
+    }
+
+    /// Returns the underlying UUID.
+    #[must_use]
+    pub fn as_uuid(&self) -> Uuid {
+        self.0
+    }
+}
+
+impl Default for SessionId {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl std::fmt::Display for SessionId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "session_{}", &self.0.to_string()[..8])
+    }
+}
+
+/// Configuration for an edit session.
+#[derive(Debug, Clone)]
+pub struct SessionConfig {
+    /// Whether to auto-extract regions on load.
+    pub auto_extract_regions: bool,
+
+    /// Page batch size for streaming.
+    pub page_batch_size: u32,
+
+    /// Whether to validate operations before applying.
+    pub validate_operations: bool,
+}
+
+impl Default for SessionConfig {
+    fn default() -> Self {
+        Self {
+            auto_extract_regions: true,
+            page_batch_size: 10,
+            validate_operations: true,
+        }
+    }
+}
+
+/// An edit session for a document.
+///
+/// Sessions provide stable region IDs, undo/redo, and streaming support.
+/// The session is generic over the document type `D`.
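+///
+/// # Example
+///
+/// A sketch, assuming a loaded `document`, its format `capabilities`, and a
+/// known `region_id`:
+///
+/// ```ignore
+/// let mut session = EditSession::new(document, capabilities, SessionConfig::default());
+/// session.apply(EditOperation::redact(region_id)).await?;
+/// assert!(session.can_undo());
+/// session.undo().await?;
+/// ```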
+pub struct EditSession<D> {
+    /// Unique session identifier.
+    id: SessionId,
+
+    /// The underlying document (must support editing).
+    document: D,
+
+    /// Format capabilities.
+    capabilities: Capabilities,
+
+    /// Edit history for undo/redo.
+    history: EditHistory,
+
+    /// Session configuration.
+    config: SessionConfig,
+
+    /// When the session was created.
+    created_at: Timestamp,
+
+    /// Region cache for quick lookup.
+    region_cache: HashMap<RegionId, Region>,
+
+    /// Pages that have been loaded (for lazy loading).
+    loaded_pages: Vec<u32>,
+
+    /// Total number of pages in the document.
+    total_pages: Option<u32>,
+}
+
+impl<D: EditableDocument> EditSession<D> {
+    /// Creates a new edit session from a loaded document.
+    #[must_use]
+    pub fn new(document: D, capabilities: Capabilities, config: SessionConfig) -> Self {
+        let history = EditHistory::new();
+        let total_pages = document.info().page_count;
+
+        let mut region_cache = HashMap::new();
+        for region in document.regions() {
+            region_cache.insert(region.id, region.clone());
+        }
+
+        let loaded_pages = if total_pages.is_some() {
+            document
+                .regions()
+                .iter()
+                .filter_map(|r| r.page.map(NonZeroU32::get))
+                .collect::<BTreeSet<_>>()
+                .into_iter()
+                .collect()
+        } else {
+            vec![]
+        };
+
+        Self {
+            id: SessionId::new(),
+            document,
+            capabilities,
+            history,
+            config,
+            created_at: Timestamp::now(),
+            region_cache,
+            loaded_pages,
+            total_pages,
+        }
+    }
+
+    /// Returns the session ID.
+    #[must_use]
+    pub fn id(&self) -> SessionId {
+        self.id
+    }
+
+    /// Returns the underlying document.
+    #[must_use]
+    pub fn document(&self) -> &D {
+        &self.document
+    }
+
+    /// Returns a mutable reference to the underlying document.
+    pub fn document_mut(&mut self) -> &mut D {
+        &mut self.document
+    }
+
+    /// Returns the format capabilities.
+    #[must_use]
+    pub fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    /// Returns when the session was created.
+    #[must_use]
+    pub fn created_at(&self) -> Timestamp {
+        self.created_at
+    }
+
+    /// Returns the edit history.
+    #[must_use]
+    pub fn history(&self) -> &EditHistory {
+        &self.history
+    }
+
+    /// Returns whether there are undoable operations.
+    #[must_use]
+    pub fn can_undo(&self) -> bool {
+        self.history.can_undo()
+    }
+
+    /// Returns whether there are redoable operations.
+    #[must_use]
+    pub fn can_redo(&self) -> bool {
+        self.history.can_redo()
+    }
+
+    /// Returns all regions (from cache).
+    #[must_use]
+    pub fn regions(&self) -> Vec<&Region> {
+        self.region_cache.values().collect()
+    }
+
+    /// Returns regions for a specific page.
+    #[must_use]
+    pub fn regions_for_page(&self, page: NonZeroU32) -> Vec<&Region> {
+        self.region_cache
+            .values()
+            .filter(|r| r.page == Some(page))
+            .collect()
+    }
+
+    /// Finds a region by ID.
+    #[must_use]
+    pub fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.region_cache.get(&id)
+    }
+
+    /// Returns the total number of pages.
+    #[must_use]
+    pub fn page_count(&self) -> Option<u32> {
+        self.total_pages
+    }
+
+    /// Returns which pages have been loaded.
+    #[must_use]
+    pub fn loaded_pages(&self) -> &[u32] {
+        &self.loaded_pages
+    }
+
+    /// Checks if a page has been loaded.
+    #[must_use]
+    pub fn is_page_loaded(&self, page: u32) -> bool {
+        self.loaded_pages.contains(&page)
+    }
+
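+    // Streaming sketch (comment-only; assumes a paginated document): regions
+    // are pulled into the cache batch by batch via `load_pages`, defined below.
+    //
+    //     session.load_pages(1, 10).await?;
+    //     assert!(session.is_page_loaded(1));
+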
+    /// Validates an operation before applying.
+    fn validate_operation(&self, operation: &EditOperation) -> Result<()> {
+        let support = self.capabilities.supports(operation);
+        if !support.is_supported() {
+            return Err(Error::operation_not_supported(format!("{operation:?}")));
+        }
+
+        for region_id in operation.referenced_regions() {
+            if !self.region_cache.contains_key(&region_id) {
+                return Err(Error::region_not_found(region_id));
+            }
+        }
+
+        for region_id in operation.referenced_regions() {
+            if let Some(region) = self.region_cache.get(&region_id) {
+                if region.effective_status() == RegionStatus::Deleted {
+                    return Err(Error::invalid_operation(format!(
+                        "region {region_id} is deleted"
+                    )));
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Applies an edit operation.
+    pub async fn apply(&mut self, operation: EditOperation) -> Result<EditResult> {
+        if self.config.validate_operations {
+            self.validate_operation(&operation)?;
+        }
+
+        let result = self.document.apply(&operation).await?;
+
+        if result.success {
+            for region in &result.created_regions {
+                self.region_cache.insert(region.id, region.clone());
+            }
+
+            for region in &result.modified_regions {
+                self.region_cache.insert(region.id, region.clone());
+            }
+
+            for id in &result.deleted_region_ids {
+                if let Some(region) = self.region_cache.get_mut(id) {
+                    region.status = Some(RegionStatus::Deleted);
+                }
+            }
+
+            if let Some(reverse) = result.reverse_operation.clone() {
+                self.history.record(HistoryEntry::new(operation, reverse));
+            }
+        }
+
+        Ok(result)
+    }
+
+    /// Undoes the most recent operation.
+    pub async fn undo(&mut self) -> Result<Option<EditResult>> {
+        let Some(entry) = self.history.pop_undo() else {
+            return Ok(None);
+        };
+
+        let result = self.document.apply(&entry.reverse).await?;
+
+        if result.success {
+            for region in &result.created_regions {
+                self.region_cache.insert(region.id, region.clone());
+            }
+
+            for region in &result.modified_regions {
+                self.region_cache.insert(region.id, region.clone());
+            }
+
+            for id in &result.deleted_region_ids {
+                if let Some(region) = self.region_cache.get_mut(id) {
+                    region.status = Some(RegionStatus::Deleted);
+                }
+            }
+        }
+
+        Ok(Some(result))
+    }
+
+    /// Redoes the most recently undone operation.
+    pub async fn redo(&mut self) -> Result<Option<EditResult>> {
+        let Some(entry) = self.history.pop_redo() else {
+            return Ok(None);
+        };
+
+        let result = self.document.apply(&entry.operation).await?;
+
+        if result.success {
+            for region in &result.created_regions {
+                self.region_cache.insert(region.id, region.clone());
+            }
+
+            for region in &result.modified_regions {
+                self.region_cache.insert(region.id, region.clone());
+            }
+
+            for id in &result.deleted_region_ids {
+                if let Some(region) = self.region_cache.get_mut(id) {
+                    region.status = Some(RegionStatus::Deleted);
+                }
+            }
+        }
+
+        Ok(Some(result))
+    }
+
+    /// Loads regions for additional pages (streaming support).
+    pub async fn load_pages(&mut self, start_page: u32, count: u32) -> Result<()> {
+        let options = PageOptions {
+            start_page,
+            page_count: Some(count),
+            extract_regions: true,
+        };
+
+        let regions = self.document.extract_page_regions(&options).await?;
+
+        for region in regions {
+            if let Some(page) = region.page {
+                if !self.loaded_pages.contains(&page.get()) {
+                    self.loaded_pages.push(page.get());
+                }
+            }
+            self.region_cache.insert(region.id, region);
+        }
+
+        self.loaded_pages.sort_unstable();
+
+        Ok(())
+    }
+
+    /// Serializes the document to bytes.
+    pub async fn serialize(&self) -> Result<Bytes> {
+        self.document.serialize().await
+    }
+
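+    // Persistence sketch (comment-only; `tokio::fs` and the output path are
+    // assumptions about the caller, not part of this API):
+    //
+    //     let bytes = session.serialize().await?;
+    //     tokio::fs::write("document.out", &bytes).await?;
+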
+    /// Returns whether the document has unsaved changes.
+    #[must_use]
+    pub fn is_modified(&self) -> bool {
+        self.document.is_modified()
+    }
+
+    /// Consumes the session and returns the underlying document.
+    pub fn into_document(self) -> D {
+        self.document
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_session_id() {
+        let id1 = SessionId::new();
+        let id2 = SessionId::new();
+        assert_ne!(id1, id2);
+
+        let display = format!("{id1}");
+        assert!(display.starts_with("session_"));
+    }
+
+    #[test]
+    fn test_session_config_default() {
+        let config = SessionConfig::default();
+        assert!(config.auto_extract_regions);
+        assert_eq!(config.page_batch_size, 10);
+        assert!(config.validate_operations);
+    }
+}
diff --git a/crates/nvisy-pdf/Cargo.toml b/crates/nvisy-pdf/Cargo.toml
new file mode 100644
index 0000000..77348cd
--- /dev/null
+++ b/crates/nvisy-pdf/Cargo.toml
@@ -0,0 +1,30 @@
+# https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[package]
+name = "nvisy-pdf"
+version = { workspace = true }
+rust-version = { workspace = true }
+edition = { workspace = true }
+license = { workspace = true }
+publish = { workspace = true }
+readme = "./README.md"
+
+authors = { workspace = true }
+repository = { workspace = true }
+homepage = { workspace = true }
+documentation = { workspace = true }
+
+description = "PDF document format support for nvisy"
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
+
+[dependencies]
+nvisy-document = { workspace = true }
+
+async-trait = { workspace = true }
+bytes = { workspace = true }
+thiserror = { workspace = true }
+
+[dev-dependencies]
diff --git a/crates/nvisy-pdf/README.md b/crates/nvisy-pdf/README.md
new file mode 100644
index 0000000..7c2cad4
--- /dev/null
+++ b/crates/nvisy-pdf/README.md
@@ -0,0 +1,13 @@
+# nvisy-pdf
+
+PDF document format support for nvisy.
+
+This crate provides a `DocumentFormat` implementation for PDF files (.pdf).
+
+## Status
+
+This crate is currently a stub. PDF parsing and manipulation are not yet implemented.
+
+## License
+
+MIT
diff --git a/crates/nvisy-pdf/src/document.rs b/crates/nvisy-pdf/src/document.rs
new file mode 100644
index 0000000..dc0638b
--- /dev/null
+++ b/crates/nvisy-pdf/src/document.rs
@@ -0,0 +1,77 @@
+//! PDF document implementation.
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use nvisy_document::{
+    Document, DocumentInfo, EditOperation, EditResult, EditableDocument, Error, PageOptions,
+    Region, RegionId, Result,
+};
+
+/// A loaded PDF document.
+#[derive(Debug)]
+pub struct PdfDocument {
+    info: DocumentInfo,
+    regions: Vec<Region>,
+    modified: bool,
+}
+
+impl PdfDocument {
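+    // Construction sketch (comment-only): once loading is implemented,
+    // documents are expected to come from `crate::PdfFormat::load`, e.g.
+    //
+    //     let doc = PdfFormat::new().load(data).await?;
+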
+    /// Creates a new PDF document (internal use).
+    #[must_use]
+    #[allow(dead_code)] // Will be used when load() is implemented
+    pub(crate) fn new(info: DocumentInfo) -> Self {
+        Self {
+            info,
+            regions: Vec::new(),
+            modified: false,
+        }
+    }
+}
+
+#[async_trait]
+impl Document for PdfDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        self.regions
+            .iter()
+            .filter(|r| r.page.map(|p| p.get()) == Some(page))
+            .collect()
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn serialize(&self) -> Result<Bytes> {
+        // TODO: Implement PDF serialization
+        Err(Error::unsupported_format(
+            "PDF serialization not yet implemented",
+        ))
+    }
+}
+
+#[async_trait]
+impl EditableDocument for PdfDocument {
+    async fn apply(&mut self, _operation: &EditOperation) -> Result<EditResult> {
+        // TODO: Implement PDF editing
+        Err(Error::unsupported_format("PDF editing not yet implemented"))
+    }
+
+    fn is_modified(&self) -> bool {
+        self.modified
+    }
+
+    async fn extract_page_regions(&mut self, _options: &PageOptions) -> Result<Vec<Region>> {
+        // TODO: Implement page region extraction
+        Err(Error::unsupported_format(
+            "PDF page extraction not yet implemented",
+        ))
+    }
+}
diff --git a/crates/nvisy-pdf/src/format.rs b/crates/nvisy-pdf/src/format.rs
new file mode 100644
index 0000000..f48345a
--- /dev/null
+++ b/crates/nvisy-pdf/src/format.rs
@@ -0,0 +1,67 @@
+//! PDF format handler implementation.
+
+use bytes::Bytes;
+use nvisy_document::{Capabilities, DocumentFormat, Error, Result};
+
+use crate::PdfDocument;
+
+/// PDF document format handler.
+#[derive(Debug, Clone, Default)]
+pub struct PdfFormat {
+    capabilities: Capabilities,
+}
+
+impl PdfFormat {
+    /// Creates a new PDF format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities::read_only(),
+        }
+    }
+}
+
+impl DocumentFormat for PdfFormat {
+    type Document = PdfDocument;
+
+    fn name(&self) -> &'static str {
+        "pdf"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["application/pdf"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["pdf"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, _data: Bytes) -> Result<Self::Document> {
+        // TODO: Implement PDF loading
+        Err(Error::unsupported_format("PDF loading not yet implemented"))
+    }
+
+    async fn create_empty(&self) -> Result<Self::Document> {
+        // TODO: Implement empty PDF creation
+        Err(Error::unsupported_format(
+            "PDF creation not yet implemented",
+        ))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = PdfFormat::new();
+        assert_eq!(format.name(), "pdf");
+        assert!(format.mime_types().contains(&"application/pdf"));
+        assert!(format.extensions().contains(&"pdf"));
+    }
+}
diff --git a/crates/nvisy-pdf/src/lib.rs b/crates/nvisy-pdf/src/lib.rs
new file mode 100644
index 0000000..5011638
--- /dev/null
+++ b/crates/nvisy-pdf/src/lib.rs
@@ -0,0 +1,21 @@
+//! PDF document format support for nvisy.
+//!
+//! This crate provides a `DocumentFormat` implementation for PDF files (.pdf).
+//!
+//! # Example
+//!
+//! ```ignore
+//! use nvisy_pdf::PdfFormat;
+//! use nvisy_engine::Engine;
+//!
+//! let engine = Engine::new().with_pdf(PdfFormat::new());
+//! ```
+
+#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+
+mod document;
+mod format;
+
+pub use document::PdfDocument;
+pub use format::PdfFormat;
diff --git a/crates/nvisy-text/Cargo.toml b/crates/nvisy-text/Cargo.toml
new file mode 100644
index 0000000..d653a15
--- /dev/null
+++ b/crates/nvisy-text/Cargo.toml
@@ -0,0 +1,30 @@
+# https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[package]
+name = "nvisy-text"
+version = { workspace = true }
+rust-version = { workspace = true }
+edition = { workspace = true }
+license = { workspace = true }
+publish = { workspace = true }
+readme = "./README.md"
+
+authors = { workspace = true }
+repository = { workspace = true }
+homepage = { workspace = true }
+documentation = { workspace = true }
+
+description = "Plain text document format support for nvisy"
+
+[package.metadata.docs.rs]
+all-features = true
+rustdoc-args = ["--cfg", "docsrs"]
+
+[dependencies]
+nvisy-document = { workspace = true }
+
+async-trait = { workspace = true }
+bytes = { workspace = true }
+thiserror = { workspace = true }
+
+[dev-dependencies]
diff --git a/crates/nvisy-text/README.md b/crates/nvisy-text/README.md
new file mode 100644
index 0000000..f7b701a
--- /dev/null
+++ b/crates/nvisy-text/README.md
@@ -0,0 +1,13 @@
+# nvisy-text
+
+Plain text document format support for nvisy.
+
+This crate provides a `DocumentFormat` implementation for plain text files (.txt, .md, .rst, etc.).
+
+## Status
+
+This crate is currently a stub. Text document handling is not yet fully implemented.
+
+## License
+
+MIT
diff --git a/crates/nvisy-text/src/document.rs b/crates/nvisy-text/src/document.rs
new file mode 100644
index 0000000..baf041b
--- /dev/null
+++ b/crates/nvisy-text/src/document.rs
@@ -0,0 +1,79 @@
+//! Plain text document implementation.
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use nvisy_document::{
+    Document, DocumentInfo, EditOperation, EditResult, EditableDocument, Error, PageOptions,
+    Region, RegionId, Result,
+};
+
+/// A loaded plain text document.
+#[derive(Debug)]
+pub struct TextDocument {
+    info: DocumentInfo,
+    regions: Vec<Region>,
+    modified: bool,
+}
+
+impl TextDocument {
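+    // Construction sketch (comment-only): once loading is implemented,
+    // documents are expected to come from `crate::TextFormat::load`, e.g.
+    //
+    //     let doc = TextFormat::new().load(Bytes::from("hello world")).await?;
+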
+    /// Creates a new text document (internal use).
+    #[must_use]
+    #[allow(dead_code)] // Will be used when load() is implemented
+    pub(crate) fn new(info: DocumentInfo) -> Self {
+        Self {
+            info,
+            regions: Vec::new(),
+            modified: false,
+        }
+    }
+}
+
+#[async_trait]
+impl Document for TextDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        self.regions
+            .iter()
+            .filter(|r| r.page.map(|p| p.get()) == Some(page))
+            .collect()
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn serialize(&self) -> Result<Bytes> {
+        // TODO: Implement text serialization
+        Err(Error::unsupported_format(
+            "Text serialization not yet implemented",
+        ))
+    }
+}
+
+#[async_trait]
+impl EditableDocument for TextDocument {
+    async fn apply(&mut self, _operation: &EditOperation) -> Result<EditResult> {
+        // TODO: Implement text editing
+        Err(Error::unsupported_format(
+            "Text editing not yet implemented",
+        ))
+    }
+
+    fn is_modified(&self) -> bool {
+        self.modified
+    }
+
+    async fn extract_page_regions(&mut self, _options: &PageOptions) -> Result<Vec<Region>> {
+        // TODO: Implement page region extraction
+        Err(Error::unsupported_format(
+            "Text page extraction not yet implemented",
+        ))
+    }
+}
diff --git a/crates/nvisy-text/src/format.rs b/crates/nvisy-text/src/format.rs
new file mode 100644
index 0000000..5458c74
--- /dev/null
+++ b/crates/nvisy-text/src/format.rs
@@ -0,0 +1,70 @@
+//! Plain text format handler implementation.
+
+use bytes::Bytes;
+use nvisy_document::{Capabilities, DocumentFormat, Error, Result};
+
+use crate::TextDocument;
+
+/// Plain text document format handler.
+#[derive(Debug, Clone, Default)]
+pub struct TextFormat {
+    capabilities: Capabilities,
+}
+
+impl TextFormat {
+    /// Creates a new plain text format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities::read_only(),
+        }
+    }
+}
+
+impl DocumentFormat for TextFormat {
+    type Document = TextDocument;
+
+    fn name(&self) -> &'static str {
+        "text"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["text/plain", "text/markdown", "text/x-rst"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["txt", "md", "markdown", "rst", "text"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, _data: Bytes) -> Result<Self::Document> {
+        // TODO: Implement text loading
+        Err(Error::unsupported_format(
+            "Text loading not yet implemented",
+        ))
+    }
+
+    async fn create_empty(&self) -> Result<Self::Document> {
+        // TODO: Implement empty text document creation
+        Err(Error::unsupported_format(
+            "Text creation not yet implemented",
+        ))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = TextFormat::new();
+        assert_eq!(format.name(), "text");
+        assert!(format.mime_types().contains(&"text/plain"));
+        assert!(format.extensions().contains(&"txt"));
+        assert!(format.extensions().contains(&"md"));
+    }
+}
diff --git a/crates/nvisy-text/src/lib.rs b/crates/nvisy-text/src/lib.rs
new file mode 100644
index 0000000..5c5f5c4
--- /dev/null
+++ b/crates/nvisy-text/src/lib.rs
@@ -0,0 +1,22 @@
+//! Plain text document format support for nvisy.
+//!
+//! This crate provides a `DocumentFormat` implementation for plain text
+//! files (.txt, .md, .rst, etc.).
+//!
+//! # Example
+//!
+//! ```ignore
+//! use nvisy_text::TextFormat;
+//! use nvisy_engine::Engine;
+//!
+//! let engine = Engine::new().with_text(TextFormat::new());
+//! ```
+
+#![forbid(unsafe_code)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+
+mod document;
+mod format;
+
+pub use document::TextDocument;
+pub use format::TextFormat;
diff --git a/deny.toml b/deny.toml
new file mode 100644
index 0000000..2d25701
--- /dev/null
+++ b/deny.toml
@@ -0,0 +1,80 @@
+# Configuration for cargo-deny
+# See: https://embarkstudios.github.io/cargo-deny/
+
+[graph]
+targets = [
+    { triple = "x86_64-unknown-linux-gnu" },
+    { triple = "x86_64-unknown-linux-musl" },
+    { triple = "x86_64-apple-darwin" },
+    { triple = "aarch64-apple-darwin" },
+    { triple = "x86_64-pc-windows-msvc" },
+]
+
+[advisories]
+# The path where the advisory database is cloned/fetched into
+db-path = "~/.cargo/advisory-db"
+# The url(s) of the advisory databases to use
+db-urls = ["https://github.com/rustsec/advisory-db"]
+# Which unmaintained crates to flag ("all" covers the entire dependency graph)
+unmaintained = "all"
+# The lint level for crates that have been yanked from their source registry
+yanked = "deny"
+# A list of advisory IDs to ignore
+ignore = []
+
+[licenses]
+# Confidence threshold for detecting a license from a license text (higher = stricter)
+confidence-threshold = 0.9
+# Do not skip license checks for private/unpublished crates
+private = { ignore = false, registries = [] }
+# Warn if an allowed license is never used in the dependency graph
+unused-allowed-license = "warn"
+
+# List of explicitly allowed licenses (single licenses only)
+allow = [
+    "MIT",
+    "Apache-2.0",
+    "Apache-2.0 WITH LLVM-exception",
+    "BSD-2-Clause",
+    "BSD-3-Clause",
+    "ISC",
+    "Unicode-3.0",
+    "Unlicense",
+    "BSL-1.0",
+    "CC0-1.0",
+    "Zlib",
+    "OpenSSL",
+    "bzip2-1.0.6",
+    "CDLA-Permissive-2.0",
+]
+
+# For compound licenses, we'll be permissive and only block if they contain denied licenses
+exceptions = []
+
+[bans]
+# Lint level for when multiple versions of the same crate are detected
+multiple-versions = "warn"
+# Lint level for when a crate version requirement is `*`
+wildcards = "deny"
+# The graph highlighting used when creating dotgraphs for crates with multiple versions
+highlight = "all"
+
+# List of crates that are allowed
+allow = []
+
+# List of crates to deny
+deny = []
+
+# Skip checking certain crates that are known to have complex but acceptable licensing
+skip = []
+skip-tree = []
+
+[sources]
+# What to do when a crate comes from a registry not in the allow list
+unknown-registry = "deny"
+# What to do when a crate comes from a git repository not in the allow list
+unknown-git = "deny"
+# List of URLs for allowed crate registries
+allow-registry = ["https://github.com/rust-lang/crates.io-index"]
+# List of URLs for allowed Git repositories
+allow-git = []
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..12a6950
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,6 @@
+# https://rust-lang.github.io/rustfmt
+
+group_imports = "StdExternalCrate"
+imports_granularity = "Module"
+reorder_impl_items = true
+merge_derives = false