From 1a9490cf606d4eb466feeb5e04a602ad5f653a3f Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Fri, 16 Jan 2026 06:24:21 +0100 Subject: [PATCH 1/5] feat: add TextExtractor and TableExtractor traits to nvisy-document - Add TextExtractor trait for native text extraction from documents - extract_text() returns ExtractedText with raw, by-page, by-region text - extract_text_for_page() for single page extraction - needs_ocr() heuristic check for scanned documents - Add TableExtractor trait for table extraction and normalization - extract_tables() returns Vec<NormalizedTable> - NormalizedTable, NormalizedRow, NormalizedCell types - CellDataType inference (Text, Number, Date, Boolean, Formula, Empty) - Add prelude.rs to nvisy-archive crate - Refactor nvisy-core: - ContentData now supports Bytes or HipStr via ContentBytes enum - Content struct wraps ContentData + optional ContentMetadata - Remove from_file_extension/common_extensions from ContentKind - Move extension-to-kind mapping to nvisy-archive - Add runtime documentation: - docs/README.md - overview and crate structure - docs/PIPELINE.md - processing stages with pseudocode - docs/DATATYPES.md - core data structures --- .github/workflows/build.yml | 3 +- .github/workflows/security.yml | 3 +- Cargo.lock | 3 + Cargo.toml | 5 +- crates/nvisy-archive/Cargo.toml | 4 + crates/nvisy-archive/README.md | 11 +- crates/nvisy-archive/src/file/archive_type.rs | 29 +- crates/nvisy-archive/src/file/mod.rs | 112 ++- crates/nvisy-archive/src/handler/mod.rs | 233 ++++++- crates/nvisy-archive/src/lib.rs | 81 +-- crates/nvisy-archive/src/prelude.rs | 12 + crates/nvisy-core/Cargo.toml | 1 + crates/nvisy-core/README.md | 37 +- crates/nvisy-core/src/error/error_source.rs | 37 +- crates/nvisy-core/src/error/error_type.rs | 27 +- crates/nvisy-core/src/error/mod.rs | 193 +++--- crates/nvisy-core/src/fs/content_file.rs | 27 +- crates/nvisy-core/src/fs/content_kind.rs | 110 +-- crates/nvisy-core/src/fs/content_metadata.rs | 29 +- crates/nvisy-core/src/fs/data_sensitivity.rs | 108 +-- .../nvisy-core/src/fs/data_structure_kind.rs | 130 ---- crates/nvisy-core/src/fs/mod.rs | 79 +-- crates/nvisy-core/src/io/content.rs | 291 +++++--- crates/nvisy-core/src/io/content_data.rs | 378 +++++++++-- crates/nvisy-core/src/io/content_read.rs | 8 +- crates/nvisy-core/src/io/data_reference.rs | 38 +- crates/nvisy-core/src/io/mod.rs | 2 +- crates/nvisy-core/src/lib.rs | 18 - crates/nvisy-core/src/prelude.rs | 4 +- crates/nvisy-document/README.md | 44 +- crates/nvisy-document/src/lib.rs | 39 +- crates/nvisy-document/src/table/mod.rs | 86 +++ crates/nvisy-document/src/table/types.rs | 446 ++++++++++++ crates/nvisy-document/src/text/mod.rs | 69 ++ crates/nvisy-document/src/text/types.rs | 162 +++++ crates/nvisy-docx/src/lib.rs | 16 +- crates/nvisy-pdf/src/lib.rs | 14 +- crates/nvisy-text/src/lib.rs | 15 +- docs/DATATYPES.md | 445 ++++++++++++ docs/PIPELINE.md | 635 ++++++++++++++++++ docs/README.md | 31 + 41 files changed, 3002 insertions(+), 1013 deletions(-) create mode 100644 crates/nvisy-archive/src/prelude.rs delete mode 100644 crates/nvisy-core/src/fs/data_structure_kind.rs create mode 100644 crates/nvisy-document/src/table/mod.rs create mode 100644 crates/nvisy-document/src/table/types.rs create mode 100644 crates/nvisy-document/src/text/mod.rs create mode 100644 crates/nvisy-document/src/text/types.rs create mode 100644 docs/DATATYPES.md create mode 100644 docs/PIPELINE.md create mode 100644 docs/README.md diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index
8a068ef..d4f3796 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,7 +2,8 @@ name: Build on: push: - branches: [main, release] + branches: + - "main" paths: - "crates/**" - "Cargo.toml" diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 835a818..1e84721 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -2,7 +2,8 @@ name: Security on: push: - branches: [main, release] + branches: + - "main" paths: - "crates/**" - "Cargo.toml" diff --git a/Cargo.lock b/Cargo.lock index 33d3ba1..0fc610e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -614,10 +614,12 @@ dependencies = [ name = "nvisy-archive" version = "0.1.0" dependencies = [ + "bytes", "bzip2", "flate2", "nvisy-core", "sevenz-rust", + "strum", "tar", "tempfile", "tokio", @@ -631,6 +633,7 @@ name = "nvisy-core" version = "0.1.0" dependencies = [ "bytes", + "derive_more", "hex", "hipstr", "jiff", diff --git a/Cargo.toml b/Cargo.toml index 8092a32..c81f9cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,8 +14,8 @@ members = [ [workspace.package] version = "0.1.0" -rust-version = "1.89" -edition = "2021" +rust-version = "1.92" +edition = "2024" license = "MIT" publish = false @@ -76,6 +76,7 @@ isolang = { version = "2.4", default-features = false, features = ["english_name # Text processing and pattern matching regex = { version = "1.11", default-features = false, features = [] } +regex-lite = { version = "0.1", default-features = false, features = ["std"] } fancy-regex = { version = "0.16", default-features = false, features = [] } aho-corasick = { version = "1.1", default-features = false, features = [] } unicode-segmentation = { version = "1.10", default-features = false, features = [] } diff --git a/crates/nvisy-archive/Cargo.toml b/crates/nvisy-archive/Cargo.toml index 38742ae..706468b 100644 --- a/crates/nvisy-archive/Cargo.toml +++ b/crates/nvisy-archive/Cargo.toml @@ -28,6 +28,10 @@ xz = ["dep:xz2"] [dependencies] # Core dependencies nvisy-core = { workspace = true } +bytes = { workspace = true } + +# Utilities +strum = { workspace = true, features = ["derive"] } # Async and I/O tokio = { workspace = true, features = ["fs", "io-util", "rt"] } diff --git a/crates/nvisy-archive/README.md b/crates/nvisy-archive/README.md index 05cdbf7..fce88ab 100644 --- a/crates/nvisy-archive/README.md +++ b/crates/nvisy-archive/README.md @@ -6,6 +6,15 @@ Archive handling and compression library for the Nvisy runtime. ## Features +- `zip` - ZIP archive support (enabled by default) +- `tar` - TAR archive support (enabled by default) +- `sevenz` - 7z archive support +- `gzip` - GZIP compression support (enabled by default) +- `bzip2` - BZIP2 compression support (enabled by default) +- `xz` - XZ/LZMA compression support (enabled by default) + +## Capabilities + - **Multiple Formats** - ZIP, TAR, TAR.GZ, TAR.BZ2, TAR.XZ, GZIP, BZIP2, and XZ - **Async Operations** - Full async/await support with Tokio - **Flexible Loading** - Load from file paths, memory, or byte streams @@ -13,7 +22,7 @@ Archive handling and compression library for the Nvisy runtime. 
- **Memory Efficient** - Stream-based processing for large archives - **Cross-Platform** - Works on Windows, macOS, and Linux -## Key Dependencies +## Dependencies - `tokio` - Async runtime for I/O operations - `tar` - TAR archive format support diff --git a/crates/nvisy-archive/src/file/archive_type.rs b/crates/nvisy-archive/src/file/archive_type.rs index 2ccda40..fdcaa58 100644 --- a/crates/nvisy-archive/src/file/archive_type.rs +++ b/crates/nvisy-archive/src/file/archive_type.rs @@ -4,7 +4,8 @@ //! and provides utilities for working with archive types. use std::ffi::OsStr; -use std::fmt; + +use strum::{AsRefStr, Display, EnumIter, EnumString}; /// Supported archive types /// @@ -12,24 +13,34 @@ use std::fmt; /// It provides methods to determine the archive type from file extensions /// and to get the supported extensions for each type. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[derive(AsRefStr, Display, EnumIter, EnumString)] pub enum ArchiveType { /// ZIP archive format + #[strum(serialize = "ZIP")] Zip, /// TAR archive format (uncompressed) + #[strum(serialize = "TAR")] Tar, /// GZIP compressed TAR archive + #[strum(serialize = "TAR.GZ")] TarGz, /// BZIP2 compressed TAR archive + #[strum(serialize = "TAR.BZ2")] TarBz2, /// XZ compressed TAR archive + #[strum(serialize = "TAR.XZ")] TarXz, /// GZIP compression (single file) + #[strum(serialize = "GZIP")] Gz, /// BZIP2 compression (single file) + #[strum(serialize = "BZIP2")] Bz2, /// XZ compression (single file) + #[strum(serialize = "XZ")] Xz, /// 7-Zip archive format + #[strum(serialize = "7Z")] SevenZ, } @@ -127,22 +138,6 @@ impl ArchiveType { } } -impl fmt::Display for ArchiveType { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Zip => write!(f, "ZIP"), - Self::Tar => write!(f, "TAR"), - Self::TarGz => write!(f, "TAR.GZ"), - Self::TarBz2 => write!(f, "TAR.BZ2"), - Self::TarXz => write!(f, "TAR.XZ"), - Self::Gz => write!(f, "GZIP"), - Self::Bz2 => write!(f, "BZIP2"), - Self::Xz => write!(f, "XZ"), - Self::SevenZ => write!(f, "7Z"), - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/nvisy-archive/src/file/mod.rs b/crates/nvisy-archive/src/file/mod.rs index a1abe1c..c2eeef9 100644 --- a/crates/nvisy-archive/src/file/mod.rs +++ b/crates/nvisy-archive/src/file/mod.rs @@ -10,22 +10,27 @@ use std::io::Cursor; use std::path::{Path, PathBuf}; pub use archive_type::ArchiveType; +use bytes::Bytes; use tempfile::TempDir; use tokio::fs; use crate::handler::ArchiveHandler; #[cfg(feature = "zip")] use crate::ZipResultExt; -use crate::{ArchiveErrorExt, Error, Result}; +use crate::{ArchiveErrorExt, ContentData, ContentSource, Error, Result}; /// Represents an archive file that can be loaded from various sources /// /// This struct encapsulates an archive and provides methods for /// extracting its contents to a temporary directory for processing. +/// It integrates with nvisy-core's `ContentData` and `ContentSource` +/// for content tracking and integrity verification. 
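As a quick orientation before the struct definition, here is a minimal usage sketch. It assumes the `from_path` constructor implied by the doc comments below (shown here as synchronous and fallible) and a placeholder `input/archive.zip`; only `unpack()`, `file_count()`, and extension-based type detection are confirmed by this patch.

```rust
use nvisy_archive::prelude::*;

async fn unpack_zip() -> Result<usize> {
    // `from_path` (assumed name) detects ArchiveType::Zip from the ".zip"
    // extension and assigns a fresh ContentSource for lineage tracking.
    let archive = ArchiveFile::from_path("input/archive.zip")?;

    // `unpack` extracts into a managed temporary directory and hands back
    // an ArchiveHandler carrying the same ContentSource.
    let handler = archive.unpack().await?;
    Ok(handler.file_count())
}
```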
#[derive(Debug)] pub struct ArchiveFile { + /// Unique identifier for this archive content + content_source: ContentSource, /// Type of archive - pub archive_type: ArchiveType, + archive_type: ArchiveType, /// Source data for the archive source: ArchiveSource, } @@ -35,16 +40,15 @@ pub struct ArchiveFile { enum ArchiveSource { /// Archive loaded from a file path Path(PathBuf), - /// Archive loaded from memory - Memory(Vec<u8>), - /// Archive loaded from an iterator - Iterator(Vec<u8>), + /// Archive loaded from ContentData (memory with metadata) + ContentData(ContentData), } impl ArchiveFile { /// Create a new archive file from a file path /// /// The archive type is automatically detected from the file extension. + /// A new `ContentSource` is generated to track this archive. /// /// # Example /// /// ``` @@ -82,45 +86,51 @@ impl ArchiveFile { .ok_or_else(|| Error::unsupported_format(extension.to_string_lossy().to_string()))?; Ok(Self { + content_source: ContentSource::new(), archive_type, source: ArchiveSource::Path(path.to_path_buf()), }) } - /// Create a new archive file from memory with explicit archive type + /// Create a new archive file from ContentData + /// + /// This preserves the content source from the provided ContentData, + /// maintaining content lineage tracking. /// /// # Example /// /// ``` - /// use nvisy_archive::{ArchiveFile, ArchiveType}; + /// use nvisy_archive::{ArchiveFile, ArchiveType, ContentData}; /// - /// let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature - /// let archive = ArchiveFile::from_memory(ArchiveType::Zip, data); + /// let data = ContentData::from(vec![0x50, 0x4B, 0x03, 0x04]); // ZIP signature + /// let archive = ArchiveFile::from_content_data(ArchiveType::Zip, data); /// ``` - pub fn from_memory(archive_type: ArchiveType, data: Vec<u8>) -> Self { + pub fn from_content_data(archive_type: ArchiveType, content_data: ContentData) -> Self { Self { + content_source: content_data.content_source, archive_type, - source: ArchiveSource::Memory(data), + source: ArchiveSource::ContentData(content_data), } } - /// Create a new archive file from an iterator of bytes + /// Create a new archive file from raw bytes with explicit archive type /// - /// The iterator will be consumed immediately and stored in memory. + /// A new `ContentSource` is generated to track this archive.
/// /// # Example /// /// ``` /// use nvisy_archive::{ArchiveFile, ArchiveType}; /// - /// let data = [0x50, 0x4B, 0x03, 0x04]; // ZIP signature - /// let archive = ArchiveFile::from_iterator(ArchiveType::Zip, data.into_iter()); + /// let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature + /// let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data); /// ``` - pub fn from_iterator(archive_type: ArchiveType, data: impl Iterator<Item = u8>) -> Self { - let data: Vec<u8> = data.collect(); + pub fn from_bytes(archive_type: ArchiveType, data: impl Into<Bytes>) -> Self { + let content_data = ContentData::from(data.into()); Self { + content_source: content_data.content_source, archive_type, - source: ArchiveSource::Iterator(data), + source: ArchiveSource::ContentData(content_data), } } @@ -130,6 +140,11 @@ impl ArchiveFile { self } + /// Get the content source identifier for this archive + pub fn content_source(&self) -> ContentSource { + self.content_source + } + /// Get the archive type pub fn archive_type(&self) -> ArchiveType { self.archive_type } @@ -139,7 +154,7 @@ pub async fn exists(&self) -> bool { match &self.source { ArchiveSource::Path(path) => fs::try_exists(path).await.unwrap_or(false), - ArchiveSource::Memory(_) | ArchiveSource::Iterator(_) => true, + ArchiveSource::ContentData(_) => true, } } @@ -147,18 +162,33 @@ pub fn path(&self) -> Option<&Path> { match &self.source { ArchiveSource::Path(path) => Some(path), - _ => None, + ArchiveSource::ContentData(_) => None, } } - /// Get the size of the archive data + /// Get the size of the archive data in bytes pub async fn size(&self) -> Result<u64> { match &self.source { ArchiveSource::Path(path) => { let metadata = fs::metadata(path).await?; Ok(metadata.len()) } - ArchiveSource::Memory(data) | ArchiveSource::Iterator(data) => Ok(data.len() as u64), + ArchiveSource::ContentData(data) => Ok(data.size() as u64), } } + + /// Get the SHA256 hash of the archive content + /// + /// For file-based archives, this reads the file first. + /// For memory-based archives, the hash is computed lazily.
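A short usage sketch of the `size()` and `sha256()` helpers; `zip_bytes` stands in for real archive bytes, and the caller is assumed to return the crate's `Result`.

```rust
use nvisy_archive::prelude::*;

async fn inspect(zip_bytes: Vec<u8>) -> Result<()> {
    // Both helpers behave the same for path-backed and memory-backed archives.
    let archive = ArchiveFile::from_bytes(ArchiveType::Zip, zip_bytes);
    let size = archive.size().await?;     // length of the backing data in bytes
    let digest = archive.sha256().await?; // hex-encoded SHA-256 of the content
    println!("{size} bytes, sha256 = {digest}");
    Ok(())
}
```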
+ pub async fn sha256(&self) -> Result<String> { + match &self.source { + ArchiveSource::Path(path) => { + let data = fs::read(path).await?; + let content_data = ContentData::from(data); + Ok(content_data.sha256_hex()) + } + ArchiveSource::ContentData(data) => Ok(data.sha256_hex()), } } @@ -198,14 +228,15 @@ Error::invalid_archive(format!("Failed to create temporary directory: {}", e)) })?; - // Get archive data as bytes - let data = self.get_data().await?; - let cursor = Cursor::new(data); + // Get archive data as ContentData + let content_data = self.get_content_data().await?; + let cursor = Cursor::new(content_data.as_bytes().to_vec()); // Extract based on archive type let files = self.extract_archive(cursor, temp_dir.path()).await?; Ok(ArchiveHandler::new( + self.content_source, self.archive_type, self.path().map(|p| p.to_path_buf()), temp_dir, @@ -213,11 +244,14 @@ )) } - /// Get the archive data as bytes - async fn get_data(&self) -> Result<Vec<u8>> { + /// Get the archive data as ContentData + async fn get_content_data(&self) -> Result<ContentData> { match &self.source { - ArchiveSource::Path(path) => fs::read(path).await.map_err(Into::into), - ArchiveSource::Memory(data) | ArchiveSource::Iterator(data) => Ok(data.clone()), + ArchiveSource::Path(path) => { + let data = fs::read(path).await?; + Ok(ContentData::new(self.content_source, data.into())) + } + ArchiveSource::ContentData(data) => Ok(data.clone()), } } @@ -597,18 +631,22 @@ mod tests { use super::*; #[test] - fn test_archive_file_from_memory() { + fn test_archive_file_from_bytes() { let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature - let archive = ArchiveFile::from_memory(ArchiveType::Zip, data); + let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data); assert_eq!(archive.archive_type(), ArchiveType::Zip); assert!(archive.path().is_none()); + // Content source should be valid + assert!(!archive.content_source().as_uuid().is_nil()); } #[test] - fn test_archive_file_from_iterator() { - let data = [0x50, 0x4B, 0x03, 0x04]; // ZIP signature - let archive = ArchiveFile::from_iterator(ArchiveType::Zip, data.into_iter()); - assert_eq!(archive.archive_type(), ArchiveType::Zip); + fn test_archive_file_from_content_data() { + let content_data = ContentData::from(vec![0x50, 0x4B, 0x03, 0x04]); + let original_source = content_data.content_source; + let archive = ArchiveFile::from_content_data(ArchiveType::Zip, content_data); + // Should preserve the original content source + assert_eq!(archive.content_source(), original_source); } #[test] @@ -635,7 +673,7 @@ #[tokio::test] async fn test_memory_size() { let data = vec![1, 2, 3, 4, 5]; - let archive = ArchiveFile::from_memory(ArchiveType::Zip, data); + let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data); assert_eq!(archive.size().await.unwrap(), 5); } } diff --git a/crates/nvisy-archive/src/handler/mod.rs b/crates/nvisy-archive/src/handler/mod.rs index 40a8398..ef3415b 100644 --- a/crates/nvisy-archive/src/handler/mod.rs +++ b/crates/nvisy-archive/src/handler/mod.rs @@ -15,7 +15,39 @@ pub use tar_handler::{TarArchiveBuilder, TarArchiveHandler, TarDirectoryBuilder, use tempfile::TempDir; pub use zip_handler::{ZipArchiveBuilder, ZipArchiveHandler, ZipDirectoryBuilder, ZipEntryInfo}; -use crate::{ArchiveErrorExt, ArchiveType, Error, Result}; +use crate::{ + ArchiveErrorExt, ArchiveType, ContentKind, ContentMetadata, ContentSource, Error, Result, +}; + +/// Detect content kind from file extension +/// +/// This function maps common file extensions
to their content kind categories. +fn content_kind_from_extension(extension: &str) -> ContentKind { + let ext = extension.to_lowercase(); + match ext.as_str() { + // Text formats + "txt" | "text" | "md" | "markdown" | "rst" | "xml" | "json" | "yaml" | "yml" | "toml" + | "ini" | "cfg" | "conf" | "log" => ContentKind::Text, + + // Document formats + "pdf" | "doc" | "docx" | "rtf" | "odt" | "pages" => ContentKind::Document, + + // Spreadsheet formats + "csv" | "tsv" | "xls" | "xlsx" | "ods" | "numbers" => ContentKind::Spreadsheet, + + // Image formats + "jpg" | "jpeg" | "png" | "gif" | "bmp" | "svg" | "webp" | "ico" | "tiff" | "tif" => { + ContentKind::Image + } + + // Archive formats + "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "tgz" | "tbz2" | "txz" => { + ContentKind::Archive + } + + _ => ContentKind::Unknown, + } +} /// Handler for unpacked archive contents /// @@ -24,6 +56,8 @@ use crate::{ArchiveErrorExt, ArchiveType, Error, Result}; /// and repacking the archive. #[derive(Debug)] pub struct ArchiveHandler { + /// Content source identifier for the original archive + pub content_source: ContentSource, /// Type of the original archive pub archive_type: ArchiveType, /// Original archive file path (if loaded from file) @@ -39,12 +73,14 @@ impl ArchiveHandler { /// /// This is typically called internally by `ArchiveFile::unpack()`. pub fn new( + content_source: ContentSource, archive_type: ArchiveType, original_path: Option<PathBuf>, temp_dir: TempDir, files: Vec<PathBuf>, ) -> Self { Self { + content_source, archive_type, original_path, temp_dir, @@ -87,6 +123,44 @@ impl ArchiveHandler { }) } + /// Find files matching a specific content kind + pub fn find_files_by_kind(&self, kind: ContentKind) -> Vec<&PathBuf> { + self.find_files(|path| self.content_kind_for_path(path) == kind) + } + + /// Get the content kind for a file path based on its extension + pub fn content_kind_for_path(&self, path: &Path) -> ContentKind { + path.extension() + .and_then(|ext| ext.to_str()) + .map(content_kind_from_extension) + .unwrap_or_default() + } + + /// Create content metadata for a file using its relative path within the archive + /// + /// The returned metadata has a freshly generated ContentSource + /// and includes the relative path within the archive. + pub fn content_metadata_for_file(&self, relative_path: impl AsRef<Path>) -> ContentMetadata { + ContentMetadata::with_path(ContentSource::new(), relative_path.as_ref()) + } + + /// Get content metadata for all files in the archive + /// + /// Returns a list of ContentMetadata entries for each extracted file, + /// using relative paths within the archive.
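To make the classification helpers concrete, here is a sketch of filtering unpacked entries by kind; it assumes `handler` came from `ArchiveFile::unpack()` and that the archive actually contains a PDF named `report.pdf`.

```rust
use std::path::Path;

use nvisy_archive::prelude::*;

fn summarize(handler: &ArchiveHandler) {
    // Coarse, extension-based classification of the extracted entries.
    for path in handler.find_files_by_kind(ContentKind::Document) {
        println!("document: {}", path.display());
    }

    // The same heuristic applied to a single path.
    assert_eq!(
        handler.content_kind_for_path(Path::new("report.pdf")),
        ContentKind::Document
    );
}
```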
+ pub fn all_content_metadata(&self) -> Result<Vec<ContentMetadata>> { + let temp_path = self.temp_path(); + self.files + .iter() + .map(|path| { + let relative = path + .strip_prefix(temp_path) + .map_err(|e| Error::invalid_archive(format!("Invalid file path: {}", e)))?; + Ok(ContentMetadata::with_path(ContentSource::new(), relative)) + }) + .collect() + } + /// Get all files recursively in the temporary directory pub fn refresh_file_list(&mut self) -> Result<()> { self.files = Self::scan_files(self.temp_path())?; @@ -281,8 +355,10 @@ mod tests { fn test_archive_handler_creation() { let temp_dir = TempDir::new().unwrap(); let files = vec![PathBuf::from("test.txt")]; + let content_source = ContentSource::new(); let handler = ArchiveHandler::new( + content_source, ArchiveType::Zip, Some(PathBuf::from("test.zip")), temp_dir, @@ -299,7 +375,13 @@ let temp_dir = TempDir::new().unwrap(); let files = vec![]; - let handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, files); + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir, + files, + ); assert_eq!(handler.file_count(), 0); assert!(handler.is_empty()); @@ -314,7 +396,13 @@ PathBuf::from("image.png"), ]; - let handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, files); + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir, + files, + ); let txt_files = handler.find_files_by_extension("txt"); assert_eq!(txt_files.len(), 1); @@ -328,7 +416,13 @@ let temp_dir = TempDir::new().unwrap(); let files = vec![PathBuf::from("file1.txt"), PathBuf::from("file2.txt")]; - let handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, files.clone()); + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir, + files, + ); let collected: Vec<&PathBuf> = (&handler).into_iter().collect(); assert_eq!(collected.len(), 2); @@ -337,7 +431,13 @@ #[tokio::test] async fn test_write_and_read_file() { let temp_dir = TempDir::new().unwrap(); - let mut handler = ArchiveHandler::new(ArchiveType::Zip, None, temp_dir, vec![]); + let mut handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir, + vec![], + ); let content = b"Hello, World!"; handler.write_file("test.txt", content).await.unwrap(); @@ -346,4 +446,127 @@ let read_content = handler.read_file("test.txt").await.unwrap(); assert_eq!(read_content, content); } + + #[test] + fn test_find_files_by_kind() { + let temp_dir = TempDir::new().unwrap(); + let files = vec![ + PathBuf::from("document.pdf"), + PathBuf::from("data.csv"), + PathBuf::from("image.png"), + PathBuf::from("archive.zip"), + PathBuf::from("notes.txt"), + ]; + + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir, + files, + ); + + let docs = handler.find_files_by_kind(ContentKind::Document); + assert_eq!(docs.len(), 1); + assert!(docs[0].to_string_lossy().contains("document.pdf")); + + let spreadsheets = handler.find_files_by_kind(ContentKind::Spreadsheet); + assert_eq!(spreadsheets.len(), 1); + assert!(spreadsheets[0].to_string_lossy().contains("data.csv")); + + let images = handler.find_files_by_kind(ContentKind::Image); + assert_eq!(images.len(), 1); + + let text = handler.find_files_by_kind(ContentKind::Text); + assert_eq!(text.len(), 1); + } + + #[test] + fn test_content_kind_for_path() { + let temp_dir = TempDir::new().unwrap(); + let handler = ArchiveHandler::new(
ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir, + vec![], + ); + + assert_eq!( + handler.content_kind_for_path(Path::new("test.pdf")), + ContentKind::Document + ); + assert_eq!( + handler.content_kind_for_path(Path::new("data.csv")), + ContentKind::Spreadsheet + ); + assert_eq!( + handler.content_kind_for_path(Path::new("image.png")), + ContentKind::Image + ); + assert_eq!( + handler.content_kind_for_path(Path::new("notes.txt")), + ContentKind::Text + ); + assert_eq!( + handler.content_kind_for_path(Path::new("archive.zip")), + ContentKind::Archive + ); + assert_eq!( + handler.content_kind_for_path(Path::new("no_extension")), + ContentKind::Unknown + ); + } + + #[test] + fn test_content_metadata_for_file() { + let temp_dir = TempDir::new().unwrap(); + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir, + vec![], + ); + + let metadata = handler.content_metadata_for_file("docs/report.pdf"); + assert_eq!(metadata.filename(), Some("report.pdf")); + assert_eq!(metadata.file_extension(), Some("pdf")); + assert!(!metadata.content_source.as_uuid().is_nil()); + } + + #[tokio::test] + async fn test_all_content_metadata() { + let temp_dir = TempDir::new().unwrap(); + let temp_path = temp_dir.path().to_path_buf(); + + // Create actual files in temp dir + let file1 = temp_path.join("doc.pdf"); + let file2 = temp_path.join("data.csv"); + tokio::fs::write(&file1, b"pdf content").await.unwrap(); + tokio::fs::write(&file2, b"csv content").await.unwrap(); + + let files = vec![file1, file2]; + let handler = ArchiveHandler::new( + ContentSource::new(), + ArchiveType::Zip, + None, + temp_dir, + files, + ); + + let metadata_list = handler.all_content_metadata().unwrap(); + assert_eq!(metadata_list.len(), 2); + + // Check that each metadata has the correct relative path + let filenames: Vec<_> = metadata_list.iter().filter_map(|m| m.filename()).collect(); + assert!(filenames.contains(&"doc.pdf")); + assert!(filenames.contains(&"data.csv")); + + // Each should have a unique content source + assert_ne!( + metadata_list[0].content_source, + metadata_list[1].content_source + ); + } } diff --git a/crates/nvisy-archive/src/lib.rs b/crates/nvisy-archive/src/lib.rs index 8fc23af..8f2d86c 100644 --- a/crates/nvisy-archive/src/lib.rs +++ b/crates/nvisy-archive/src/lib.rs @@ -1,29 +1,20 @@ -//! Archive handling library for nvisy -//! -//! This crate provides functionality for working with various archive formats -//! including ZIP, TAR, 7z, and other compressed archive types. It supports both -//! reading from files and memory, with flexible loading options. -//! -//! # Features -//! -//! - `zip` - ZIP archive support (enabled by default) -//! - `tar` - TAR archive support (enabled by default) -//! - `sevenz` - 7z archive support -//! - `gzip` - GZIP compression support (enabled by default) -//! - `bzip2` - BZIP2 compression support (enabled by default) -//! 
- `xz` - XZ/LZMA compression support (enabled by default) - #![forbid(unsafe_code)] #![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] pub mod file; pub mod handler; +pub mod prelude; // Re-exports for convenience pub use file::{ArchiveFile, ArchiveType}; pub use handler::ArchiveHandler; -// Re-export error types from nvisy-core + +// Re-export core types used in archive operations pub use nvisy_core::error::{Error, ErrorResource, ErrorType, Result}; +pub use nvisy_core::fs::{ContentKind, ContentMetadata}; +pub use nvisy_core::io::ContentData; +pub use nvisy_core::path::ContentSource; /// Extension trait for creating archive-specific errors pub trait ArchiveErrorExt { @@ -48,51 +39,39 @@ pub trait ArchiveErrorExt { impl ArchiveErrorExt for Error { fn unsupported_format(format: impl Into<String>) -> Error { - Error::new( - ErrorType::Runtime, - ErrorResource::Archive, - format!("Unsupported archive format: {}", format.into()), - ) + Error::new(format!("Unsupported archive format: {}", format.into())) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Archive) } fn invalid_archive(message: impl Into<String>) -> Error { - Error::new( - ErrorType::Runtime, - ErrorResource::Archive, - format!("Invalid archive: {}", message.into()), - ) + Error::new(format!("Invalid archive: {}", message.into())) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Archive) } fn entry_not_found(name: impl Into<String>) -> Error { - Error::new( - ErrorType::Runtime, - ErrorResource::Archive, - format!("Entry not found: {}", name.into()), - ) + Error::new(format!("Entry not found: {}", name.into())) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Archive) } fn archive_permission_denied(message: impl Into<String>) -> Error { - Error::new( - ErrorType::Runtime, - ErrorResource::Archive, - format!("Permission denied: {}", message.into()), - ) + Error::new(format!("Permission denied: {}", message.into())) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Archive) } fn corrupted(message: impl Into<String>) -> Error { - Error::new( - ErrorType::Runtime, - ErrorResource::Archive, - format!("Corrupted archive: {}", message.into()), - ) + Error::new(format!("Corrupted archive: {}", message.into())) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Archive) } fn archive_resource_limit(message: impl Into<String>) -> Error { - Error::new( - ErrorType::Runtime, - ErrorResource::Archive, - format!("Resource limit exceeded: {}", message.into()), - ) + Error::new(format!("Resource limit exceeded: {}", message.into())) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Archive) } } @@ -106,12 +85,9 @@ pub trait ZipErrorExt { #[cfg(feature = "zip")] impl ZipErrorExt for zip::result::ZipError { fn into_archive_error(self) -> Error { - Error::from_source( - ErrorType::Runtime, - ErrorResource::Archive, - "ZIP operation failed", - self, - ) + Error::from_source("ZIP operation failed", self) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Archive) } } @@ -135,7 +111,6 @@ mod tests { #[test] fn test_error_creation() { - // Test archive-specific error constructors from ArchiveErrorExt trait let error = <Error as ArchiveErrorExt>::unsupported_format("custom"); assert_eq!(error.resource, ErrorResource::Archive); diff --git a/crates/nvisy-archive/src/prelude.rs b/crates/nvisy-archive/src/prelude.rs new file mode 100644 index 0000000..35ecb8c --- /dev/null +++ b/crates/nvisy-archive/src/prelude.rs @@ -0,0 +1,12 @@ +//! Prelude module for commonly used types. +//! +//!
This module re-exports the most commonly used types from this crate. +//! It is intended to be glob-imported for convenience. + +// Archive types +pub use crate::file::{ArchiveFile, ArchiveType}; +pub use crate::handler::ArchiveHandler; +// Error handling +pub use crate::{ArchiveErrorExt, Error, ErrorResource, ErrorType, Result}; +// Core types re-exported for convenience +pub use crate::{ContentData, ContentKind, ContentMetadata, ContentSource}; diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml index 13130f3..390a46a 100644 --- a/crates/nvisy-core/Cargo.toml +++ b/crates/nvisy-core/Cargo.toml @@ -36,6 +36,7 @@ serde = { workspace = true, features = ["derive"] } # Utilities strum = { workspace = true, features = ["derive"] } +derive_more = { workspace = true, features = ["as_ref", "deref"] } # Error handling (moved from nvisy-error crate) thiserror = { workspace = true, features = ["std"] } diff --git a/crates/nvisy-core/README.md b/crates/nvisy-core/README.md index 524b07b..68059c0 100644 --- a/crates/nvisy-core/README.md +++ b/crates/nvisy-core/README.md @@ -9,8 +9,20 @@ processing system. ## Overview This crate provides the foundational building blocks for the Nvisy ecosystem, -including data processing primitives, structured error handling, and component -health monitoring. +including data processing primitives, structured error handling, and content +tracking. + +## Core Types + +- [`fs::DataSensitivity`] - Sensitivity levels for risk assessment +- [`fs::ContentFile`] - File operations with content tracking +- [`fs::ContentKind`] - Classification of content types by file extension +- [`fs::ContentMetadata`] - Metadata information for content files +- [`io::Content`] - Content types and data structures +- [`io::ContentData`] - Container for content data with metadata +- [`io::DataReference`] - Data references with source tracking +- [`path::ContentSource`] - UUIDv7-based content source identification +- [`error::Error`] - Structured error handling with source classification ## Features @@ -19,30 +31,23 @@ health monitoring. - **Content Management** - Unified content structures with SHA256 hashing and metadata - **File Operations** - Async file handling with content source tracking -- **Data Classification** - Sensitivity levels and structure type classification +- **Data Classification** - Sensitivity levels for risk assessment - **Format Detection** - Automatic content kind detection from file extensions - **I/O Abstractions** - Modern async traits for content reading and writing - **Zero-Copy Operations** - Efficient data handling using `bytes::Bytes` -### Error Handling & Monitoring +### Error Handling - **Structured Errors** - Rich error types with source classification and context tracking -- **Component Health** - Health status monitoring with operational state - tracking -- **Status Reporting** - Comprehensive status information with severity levels -- **Component Trait** - Standardized interface for component health checks +- **Builder Pattern** - Fluent API with `with_type()`, `with_resource()`, + `with_source()`, and `with_context()` methods - **Result Types** - Ergonomic error handling with custom `Result` type -## Feature Flags - -- `serde` - Enable serialization/deserialization support for all types using - serde. This allows converting structs to/from JSON, YAML, and other formats. -- `jiff` - Enable timestamp support using the jiff datetime library. This adds - timestamp fields to `ComponentStatus` and time-based operations. 
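The builder-pattern error API advertised in this README looks like the following in practice; a minimal sketch using only methods introduced by this patch (`with_type`, `with_resource`, `with_context`), with `load_config` as a hypothetical caller.

```rust
use nvisy_core::error::{Error, ErrorResource, ErrorType, Result};

fn load_config(raw: &str) -> Result<()> {
    if raw.is_empty() {
        // Classification defaults to Other/Unknown; the builder refines it.
        return Err(Error::new("empty configuration")
            .with_type(ErrorType::Config)
            .with_resource(ErrorResource::Core)
            .with_context("while loading workspace defaults"));
    }
    Ok(())
}
```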
- ## Dependencies - `tokio` - Async runtime for I/O operations - `bytes` - Zero-copy byte buffer management -- `uuid` - Unique identifiers with v7 support +- `uuid` - Unique identifiers with UUIDv7 support +- `jiff` - Timestamp support for content source tracking +- `strum` - Derive macros for enums diff --git a/crates/nvisy-core/src/error/error_source.rs b/crates/nvisy-core/src/error/error_source.rs index 8839fa9..71a5719 100644 --- a/crates/nvisy-core/src/error/error_source.rs +++ b/crates/nvisy-core/src/error/error_source.rs @@ -1,15 +1,19 @@ use serde::{Deserialize, Serialize}; -use strum::{AsRefStr, Display}; +use strum::{AsRefStr, Display, EnumString}; /// System component sources where errors can originate. /// /// This enum identifies the subsystem or component that generated an error, /// enabling better error categorization and handling across the nvisy ecosystem. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, AsRefStr, Display)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +#[derive(AsRefStr, Display, EnumString)] #[derive(Serialize, Deserialize)] #[strum(serialize_all = "snake_case")] #[serde(rename_all = "snake_case")] pub enum ErrorResource { + /// Unknown or unspecified component. + #[default] + Unknown, /// Core framework and foundational components. Core, /// Execution engine and processing components. @@ -48,22 +52,31 @@ impl ErrorResource { #[must_use] pub const fn priority_level(&self) -> u8 { match self { - Self::Core => 6, // Highest priority + Self::Core => 6, Self::Engine => 5, Self::Document | Self::Archive => 4, Self::Pattern => 3, Self::Runtime => 2, - Self::Gateway => 1, // Lowest priority + Self::Gateway => 1, + Self::Unknown => 0, } } } #[cfg(test)] mod tests { + use std::str::FromStr; + use super::*; + #[test] + fn test_default() { + assert_eq!(ErrorResource::default(), ErrorResource::Unknown); + } + #[test] fn test_string_representations() { + assert_eq!(ErrorResource::Unknown.as_ref(), "unknown"); assert_eq!(ErrorResource::Core.as_ref(), "core"); assert_eq!(ErrorResource::Engine.as_ref(), "engine"); assert_eq!(ErrorResource::Document.as_ref(), "document"); @@ -73,6 +86,19 @@ mod tests { assert_eq!(ErrorResource::Gateway.as_ref(), "gateway"); } + #[test] + fn test_from_str() { + assert_eq!( + ErrorResource::from_str("core").unwrap(), + ErrorResource::Core + ); + assert_eq!( + ErrorResource::from_str("engine").unwrap(), + ErrorResource::Engine + ); + assert!(ErrorResource::from_str("invalid").is_err()); + } + #[test] fn test_priority_levels() { assert_eq!(ErrorResource::Core.priority_level(), 6); @@ -82,6 +108,7 @@ mod tests { assert_eq!(ErrorResource::Pattern.priority_level(), 3); assert_eq!(ErrorResource::Runtime.priority_level(), 2); assert_eq!(ErrorResource::Gateway.priority_level(), 1); + assert_eq!(ErrorResource::Unknown.priority_level(), 0); } #[test] @@ -93,5 +120,7 @@ mod tests { assert!(ErrorResource::Archive.is_internal()); assert!(ErrorResource::Runtime.is_external()); assert!(ErrorResource::Gateway.is_external()); + assert!(!ErrorResource::Unknown.is_internal()); + assert!(!ErrorResource::Unknown.is_external()); } } diff --git a/crates/nvisy-core/src/error/error_type.rs b/crates/nvisy-core/src/error/error_type.rs index a1f6073..f9a740c 100644 --- a/crates/nvisy-core/src/error/error_type.rs +++ b/crates/nvisy-core/src/error/error_type.rs @@ -1,8 +1,9 @@ use serde::{Deserialize, Serialize}; -use strum::{AsRefStr, Display}; +use strum::{AsRefStr, Display, EnumString}; /// Classification of error types by their operational domain. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, AsRefStr, Display)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +#[derive(AsRefStr, Display, EnumString)] #[derive(Serialize, Deserialize)] #[strum(serialize_all = "snake_case")] #[serde(rename_all = "snake_case")] @@ -12,21 +13,37 @@ pub enum ErrorType { /// Execution-time operational failures. Runtime, /// Internal system logic or state failures. + #[default] Other, } impl ErrorType { - /// Check if this error type is typically recoverable + /// Check if this error type is typically recoverable. #[must_use] - pub fn is_recoverable(&self) -> bool { - matches!(self, ErrorType::Runtime) + pub const fn is_recoverable(&self) -> bool { - matches!(self, Self::Runtime) + matches!(self, Self::Runtime) } } #[cfg(test)] mod tests { + use std::str::FromStr; + use super::*; + #[test] + fn test_default() { + assert_eq!(ErrorType::default(), ErrorType::Other); + } + + #[test] + fn test_from_str() { + assert_eq!(ErrorType::from_str("config").unwrap(), ErrorType::Config); + assert_eq!(ErrorType::from_str("runtime").unwrap(), ErrorType::Runtime); + assert_eq!(ErrorType::from_str("other").unwrap(), ErrorType::Other); + assert!(ErrorType::from_str("invalid").is_err()); + } + #[test] fn test_recoverability() { assert!(ErrorType::Runtime.is_recoverable()); diff --git a/crates/nvisy-core/src/error/mod.rs b/crates/nvisy-core/src/error/mod.rs index 62cb82f..26dca6a 100644 --- a/crates/nvisy-core/src/error/mod.rs +++ b/crates/nvisy-core/src/error/mod.rs @@ -1,7 +1,4 @@ //! Structured error handling for the nvisy ecosystem. -//! -//! This module provides structured error handling with source classification and context tracking -//! that can be reused across all nvisy crates. use std::fmt; @@ -20,6 +17,17 @@ pub type BoxError = Box<dyn std::error::Error + Send + Sync>; /// /// This error type is designed to be used across the entire nvisy ecosystem, /// providing consistent error handling with classification and context. +/// +/// # Example +/// +/// ``` +/// use nvisy_core::error::{Error, ErrorType, ErrorResource}; +/// +/// let error = Error::new("Something went wrong") +/// .with_type(ErrorType::Runtime) +/// .with_resource(ErrorResource::Engine) +/// .with_context("during document processing"); +/// ``` #[must_use] #[derive(Debug)] pub struct Error { @@ -29,7 +37,6 @@ pub struct Error { pub resource: ErrorResource, /// Primary error message. pub message: HipStr<'static>, - /// Underlying source error, if any. source: Option<BoxError>, /// Additional context information. @@ -40,34 +47,31 @@ pub struct Error { pub type Result<T> = std::result::Result<T, Error>; impl Error { - /// Creates a new error with the specified type, source, and message. - pub fn new( - etype: ErrorType, - resource: ErrorResource, - message: impl Into<HipStr<'static>>, - ) -> Self { + /// Creates a new error with the given message. + /// + /// The error type defaults to `ErrorType::Other` and + /// resource defaults to `ErrorResource::Unknown`. + pub fn new(message: impl Into<HipStr<'static>>) -> Self { Self { - etype, - resource, + etype: ErrorType::default(), + resource: ErrorResource::default(), + message: message.into(), source: None, context: None, - message: message.into(), } } - /// Creates a new error with the specified type, source, message, and source error. - pub fn from_source( - etype: ErrorType, - resource: ErrorResource, - message: impl Into<HipStr<'static>>, - source: impl Into<BoxError>, - ) -> Self { + /// Creates a new error from a source error. + /// + /// The error type defaults to `ErrorType::Other` and + /// resource defaults to `ErrorResource::Unknown`.
+ pub fn from_source(message: impl Into<HipStr<'static>>, source: impl Into<BoxError>) -> Self { Self { - etype, - resource, + etype: ErrorType::default(), + resource: ErrorResource::default(), + message: message.into(), source: Some(source.into()), context: None, - message: message.into(), } } @@ -106,66 +110,23 @@ pub fn is_recoverable(&self) -> bool { self.etype.is_recoverable() } +} - /// Returns the display message for the error. - fn display_message(&self) -> String { - let mut parts = Vec::new(); - - parts.push(format!( - "[{}:{}]", +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "[{}:{}] {}", self.resource.as_ref(), - self.etype.as_ref() - )); - parts.push(self.message.to_string()); + self.etype.as_ref(), + self.message )?; if let Some(ref context) = self.context { - parts.push(format!("(context: {context})")); + write!(f, " (context: {context})")?; } - parts.join(" ") - } - - // Convenience constructors for common error patterns - - /// Creates a runtime error. - pub fn runtime(resource: ErrorResource, message: impl Into<HipStr<'static>>) -> Self { - Self::new(ErrorType::Runtime, resource, message) - } - - /// Creates a configuration error. - pub fn config(resource: ErrorResource, message: impl Into<HipStr<'static>>) -> Self { - Self::new(ErrorType::Config, resource, message) - } - - /// Creates an unsupported format error. - pub fn unsupported_format(message: impl Into<HipStr<'static>>) -> Self { - Self::new(ErrorType::Runtime, ErrorResource::Core, message) - } - - /// Creates an invalid input error. - pub fn invalid_input(message: impl Into<HipStr<'static>>) -> Self { - Self::new(ErrorType::Runtime, ErrorResource::Core, message) - } - - /// Creates a not found error. - pub fn not_found(message: impl Into<HipStr<'static>>) -> Self { - Self::new(ErrorType::Runtime, ErrorResource::Core, message) - } - - /// Creates a permission denied error. - pub fn permission_denied(message: impl Into<HipStr<'static>>) -> Self { - Self::new(ErrorType::Runtime, ErrorResource::Core, message) - } - - /// Creates a resource limit exceeded error.
- pub fn resource_limit(message: impl Into<HipStr<'static>>) -> Self { - Self::new(ErrorType::Runtime, ErrorResource::Core, message) - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.display_message()) + Ok(()) } } @@ -179,34 +140,25 @@ impl std::error::Error for Error { impl From<std::io::Error> for Error { fn from(error: std::io::Error) -> Self { - Self::from_source( - ErrorType::Runtime, - ErrorResource::Core, - "I/O operation failed", - error, - ) + Self::from_source("I/O operation failed", error) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) } } impl From<std::string::FromUtf8Error> for Error { fn from(error: std::string::FromUtf8Error) -> Self { - Self::from_source( - ErrorType::Runtime, - ErrorResource::Core, - "Invalid UTF-8 encoding", - error, - ) + Self::from_source("Invalid UTF-8 encoding", error) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) } } impl From<std::str::Utf8Error> for Error { fn from(error: std::str::Utf8Error) -> Self { - Self::from_source( - ErrorType::Runtime, - ErrorResource::Core, - "Invalid UTF-8 encoding", - error, - ) + Self::from_source("Invalid UTF-8 encoding", error) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) } } @@ -215,25 +167,32 @@ mod tests { use super::*; #[test] - fn test_error_builder() { - let error = Error::new(ErrorType::Config, ErrorResource::Core, "test message"); - assert_eq!(error.etype, ErrorType::Config); - assert_eq!(error.resource, ErrorResource::Core); + fn test_error_new() { + let error = Error::new("test message"); + assert_eq!(error.etype, ErrorType::Other); + assert_eq!(error.resource, ErrorResource::Unknown); assert_eq!(error.message, "test message"); assert!(error.source.is_none()); assert!(error.context.is_none()); } #[test] - fn test_error_with_context() { - let error = Error::new(ErrorType::Other, ErrorResource::Engine, "test") + fn test_error_builder_pattern() { + let error = Error::new("test message") + .with_type(ErrorType::Config) + .with_resource(ErrorResource::Engine) + .with_context("additional context"); + + assert_eq!(error.etype, ErrorType::Config); + assert_eq!(error.resource, ErrorResource::Engine); assert_eq!(error.context.as_deref(), Some("additional context")); } #[test] fn test_error_display() { - let error = Error::new(ErrorType::Runtime, ErrorResource::Core, "test error") + let error = Error::new("test error") + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) + .with_context("additional info"); let display_str = error.to_string(); @@ -255,27 +214,25 @@ } #[test] - fn test_convenience_constructors() { - let runtime_err = Error::runtime(ErrorResource::Engine, "runtime failure"); - assert_eq!(runtime_err.etype, ErrorType::Runtime); - assert_eq!(runtime_err.resource, ErrorResource::Engine); - - let config_err = Error::config(ErrorResource::Core, "config failure"); - assert_eq!(config_err.etype, ErrorType::Config); + fn test_is_recoverable() { + let runtime_err = Error::new("test").with_type(ErrorType::Runtime); + assert!(runtime_err.is_recoverable()); - let unsupported = Error::unsupported_format("unknown format"); - assert_eq!(unsupported.etype, ErrorType::Runtime); + let config_err = Error::new("test").with_type(ErrorType::Config); + assert!(!config_err.is_recoverable()); - let not_found = Error::not_found("file missing"); - assert_eq!(not_found.etype, ErrorType::Runtime); + let other_err = Error::new("test"); + assert!(!other_err.is_recoverable()); } #[test] - fn test_is_recoverable() { - let runtime_err =
Error::runtime(ErrorResource::Core, "test"); - assert!(runtime_err.is_recoverable()); + fn test_from_source() { + let source = std::io::Error::other("underlying error"); + let error = Error::from_source("operation failed", source) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Document); - let config_err = Error::config(ErrorResource::Core, "test"); - assert!(!config_err.is_recoverable()); + assert!(error.source.is_some()); + assert_eq!(error.resource, ErrorResource::Document); } } diff --git a/crates/nvisy-core/src/fs/content_file.rs b/crates/nvisy-core/src/fs/content_file.rs index a9d102d..86bbd8a 100644 --- a/crates/nvisy-core/src/fs/content_file.rs +++ b/crates/nvisy-core/src/fs/content_file.rs @@ -6,11 +6,12 @@ use std::io; use std::path::{Path, PathBuf}; +use bytes::Bytes; use tokio::fs::{File, OpenOptions}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt, AsyncWrite, AsyncWriteExt, SeekFrom}; use crate::error::{Error, ErrorResource, ErrorType, Result}; -use crate::fs::{ContentKind, ContentMetadata}; +use crate::fs::ContentMetadata; use crate::io::{AsyncContentRead, AsyncContentWrite, ContentData}; use crate::path::ContentSource; @@ -187,7 +188,7 @@ impl ContentFile { let mut buffer = Vec::new(); self.file.read_to_end(&mut buffer).await?; - let content_data = ContentData::new(self.content_source, buffer.into()); + let content_data = ContentData::new(self.content_source, Bytes::from(buffer)); Ok(content_data) } @@ -210,18 +211,18 @@ impl ContentFile { } if total_read + bytes_read > max_size { - return Err(Error::new( - ErrorType::Runtime, - ErrorResource::Core, - format!("File size exceeds maximum limit of {max_size} bytes"), - )); + return Err(Error::new(format!( + "File size exceeds maximum limit of {max_size} bytes" + )) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core)); } buffer.extend_from_slice(&temp_buffer[..bytes_read]); total_read += bytes_read; } - let content_data = ContentData::new(self.content_source, buffer.into()); + let content_data = ContentData::new(self.content_source, Bytes::from(buffer)); Ok(content_data) } @@ -357,13 +358,6 @@ impl ContentFile { self.path.extension().and_then(|ext| ext.to_str()) } - /// Detect content kind from file extension - pub fn detect_content_kind(&self) -> ContentKind { - self.extension() - .map(ContentKind::from_file_extension) - .unwrap_or_default() - } - /// Sync all data to disk /// /// # Errors @@ -499,13 +493,12 @@ mod tests { } #[tokio::test] - async fn test_content_kind_detection() { + async fn test_file_extension() { let temp_file = NamedTempFile::new().unwrap(); let mut path = temp_file.path().to_path_buf(); path.set_extension("txt"); let content_file = ContentFile::create(&path).await.unwrap(); - assert_eq!(content_file.detect_content_kind(), ContentKind::Text); assert_eq!(content_file.extension(), Some("txt")); assert_eq!( content_file.filename(), diff --git a/crates/nvisy-core/src/fs/content_kind.rs b/crates/nvisy-core/src/fs/content_kind.rs index 0994bf4..288f488 100644 --- a/crates/nvisy-core/src/fs/content_kind.rs +++ b/crates/nvisy-core/src/fs/content_kind.rs @@ -1,14 +1,19 @@ //! Content type classification for different categories of data //! //! This module provides the [`ContentKind`] enum for classifying content -//! based on file extensions. +//! into broad categories. Extension-to-kind mapping is handled by the +//! engine's format registry. 
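With the extension mapping removed, the remaining `ContentKind` surface is the strum-derived string round-trip plus the category predicates; a sketch based on the derives and tests in this hunk:

```rust
use std::str::FromStr;

use nvisy_core::fs::ContentKind;

fn demo() {
    // Round-trip through the strum derives (serialize_all = "lowercase").
    let kind = ContentKind::from_str("archive").expect("known kind");
    assert!(kind.is_archive());
    assert_eq!(kind.to_string(), "archive");

    // Unrecognized names are an error rather than ContentKind::Unknown.
    assert!(ContentKind::from_str("hologram").is_err());
}
```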
use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, EnumString}; +use strum::{AsRefStr, Display, EnumIter, EnumString}; /// Content type classification for different categories of data +/// +/// This enum represents high-level content categories without knowledge +/// of specific file extensions or MIME types. The engine's format registry +/// handles the mapping from extensions/MIME types to content kinds. #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] -#[derive(Display, EnumString, EnumIter)] +#[derive(AsRefStr, Display, EnumString, EnumIter)] #[derive(Serialize, Deserialize)] #[strum(serialize_all = "lowercase")] #[serde(rename_all = "lowercase")] @@ -29,35 +34,6 @@ pub enum ContentKind { } impl ContentKind { - /// Detect content kind from file extension - #[must_use] - pub fn from_file_extension(extension: &str) -> Self { - let ext = extension.to_lowercase(); - match ext.as_str() { - // Text formats - "txt" | "text" | "md" | "markdown" | "rst" | "xml" | "json" | "yaml" | "yml" - | "toml" | "ini" | "cfg" | "conf" | "log" => Self::Text, - - // Document formats - "pdf" | "doc" | "docx" | "rtf" | "odt" | "pages" => Self::Document, - - // Spreadsheet formats - "csv" | "tsv" | "xls" | "xlsx" | "ods" | "numbers" => Self::Spreadsheet, - - // Image formats - "jpg" | "jpeg" | "png" | "gif" | "bmp" | "svg" | "webp" | "ico" | "tiff" | "tif" => { - Self::Image - } - - // Archive formats - "zip" | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "tgz" | "tbz2" | "txz" => { - Self::Archive - } - - _ => Self::Unknown, - } - } - /// Check if this content kind represents text-based content #[must_use] pub fn is_text_based(&self) -> bool { @@ -87,54 +63,12 @@ impl ContentKind { pub fn is_archive(&self) -> bool { matches!(self, Self::Archive) } - - /// Get common file extensions for this content kind - #[must_use] - pub fn common_extensions(&self) -> &'static [&'static str] { - match self { - Self::Text => &["txt", "md", "json", "xml", "yaml", "toml"], - Self::Document => &["pdf", "doc", "docx", "rtf", "odt"], - Self::Spreadsheet => &["csv", "xls", "xlsx", "ods"], - Self::Image => &["jpg", "jpeg", "png", "gif", "svg", "webp"], - Self::Archive => &["zip", "tar", "gz", "7z", "rar"], - Self::Unknown => &[], - } - } } #[cfg(test)] mod tests { use super::*; - #[test] - fn test_content_kind_from_extension() { - assert_eq!(ContentKind::from_file_extension("txt"), ContentKind::Text); - assert_eq!(ContentKind::from_file_extension("TXT"), ContentKind::Text); - assert_eq!(ContentKind::from_file_extension("json"), ContentKind::Text); - assert_eq!( - ContentKind::from_file_extension("pdf"), - ContentKind::Document - ); - assert_eq!( - ContentKind::from_file_extension("csv"), - ContentKind::Spreadsheet - ); - assert_eq!( - ContentKind::from_file_extension("xlsx"), - ContentKind::Spreadsheet - ); - assert_eq!(ContentKind::from_file_extension("png"), ContentKind::Image); - assert_eq!( - ContentKind::from_file_extension("zip"), - ContentKind::Archive - ); - assert_eq!(ContentKind::from_file_extension("7z"), ContentKind::Archive); - assert_eq!( - ContentKind::from_file_extension("unknown"), - ContentKind::Unknown - ); - } - #[test] fn test_content_kind_predicates() { assert!(ContentKind::Text.is_text_based()); @@ -164,31 +98,21 @@ mod tests { } #[test] - fn test_common_extensions() { - let text_ext = ContentKind::Text.common_extensions(); - assert!(text_ext.contains(&"txt")); - assert!(text_ext.contains(&"json")); - - let archive_ext = ContentKind::Archive.common_extensions(); - 
assert!(archive_ext.contains(&"zip")); - assert!(archive_ext.contains(&"7z")); - - let unknown_ext = ContentKind::Unknown.common_extensions(); - assert!(unknown_ext.is_empty()); + fn test_content_kind_as_ref() { + assert_eq!(ContentKind::Text.as_ref(), "text"); + assert_eq!(ContentKind::Document.as_ref(), "document"); } #[test] - fn test_case_insensitive_extension_detection() { - assert_eq!(ContentKind::from_file_extension("TXT"), ContentKind::Text); + fn test_content_kind_from_str() { + use std::str::FromStr; + + assert_eq!(ContentKind::from_str("text").unwrap(), ContentKind::Text); assert_eq!( - ContentKind::from_file_extension("PDF"), + ContentKind::from_str("document").unwrap(), ContentKind::Document ); - assert_eq!(ContentKind::from_file_extension("PNG"), ContentKind::Image); - assert_eq!( - ContentKind::from_file_extension("ZIP"), - ContentKind::Archive - ); + assert!(ContentKind::from_str("invalid").is_err()); } #[test] diff --git a/crates/nvisy-core/src/fs/content_metadata.rs b/crates/nvisy-core/src/fs/content_metadata.rs index 401ed4f..23d01da 100644 --- a/crates/nvisy-core/src/fs/content_metadata.rs +++ b/crates/nvisy-core/src/fs/content_metadata.rs @@ -1,21 +1,19 @@ //! Content metadata for filesystem operations //! //! This module provides the [`ContentMetadata`] struct for handling metadata -//! about content files, including paths, content types, and source tracking. +//! about content files, including paths and source tracking. use std::path::{Path, PathBuf}; use serde::{Deserialize, Serialize}; -use super::ContentKind; use crate::path::ContentSource; /// Metadata associated with content files /// -/// This struct stores metadata about content including its source identifier, -/// file path, and detected content kind based on file extension. -#[derive(Debug, Clone, PartialEq, Eq)] -#[derive(Serialize, Deserialize)] +/// This struct stores metadata about content including its source identifier +/// and file path. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ContentMetadata { /// Unique identifier for the content source pub content_source: ContentSource, @@ -70,24 +68,6 @@ impl ContentMetadata { .and_then(|ext| ext.to_str()) } - /// Detect content kind from file extension - /// - /// # Example - /// - /// ``` - /// use nvisy_core::{fs::{ContentMetadata, ContentKind}, path::ContentSource}; - /// use std::path::PathBuf; - /// - /// let source = ContentSource::new(); - /// let metadata = ContentMetadata::with_path(source, PathBuf::from("image.png")); - /// assert_eq!(metadata.content_kind(), ContentKind::Image); - /// ``` - pub fn content_kind(&self) -> ContentKind { - self.file_extension() - .map(ContentKind::from_file_extension) - .unwrap_or_default() - } - /// Get the filename if available #[must_use] pub fn filename(&self) -> Option<&str> { @@ -157,7 +137,6 @@ mod tests { let metadata = ContentMetadata::with_path(source, PathBuf::from("document.pdf")); assert_eq!(metadata.file_extension(), Some("pdf")); - assert_eq!(metadata.content_kind(), ContentKind::Document); } #[test] diff --git a/crates/nvisy-core/src/fs/data_sensitivity.rs b/crates/nvisy-core/src/fs/data_sensitivity.rs index 93f636c..b7e1a3b 100644 --- a/crates/nvisy-core/src/fs/data_sensitivity.rs +++ b/crates/nvisy-core/src/fs/data_sensitivity.rs @@ -1,7 +1,7 @@ //! Data sensitivity level classification //! //! This module provides a systematic way to classify data based on sensitivity -//! and risk levels for proper handling and compliance requirements. +//! 
and risk levels for proper handling. use serde::{Deserialize, Serialize}; use strum::{Display, EnumIter, EnumString}; @@ -25,7 +25,6 @@ use strum::{Display, EnumIter, EnumString}; /// /// assert!(high > medium); /// assert!(medium > low); -/// assert!(high.requires_special_handling()); /// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(EnumIter, EnumString, Display, Serialize, Deserialize)] @@ -62,58 +61,6 @@ impl DataSensitivity { *self as u8 } - /// Check if this sensitivity level requires special handling - #[must_use] - pub fn requires_special_handling(&self) -> bool { - *self >= DataSensitivity::High - } - - /// Check if this sensitivity level requires encryption - #[must_use] - pub fn requires_encryption(&self) -> bool { - *self >= DataSensitivity::Medium - } - - /// Check if this sensitivity level requires access logging - #[must_use] - pub fn requires_access_logging(&self) -> bool { - *self >= DataSensitivity::High - } - - /// Check if this sensitivity level requires a retention policy - #[must_use] - pub fn requires_retention_policy(&self) -> bool { - *self >= DataSensitivity::Medium - } - - /// Check if this sensitivity level requires compliance oversight - #[must_use] - pub fn requires_compliance_oversight(&self) -> bool { - *self >= DataSensitivity::High - } - - /// Get the recommended maximum retention period in days (None = indefinite) - #[must_use] - pub fn max_retention_days(&self) -> Option<u32> { - match self { - DataSensitivity::None => None, // Indefinite - DataSensitivity::Low => Some(2555), // ~7 years - DataSensitivity::Medium => Some(1095), // 3 years - DataSensitivity::High => Some(90), // 90 days - } - } - - /// Get all sensitivity levels in ascending order - #[must_use] - pub fn all() -> Vec<DataSensitivity> { - vec![ - DataSensitivity::None, - DataSensitivity::Low, - DataSensitivity::Medium, - DataSensitivity::High, - ] - } - /// Create from a numeric level (0-3) #[must_use] pub fn from_level(level: u8) -> Option<Self> { @@ -161,62 +108,21 @@ mod tests { #[test] fn test_from_level() { assert_eq!(DataSensitivity::from_level(0), Some(DataSensitivity::None)); + assert_eq!(DataSensitivity::from_level(1), Some(DataSensitivity::Low)); + assert_eq!( + DataSensitivity::from_level(2), + Some(DataSensitivity::Medium) + ); + assert_eq!(DataSensitivity::from_level(3), Some(DataSensitivity::High)); assert_eq!(DataSensitivity::from_level(4), None); } - #[test] - fn test_requirements() { - let none = DataSensitivity::None; - let low = DataSensitivity::Low; - let medium = DataSensitivity::Medium; - let high = DataSensitivity::High; - // Special handling - assert!(!none.requires_special_handling()); - assert!(!low.requires_special_handling()); - assert!(!medium.requires_special_handling()); - assert!(high.requires_special_handling()); - - // Encryption - assert!(!none.requires_encryption()); - assert!(!low.requires_encryption()); - assert!(medium.requires_encryption()); - assert!(high.requires_encryption()); - - // Access logging - assert!(!none.requires_access_logging()); - assert!(!low.requires_access_logging()); - assert!(!medium.requires_access_logging()); - assert!(high.requires_access_logging()); - - // Compliance oversight - assert!(!none.requires_compliance_oversight()); - assert!(!low.requires_compliance_oversight()); - assert!(!medium.requires_compliance_oversight()); - assert!(high.requires_compliance_oversight()); - } - - #[test] - fn test_retention_periods() { - assert_eq!(DataSensitivity::None.max_retention_days(), None); -
assert_eq!(DataSensitivity::Low.max_retention_days(), Some(2555)); - assert_eq!(DataSensitivity::Medium.max_retention_days(), Some(1095)); - assert_eq!(DataSensitivity::High.max_retention_days(), Some(90)); - } - #[test] fn test_display() { assert_eq!(format!("{}", DataSensitivity::High), "High"); assert_eq!(format!("{}", DataSensitivity::None), "None"); } - #[test] - fn test_all_levels() { - let all = DataSensitivity::all(); - assert_eq!(all.len(), 4); - assert_eq!(all[0], DataSensitivity::None); - assert_eq!(all[3], DataSensitivity::High); - } - #[test] fn test_serialization() { let level = DataSensitivity::High; diff --git a/crates/nvisy-core/src/fs/data_structure_kind.rs b/crates/nvisy-core/src/fs/data_structure_kind.rs deleted file mode 100644 index 81562fa..0000000 --- a/crates/nvisy-core/src/fs/data_structure_kind.rs +++ /dev/null @@ -1,130 +0,0 @@ -//! Data structure type classification -//! -//! This module provides classification for different ways data can be structured, -//! from highly organized formats to completely unstructured content. - -use serde::{Deserialize, Serialize}; -use strum::{EnumIter, EnumString}; - -use crate::fs::DataSensitivity; - -/// Classification of data based on its structural organization -/// -/// This enum distinguishes between different levels of data organization, -/// from highly structured formats with defined schemas to completely -/// unstructured content without predefined organization. -/// -/// # Examples -/// -/// ```rust -/// use nvisy_core::fs::DataStructureKind; -/// -/// let structured = DataStructureKind::HighlyStructured; -/// assert!(structured.has_schema()); -/// -/// let unstructured = DataStructureKind::Unstructured; -/// assert!(!unstructured.has_schema()); -/// ``` -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -#[derive(Serialize, Deserialize, EnumIter, EnumString)] -pub enum DataStructureKind { - /// Highly Structured Data - /// - /// Data with rigid schema, defined relationships, and strict formatting rules. - /// Examples: Relational database tables, XML with XSD schema, JSON with JSON Schema. - /// - /// **Schema**: Required and enforced - /// **Queryable**: Highly queryable with structured query languages - /// **Parsing**: Predictable parsing with validation - HighlyStructured, - - /// Semi-Structured Data - /// - /// Data with some organizational structure but flexible schema. - /// Examples: JSON without strict schema, XML without XSD, CSV files, log files. - /// - /// **Schema**: Optional or loosely defined - /// **Queryable**: Moderately queryable with specialized tools - /// **Parsing**: Parseable but may require schema inference - SemiStructured, - - /// Unstructured Data - /// - /// Data without predefined format, schema, or organizational structure. - /// Examples: Plain text, images, audio, video, documents, emails. 
- /// - /// **Schema**: No schema - /// **Queryable**: Requires full-text search or content analysis - /// **Parsing**: Content-dependent parsing and analysis - Unstructured, -} - -impl DataStructureKind { - /// Get the base sensitivity level for this structure type - /// - /// Note: Actual sensitivity depends on the content, not just the structure - #[must_use] - pub fn base_sensitivity_level(&self) -> DataSensitivity { - match self { - // Structure type alone doesn't determine sensitivity - // Content analysis is required for actual sensitivity assessment - DataStructureKind::HighlyStructured - | DataStructureKind::SemiStructured - | DataStructureKind::Unstructured => DataSensitivity::Low, - } - } - - /// Check if this structure type has a defined schema - #[must_use] - pub fn has_schema(&self) -> bool { - matches!(self, DataStructureKind::HighlyStructured) - } - - /// Check if this structure type is easily queryable - #[must_use] - pub fn is_queryable(&self) -> bool { - !matches!(self, DataStructureKind::Unstructured) - } - - /// Check if parsing is predictable for this structure type - #[must_use] - pub fn has_predictable_parsing(&self) -> bool { - matches!(self, DataStructureKind::HighlyStructured) - } - - /// Check if this structure type supports relationship queries - #[must_use] - pub fn supports_relationships(&self) -> bool { - matches!(self, DataStructureKind::HighlyStructured) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_structure_characteristics() { - let highly_structured = DataStructureKind::HighlyStructured; - assert!(highly_structured.has_schema()); - assert!(highly_structured.is_queryable()); - assert!(highly_structured.has_predictable_parsing()); - - let unstructured = DataStructureKind::Unstructured; - assert!(!unstructured.has_schema()); - assert!(!unstructured.is_queryable()); - assert!(!unstructured.has_predictable_parsing()); - - let highly_structured = DataStructureKind::HighlyStructured; - assert!(highly_structured.supports_relationships()); - assert!(highly_structured.has_schema()); - } - - #[test] - fn test_serialization() { - let structure_type = DataStructureKind::SemiStructured; - let json = serde_json::to_string(&structure_type).unwrap(); - let deserialized: DataStructureKind = serde_json::from_str(&json).unwrap(); - assert_eq!(structure_type, deserialized); - } -} diff --git a/crates/nvisy-core/src/fs/mod.rs index ab2638f..c6386bd 100644 --- a/crates/nvisy-core/src/fs/mod.rs +++ b/crates/nvisy-core/src/fs/mod.rs @@ -6,7 +6,9 @@ //! # Core Types //! //! - [`ContentFile`]: A file wrapper that combines filesystem operations with content tracking -//! - [`ContentFileMetadata`]: Metadata information for content files +//! - [`ContentMetadata`]: Metadata information for content files +//! - [`ContentKind`]: High-level classification of content types +//! - [`DataSensitivity`]: Sensitivity levels for risk assessment //! //! # Example //!
@@ -31,84 +33,9 @@ mod content_file; mod content_kind; mod content_metadata; mod data_sensitivity; -mod data_structure_kind; - -use std::path::PathBuf; // Re-export main types pub use content_file::ContentFile; pub use content_kind::ContentKind; pub use content_metadata::ContentMetadata; pub use data_sensitivity::DataSensitivity; -pub use data_structure_kind::DataStructureKind; -use serde::{Deserialize, Serialize}; - -use crate::path::ContentSource; - -/// Metadata information for content files -/// -/// TODO: Implement comprehensive file metadata handling including: -/// - File timestamps (created, modified, accessed) -/// - File permissions and ownership -/// - File size and disk usage -/// - Extended attributes -/// - Content type detection beyond extensions -#[derive(Debug, Clone, PartialEq, Eq)] -#[derive(Serialize, Deserialize)] -pub struct ContentFileMetadata { - /// Content source identifier - pub content_source: ContentSource, - /// Path to the file - pub path: PathBuf, - /// Detected content kind - pub content_kind: Option<ContentKind>, - /// File size in bytes - pub size: Option<u64>, -} - -impl ContentFileMetadata { - /// Create new file metadata - #[must_use] - pub fn new(content_source: ContentSource, path: PathBuf) -> Self { - Self { - content_source, - path, - content_kind: None, - size: None, - } - } - - /// Set the content kind - #[must_use] - pub fn with_content_kind(mut self, kind: ContentKind) -> Self { - self.content_kind = Some(kind); - self - } - - /// Set the file size - #[must_use] - pub fn with_size(mut self, size: u64) -> Self { - self.size = Some(size); - self - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_content_file_metadata() { - let source = ContentSource::new(); - let path = PathBuf::from("test.txt"); - - let metadata = ContentFileMetadata::new(source, path.clone()) - .with_content_kind(ContentKind::Text) - .with_size(1024); - - assert_eq!(metadata.content_source, source); - assert_eq!(metadata.path, path); - assert_eq!(metadata.content_kind, Some(ContentKind::Text)); - assert_eq!(metadata.size, Some(1024)); - } -} diff --git a/crates/nvisy-core/src/io/content.rs index cf0af5f..b3870f4 100644 --- a/crates/nvisy-core/src/io/content.rs +++ b/crates/nvisy-core/src/io/content.rs @@ -1,118 +1,151 @@ -//! Content types supported by the Nvisy system +//! Content representation combining data with metadata //! -//! This module provides the Content enum for representing different types -//! of data content within the system. +//! This module provides the [`Content`] struct that combines [`ContentData`] +//! with optional [`ContentMetadata`] for complete content representation. -use bytes::Bytes; +use derive_more::{AsRef, Deref}; use serde::{Deserialize, Serialize}; -/// Content types supported by the Nvisy system +use super::ContentData; +use crate::fs::ContentMetadata; +use crate::path::ContentSource; + +/// Complete content representation with data and metadata /// -/// Simplified content representation for efficient processing. +/// This struct combines [`ContentData`] (the actual content bytes) with +/// optional [`ContentMetadata`] (path, extension info, etc.) to provide +/// a complete content representation.
/// /// # Examples /// /// ```rust -/// use nvisy_core::io::Content; -/// use bytes::Bytes; +/// use nvisy_core::io::{Content, ContentData}; +/// use nvisy_core::fs::ContentMetadata; +/// use nvisy_core::path::ContentSource; +/// +/// // Create content from data +/// let data = ContentData::from("Hello, world!"); +/// let content = Content::new(data); +/// +/// assert_eq!(content.size(), 13); +/// assert!(content.is_text()); /// -/// let text_content = Content::Text("Sample text".to_string()); -/// let binary_content = Content::Binary { -/// data: Bytes::from(vec![0x48, 0x65, 0x6C, 0x6C, 0x6F]), -/// mime_type: "application/octet-stream".to_string(), -/// }; +/// // Create content with metadata +/// let source = ContentSource::new(); +/// let data = ContentData::from_text(source, "Sample text"); +/// let metadata = ContentMetadata::with_path(source, "document.txt"); +/// let content = Content::with_metadata(data, metadata); /// -/// assert!(text_content.is_textual()); -/// assert!(!binary_content.is_textual()); +/// assert_eq!(content.metadata().and_then(|m| m.filename()), Some("document.txt")); /// ``` #[derive(Debug, Clone, PartialEq)] +#[derive(AsRef, Deref)] #[derive(Serialize, Deserialize)] -pub enum Content { - /// Text content stored as UTF-8 string - Text(String), - - /// Generic binary content with MIME type - Binary { - /// Raw binary data - data: Bytes, - /// MIME type describing the content - mime_type: String, - }, - - /// Empty or null content - Empty, +pub struct Content { + /// The actual content data + #[deref] + #[as_ref] + data: ContentData, + /// Optional metadata about the content + metadata: Option<ContentMetadata>, +} + +impl From<ContentData> for Content { + fn from(data: ContentData) -> Self { + Self::new(data) + } } impl Content { - /// Get the type name of this content - pub fn type_name(&self) -> &'static str { - match self { - Content::Text(_) => "text", - Content::Binary { .. } => "binary", - Content::Empty => "empty", + /// Create new content from data without metadata + pub fn new(data: ContentData) -> Self { + Self { + data, + metadata: None, } } - /// Check if this content is textual - pub fn is_textual(&self) -> bool { - matches!(self, Content::Text(_)) + /// Create new content with metadata + pub fn with_metadata(data: ContentData, metadata: ContentMetadata) -> Self { + Self { + data, + metadata: Some(metadata), + } } - /// Check if this content is multimedia (audio, video, image) - pub fn is_multimedia(&self) -> bool { - false // Simplified - no specific multimedia types + /// Get the content data + pub fn data(&self) -> &ContentData { + &self.data } - /// Check if this content has binary data - pub fn has_binary_data(&self) -> bool { - !matches!(self, Content::Text(_) | Content::Empty) + /// Get the content metadata if available + pub fn metadata(&self) -> Option<&ContentMetadata> { + self.metadata.as_ref() } - /// Get the estimated size in bytes - pub fn estimated_size(&self) -> usize { - match self { - Content::Text(text) => text.len(), - Content::Binary { data, .. } => data.len(), - Content::Empty => 0, - } + /// Get the content source + pub fn content_source(&self) -> ContentSource { + self.data.content_source } - /// Get the format/MIME type of this content - pub fn format(&self) -> Option<&str> { - match self { - Content::Text(_) => Some("text/plain"), - Content::Binary { mime_type, ..
} => Some(mime_type), - Content::Empty => None, - } + /// Get the size of the content in bytes + pub fn size(&self) -> usize { + self.data.size() } - /// Extract raw bytes from content, if available - pub fn as_bytes(&self) -> Option<&Bytes> { - match self { - Content::Binary { data, .. } => Some(data), - Content::Text(_) | Content::Empty => None, - } + /// Check if the content is empty + pub fn is_empty(&self) -> bool { + self.data.is_empty() } - /// Extract text from content, if it's textual - pub fn as_text(&self) -> Option<&str> { - match self { - Content::Text(text) => Some(text), - _ => None, - } + /// Check if the content is stored as text + pub fn is_text(&self) -> bool { + self.data.is_text() } - /// Create text content - pub fn text<S: Into<String>>(content: S) -> Self { - Content::Text(content.into()) + /// Check if the content is stored as binary + pub fn is_binary(&self) -> bool { + self.data.is_binary() } - /// Create binary content - pub fn binary<S: Into<String>>(data: Bytes, mime_type: S) -> Self { - Content::Binary { - data, - mime_type: mime_type.into(), - } + /// Get the content as bytes + pub fn as_bytes(&self) -> &[u8] { + self.data.as_bytes() + } + + /// Try to get the content as a string slice + pub fn as_str(&self) -> crate::error::Result<&str> { + self.data.as_str() + } + + /// Get the file extension from metadata if available + pub fn file_extension(&self) -> Option<&str> { + self.metadata.as_ref().and_then(|m| m.file_extension()) + } + + /// Get the filename from metadata if available + pub fn filename(&self) -> Option<&str> { + self.metadata.as_ref().and_then(|m| m.filename()) + } + + /// Set the metadata + pub fn set_metadata(&mut self, metadata: ContentMetadata) { + self.metadata = Some(metadata); + } + + /// Remove the metadata + pub fn clear_metadata(&mut self) { + self.metadata = None; + } + + /// Consume and return the inner ContentData + pub fn into_data(self) -> ContentData { + self.data + } + + /// Consume and return both data and metadata + pub fn into_parts(self) -> (ContentData, Option<ContentMetadata>) { + (self.data, self.metadata) } } @@ -121,54 +154,92 @@ mod tests { use super::*; #[test] - fn test_content_types() { - let text = Content::text("Hello"); - assert!(text.is_textual()); - assert!(!text.is_multimedia()); - assert!(!text.has_binary_data()); - assert_eq!(text.type_name(), "text"); - assert_eq!(text.format(), Some("text/plain")); - - let binary_data = Bytes::from(vec![1, 2, 3, 4]); - let binary = Content::binary(binary_data, "application/octet-stream"); - assert!(!binary.is_textual()); - assert!(!binary.is_multimedia()); - assert!(binary.has_binary_data()); - assert_eq!(binary.type_name(), "binary"); + fn test_content_creation() { + let data = ContentData::from("Hello, world!"); + let content = Content::new(data.clone()); + + assert_eq!(content.size(), 13); + assert!(content.is_text()); + assert!(content.metadata().is_none()); } #[test] - fn test_content_size_estimation() { - let text = Content::text("Hello, world!"); - assert_eq!(text.estimated_size(), 13); + fn test_content_with_metadata() { + let source = ContentSource::new(); + let data = ContentData::from_text(source, "Test content"); + let metadata = ContentMetadata::with_path(source, "test.txt"); + let content = Content::with_metadata(data, metadata); + + assert!(content.metadata().is_some()); + assert_eq!(content.file_extension(), Some("txt")); + assert_eq!(content.filename(), Some("test.txt")); + } - let binary_data = Bytes::from(vec![0; 100]); - let binary = Content::binary(binary_data, "application/octet-stream"); -
assert_eq!(binary.estimated_size(), 100); + #[test] + fn test_content_deref() { + let data = ContentData::from("Hello"); + let content = Content::new(data); - let empty = Content::Empty; - assert_eq!(empty.estimated_size(), 0); + // Test that Deref works - we can call ContentData methods directly + assert_eq!(content.size(), 5); + assert_eq!(content.as_str().unwrap(), "Hello"); } #[test] - fn test_content_data_access() { - let text_content = Content::text("Hello"); - assert_eq!(text_content.as_text(), Some("Hello")); - assert!(text_content.as_bytes().is_none()); + fn test_content_from() { + let data = ContentData::from("Test"); + let content: Content = data.into(); - let binary_data = Bytes::from(vec![1, 2, 3]); - let binary_content = Content::binary(binary_data.clone(), "test"); - assert_eq!(binary_content.as_bytes(), Some(&binary_data)); - assert!(binary_content.as_text().is_none()); + assert_eq!(content.size(), 4); + } + + #[test] + fn test_metadata_operations() { + let data = ContentData::from("Test"); + let mut content = Content::new(data); + + assert!(content.metadata().is_none()); + + let source = content.content_source(); + let metadata = ContentMetadata::with_path(source, "file.pdf"); + content.set_metadata(metadata); + + assert!(content.metadata().is_some()); + assert_eq!(content.file_extension(), Some("pdf")); + + content.clear_metadata(); + assert!(content.metadata().is_none()); + } + + #[test] + fn test_into_parts() { + let source = ContentSource::new(); + let data = ContentData::from_text(source, "Test"); + let metadata = ContentMetadata::with_path(source, "test.txt"); + let content = Content::with_metadata(data.clone(), metadata.clone()); + + let (recovered_data, recovered_metadata) = content.into_parts(); + assert_eq!(recovered_data, data); + assert_eq!(recovered_metadata, Some(metadata)); } #[test] fn test_serialization() { - let content = Content::text("Test content"); + let data = ContentData::from("Test content"); + let content = Content::new(data); let json = serde_json::to_string(&content).unwrap(); let deserialized: Content = serde_json::from_str(&json).unwrap(); assert_eq!(content, deserialized); } + + #[test] + fn test_content_source() { + let source = ContentSource::new(); + let data = ContentData::from_text(source, "Test"); + let content = Content::new(data); + + assert_eq!(content.content_source(), source); + } } diff --git a/crates/nvisy-core/src/io/content_data.rs b/crates/nvisy-core/src/io/content_data.rs index dda1542..1f08bc3 100644 --- a/crates/nvisy-core/src/io/content_data.rs +++ b/crates/nvisy-core/src/io/content_data.rs @@ -7,32 +7,157 @@ use std::fmt; use std::sync::OnceLock; use bytes::Bytes; +use hipstr::HipStr; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use crate::error::{Error, ErrorResource, ErrorType, Result}; use crate::path::ContentSource; +/// The underlying data storage type for content +/// +/// This enum allows content to be stored as either binary data (`Bytes`) +/// or text data (`HipStr`). Both types are cheap to clone as they use +/// reference counting internally. 
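+///
+/// # Example
+///
+/// A minimal sketch of the two variants, using only the conversions defined
+/// below in this file:
+///
+/// ```
+/// use bytes::Bytes;
+/// use nvisy_core::io::ContentBytes;
+///
+/// let text = ContentBytes::from("hello");
+/// let binary = ContentBytes::from(Bytes::from_static(&[0xFF, 0xFE]));
+///
+/// assert!(text.is_text());
+/// assert!(binary.is_binary());
+/// assert_eq!(text.as_str(), Some("hello"));
+/// ```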
+#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum ContentBytes { + /// Binary data stored as `bytes::Bytes` + Binary(Bytes), + /// Text data stored as `hipstr::HipStr<'static>` (owned) + Text(HipStr<'static>), +} + +impl ContentBytes { + /// Get the size of the content in bytes + pub fn len(&self) -> usize { + match self { + Self::Binary(bytes) => bytes.len(), + Self::Text(text) => text.len(), + } + } + + /// Check if the content is empty + pub fn is_empty(&self) -> bool { + match self { + Self::Binary(bytes) => bytes.is_empty(), + Self::Text(text) => text.is_empty(), + } + } + + /// Get the content as a byte slice + pub fn as_bytes(&self) -> &[u8] { + match self { + Self::Binary(bytes) => bytes, + Self::Text(text) => text.as_bytes(), + } + } + + /// Check if this is text content + pub fn is_text(&self) -> bool { + matches!(self, Self::Text(_)) + } + + /// Check if this is binary content + pub fn is_binary(&self) -> bool { + matches!(self, Self::Binary(_)) + } + + /// Try to get the content as a string slice + pub fn as_str(&self) -> Option<&str> { + match self { + Self::Binary(bytes) => std::str::from_utf8(bytes).ok(), + Self::Text(text) => Some(text.as_str()), + } + } + + /// Convert to Bytes (clones if text) + pub fn to_bytes(&self) -> Bytes { + match self { + Self::Binary(bytes) => bytes.clone(), + Self::Text(text) => Bytes::copy_from_slice(text.as_bytes()), + } + } + + /// Convert to HipStr if valid UTF-8 + pub fn to_hipstr(&self) -> Result<HipStr<'static>> { + match self { + Self::Binary(bytes) => { + let s = std::str::from_utf8(bytes).map_err(|e| { + Error::new(format!("Invalid UTF-8: {e}")) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) + })?; + Ok(HipStr::from(s)) + } + Self::Text(text) => Ok(text.clone()), + } + } +} + +impl Default for ContentBytes { + fn default() -> Self { + Self::Binary(Bytes::new()) + } +} + +impl From<&str> for ContentBytes { + fn from(s: &str) -> Self { + Self::Text(HipStr::from(s)) + } +} + +impl From<String> for ContentBytes { + fn from(s: String) -> Self { + Self::Text(HipStr::from(s)) + } +} + +impl From<HipStr<'static>> for ContentBytes { + fn from(s: HipStr<'static>) -> Self { + Self::Text(s) + } +} + +impl From<&[u8]> for ContentBytes { + fn from(bytes: &[u8]) -> Self { + Self::Binary(Bytes::copy_from_slice(bytes)) + } +} + +impl From<Vec<u8>> for ContentBytes { + fn from(vec: Vec<u8>) -> Self { + Self::Binary(Bytes::from(vec)) + } +} + +impl From<Bytes> for ContentBytes { + fn from(bytes: Bytes) -> Self { + Self::Binary(bytes) + } +} + /// Content data with metadata and computed hashes /// -/// This struct is a minimal wrapper around `bytes::Bytes` that stores content data +/// This struct wraps [`ContentBytes`] (either `Bytes` or `HipStr`) and stores content data /// along with metadata about its source and optional computed SHA256 hash. -/// It's designed to be cheap to clone using the `bytes::Bytes` type. +/// It's designed to be cheap to clone using reference-counted types. /// The SHA256 hash is lazily computed using `OnceLock` for lock-free access after initialization.
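+///
+/// The hash is computed at most once and then served from the cache
+/// (a minimal sketch):
+///
+/// ```
+/// use nvisy_core::io::ContentData;
+///
+/// let content = ContentData::from("Hello, world!");
+/// let first = content.sha256();  // computed and cached here
+/// let second = content.sha256(); // returned from the cache
+/// assert_eq!(first, second);
+/// ```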
#[derive(Debug)] #[derive(Serialize, Deserialize)] pub struct ContentData { /// Unique identifier for the content source pub content_source: ContentSource, - /// The actual content data - pub content_data: Bytes, + /// The actual content data (binary or text) + data: ContentBytes, /// Lazily computed SHA256 hash of the content #[serde(skip)] - content_sha256: OnceLock<Bytes>, + sha256_cache: OnceLock<Bytes>, } impl ContentData { - /// Create new content data + /// Create new content data from bytes /// /// # Example /// @@ -46,17 +171,47 @@ impl ContentData { /// /// assert_eq!(content.size(), 13); /// ``` - pub fn new(content_source: ContentSource, content_data: Bytes) -> Self { + pub fn new(content_source: ContentSource, data: Bytes) -> Self { Self { content_source, - content_data, - content_sha256: OnceLock::new(), + data: ContentBytes::Binary(data), + sha256_cache: OnceLock::new(), + } + } + + /// Create new content data from text + /// + /// # Example + /// + /// ``` + /// use nvisy_core::{io::ContentData, path::ContentSource}; + /// + /// let source = ContentSource::new(); + /// let content = ContentData::from_text(source, "Hello, world!"); + /// + /// assert!(content.is_text()); + /// assert_eq!(content.as_str().unwrap(), "Hello, world!"); + /// ``` + pub fn from_text(content_source: ContentSource, text: impl Into<HipStr<'static>>) -> Self { + Self { + content_source, + data: ContentBytes::Text(text.into()), + sha256_cache: OnceLock::new(), + } + } + + /// Create content data with explicit content bytes type + pub fn with_content_bytes(content_source: ContentSource, data: ContentBytes) -> Self { + Self { + content_source, + data, + sha256_cache: OnceLock::new(), } } /// Get the size of the content in bytes pub fn size(&self) -> usize { - self.content_data.len() + self.data.len() } /// Get pretty formatted size string @@ -73,19 +228,45 @@ impl ContentData { /// Get the content data as bytes slice pub fn as_bytes(&self) -> &[u8] { - &self.content_data + self.data.as_bytes() + } + + /// Get the underlying content bytes + pub fn content_bytes(&self) -> &ContentBytes { + &self.data + } + + /// Convert the content data to Bytes + pub fn to_bytes(&self) -> Bytes { + self.data.to_bytes() } - /// Get the content data as bytes + /// Consume and convert into Bytes pub fn into_bytes(self) -> Bytes { - self.content_data + match self.data { + ContentBytes::Binary(bytes) => bytes, + ContentBytes::Text(text) => Bytes::copy_from_slice(text.as_bytes()), + } + } + + /// Check if the content is stored as text + pub fn is_text(&self) -> bool { + self.data.is_text() } - /// Check if the content is likely text (basic heuristic) + /// Check if the content is stored as binary + pub fn is_binary(&self) -> bool { + self.data.is_binary() + } + + /// Check if the content is likely text (basic heuristic for binary data) pub fn is_likely_text(&self) -> bool { - self.content_data - .iter() - .all(|&b| b.is_ascii_graphic() || b.is_ascii_whitespace()) + match &self.data { + ContentBytes::Text(_) => true, + ContentBytes::Binary(bytes) => bytes + .iter() + .all(|&b| b.is_ascii_graphic() || b.is_ascii_whitespace()), + } } /// Try to convert the content data to a UTF-8 string @@ -94,13 +275,14 @@ impl ContentData { /// /// Returns an error if the content data contains invalid UTF-8 sequences.
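+    ///
+    /// # Example
+    ///
+    /// A minimal sketch:
+    ///
+    /// ```
+    /// use nvisy_core::io::ContentData;
+    ///
+    /// let content = ContentData::from("Hello, world!");
+    /// assert_eq!(content.as_string().unwrap(), "Hello, world!");
+    /// ```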
pub fn as_string(&self) -> Result<String> { - String::from_utf8(self.content_data.to_vec()).map_err(|e| { - Error::new( - ErrorType::Runtime, - ErrorResource::Core, - format!("Invalid UTF-8: {e}"), - ) - }) + match &self.data { + ContentBytes::Text(text) => Ok(text.to_string()), + ContentBytes::Binary(bytes) => String::from_utf8(bytes.to_vec()).map_err(|e| { + Error::new(format!("Invalid UTF-8: {e}")) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) + }), + } } /// Try to convert the content data to a UTF-8 string slice /// /// # Errors /// /// Returns an error if the content data contains invalid UTF-8 sequences. pub fn as_str(&self) -> Result<&str> { - std::str::from_utf8(&self.content_data).map_err(|e| { - Error::new( - ErrorType::Runtime, - ErrorResource::Core, - format!("Invalid UTF-8: {e}"), - ) - }) + match &self.data { + ContentBytes::Text(text) => Ok(text.as_str()), + ContentBytes::Binary(bytes) => std::str::from_utf8(bytes).map_err(|e| { + Error::new(format!("Invalid UTF-8: {e}")) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) + }), + } } /// Compute SHA256 hash of the content fn compute_sha256_internal(&self) -> Bytes { let mut hasher = Sha256::new(); - hasher.update(&self.content_data); + hasher.update(self.data.as_bytes()); Bytes::from(hasher.finalize().to_vec()) } /// Get the SHA256 hash, computing it if not already done pub fn sha256(&self) -> &Bytes { - self.content_sha256 + self.sha256_cache .get_or_init(|| self.compute_sha256_internal()) } @@ -148,15 +331,13 @@ if actual_hash.as_ref() == expected { Ok(()) } else { - Err(Error::new( - ErrorType::Runtime, - ErrorResource::Core, - format!( - "Hash mismatch: expected {}, got {}", - hex::encode(expected), - hex::encode(actual_hash) - ), + Err(Error::new(format!( + "Hash mismatch: expected {}, got {}", + hex::encode(expected), + hex::encode(actual_hash) )) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core)) } } @@ -166,30 +347,29 @@ /// /// Returns an error if the end index is beyond the content length or if start is greater than end.
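+    ///
+    /// # Example
+    ///
+    /// A minimal sketch:
+    ///
+    /// ```
+    /// use nvisy_core::io::ContentData;
+    ///
+    /// let content = ContentData::from("Hello, world!");
+    /// assert_eq!(content.slice(0, 5).unwrap().as_ref(), b"Hello");
+    /// assert!(content.slice(0, 100).is_err()); // end exceeds content length
+    /// ```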
pub fn slice(&self, start: usize, end: usize) -> Result<Bytes> { - if end > self.content_data.len() { - return Err(Error::new( - ErrorType::Runtime, - ErrorResource::Core, - format!( - "Slice end {} exceeds content length {}", - end, - self.content_data.len() - ), - )); + let bytes = self.data.as_bytes(); + if end > bytes.len() { + return Err(Error::new(format!( + "Slice end {} exceeds content length {}", + end, + bytes.len() + )) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core)); } if start > end { - return Err(Error::new( - ErrorType::Runtime, - ErrorResource::Core, - format!("Slice start {start} is greater than end {end}"), - )); + return Err( + Error::new(format!("Slice start {start} is greater than end {end}")) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core), + ); } - Ok(self.content_data.slice(start..end)) + Ok(Bytes::copy_from_slice(&bytes[start..end])) } /// Check if the content is empty pub fn is_empty(&self) -> bool { - self.content_data.is_empty() + self.data.is_empty() } } @@ -198,14 +378,14 @@ impl Clone for ContentData { fn clone(&self) -> Self { let new_lock = OnceLock::new(); // Copy the computed hash if available - if let Some(hash) = self.content_sha256.get() { + if let Some(hash) = self.sha256_cache.get() { let _ = new_lock.set(hash.clone()); } Self { content_source: self.content_source, - content_data: self.content_data.clone(), - content_sha256: new_lock, + data: self.data.clone(), + sha256_cache: new_lock, } } } @@ -213,7 +393,7 @@ impl Clone for ContentData { // Manual implementation of PartialEq impl PartialEq for ContentData { fn eq(&self, other: &Self) -> bool { - self.content_source == other.content_source && self.content_data == other.content_data + self.content_source == other.content_source && self.data == other.data } } @@ -223,14 +403,14 @@ impl Eq for ContentData {} impl From<&str> for ContentData { fn from(s: &str) -> Self { let source = ContentSource::new(); - Self::new(source, Bytes::from(s.to_string())) + Self::from_text(source, s) } } impl From<String> for ContentData { fn from(s: String) -> Self { let source = ContentSource::new(); - Self::new(source, Bytes::from(s)) + Self::from_text(source, s) } } @@ -255,6 +435,13 @@ impl From<Bytes> for ContentData { } } +impl From<HipStr<'static>> for ContentData { + fn from(text: HipStr<'static>) -> Self { + let source = ContentSource::new(); + Self::from_text(source, text) + } +} + impl fmt::Display for ContentData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Ok(text) = self.as_str() { @@ -278,7 +465,34 @@ mod tests { assert_eq!(content.content_source, source); assert_eq!(content.size(), 13); // Check that hash is not computed yet - assert!(content.content_sha256.get().is_none()); + assert!(content.sha256_cache.get().is_none()); + } + + #[test] + fn test_content_data_from_text() { + let source = ContentSource::new(); + let content = ContentData::from_text(source, "Hello, world!"); + + assert!(content.is_text()); + assert!(!content.is_binary()); + assert_eq!(content.as_str().unwrap(), "Hello, world!"); + } + + #[test] + fn test_content_bytes_text() { + let text = ContentBytes::from("Hello"); + assert!(text.is_text()); + assert!(!text.is_binary()); + assert_eq!(text.as_str(), Some("Hello")); + assert_eq!(text.len(), 5); + } + + #[test] + fn test_content_bytes_binary() { + let binary = ContentBytes::from(vec![0xFF, 0xFE]); + assert!(binary.is_binary()); + assert!(!binary.is_text()); + assert_eq!(binary.len(), 2); } #[test] @@ -295,7 +509,7 @@ mod tests { let content =
ContentData::from("Hello, world!"); let hash = content.sha256(); - assert!(content.content_sha256.get().is_some()); + assert!(content.sha256_cache.get().is_some()); assert_eq!(hash.len(), 32); // SHA256 is 32 bytes // Test getting cached hash @@ -364,6 +578,10 @@ mod tests { assert_eq!(from_bytes.as_str().unwrap(), "test"); assert_eq!(from_vec.as_str().unwrap(), "test"); assert_eq!(from_bytes_type.as_str().unwrap(), "test"); + + // Text types should be stored as text + assert!(from_str.is_text()); + assert!(from_string.is_text()); } #[test] @@ -384,21 +602,18 @@ mod tests { let cloned = original.clone(); // Both should have the hash computed - assert!(original.content_sha256.get().is_some()); - assert!(cloned.content_sha256.get().is_some()); + assert!(original.sha256_cache.get().is_some()); + assert!(cloned.sha256_cache.get().is_some()); assert_eq!(original.sha256(), cloned.sha256()); } #[test] - fn test_cloning_shares_bytes() { + fn test_cloning_is_cheap() { let original = ContentData::from("Hello, world!"); let cloned = original.clone(); // They should be equal assert_eq!(original, cloned); - - // The underlying bytes should share the same memory - assert_eq!(original.content_data.as_ptr(), cloned.content_data.as_ptr()); } #[test] @@ -414,4 +629,23 @@ mod tests { assert!(content.is_empty()); assert_eq!(content.size(), 0); } + + #[test] + fn test_to_bytes() { + let text_content = ContentData::from_text(ContentSource::new(), "Hello"); + let bytes = text_content.to_bytes(); + assert_eq!(bytes.as_ref(), b"Hello"); + + let binary_content = ContentData::new(ContentSource::new(), Bytes::from("World")); + let bytes = binary_content.to_bytes(); + assert_eq!(bytes.as_ref(), b"World"); + } + + #[test] + fn test_from_hipstr() { + let hipstr = HipStr::from("Hello from HipStr"); + let content = ContentData::from(hipstr); + assert!(content.is_text()); + assert_eq!(content.as_str().unwrap(), "Hello from HipStr"); + } } diff --git a/crates/nvisy-core/src/io/content_read.rs b/crates/nvisy-core/src/io/content_read.rs index 3f3b61e..f889aea 100644 --- a/crates/nvisy-core/src/io/content_read.rs +++ b/crates/nvisy-core/src/io/content_read.rs @@ -44,7 +44,7 @@ pub trait AsyncContentRead: AsyncRead + Unpin + Send { let mut buffer = Vec::new(); self.read_to_end(&mut buffer).await?; - let content_data = ContentData::new(ContentSource::new(), buffer.into()); + let content_data = ContentData::new(ContentSource::new(), Bytes::from(buffer)); Ok(content_data) } } @@ -79,7 +79,7 @@ pub trait AsyncContentRead: AsyncRead + Unpin + Send { let mut buffer = Vec::new(); self.read_to_end(&mut buffer).await?; - let content_data = ContentData::new(source, buffer.into()); + let content_data = ContentData::new(source, Bytes::from(buffer)); Ok(content_data) } } @@ -137,7 +137,7 @@ pub trait AsyncContentRead: AsyncRead + Unpin + Send { total_read += bytes_read; } - let content_data = ContentData::new(ContentSource::new(), buffer.into()); + let content_data = ContentData::new(ContentSource::new(), Bytes::from(buffer)); Ok(content_data) } } @@ -225,7 +225,7 @@ pub trait AsyncContentRead: AsyncRead + Unpin + Send { } // Convert to ContentData after verification - let content_data = ContentData::new(ContentSource::new(), buffer.into()); + let content_data = ContentData::new(ContentSource::new(), Bytes::from(buffer)); Ok(content_data) } } diff --git a/crates/nvisy-core/src/io/data_reference.rs b/crates/nvisy-core/src/io/data_reference.rs index 7dc51df..f97eb1a 100644 --- a/crates/nvisy-core/src/io/data_reference.rs +++ 
b/crates/nvisy-core/src/io/data_reference.rs @@ -17,9 +17,9 @@ use crate::path::ContentSource; /// # Examples /// /// ```rust -/// use nvisy_core::io::{DataReference, Content}; +/// use nvisy_core::io::{DataReference, Content, ContentData}; /// -/// let content = Content::Text("Hello, world!".to_string()); +/// let content = Content::new(ContentData::from("Hello, world!")); /// let data_ref = DataReference::new(content) /// .with_mapping_id("line-42"); /// @@ -38,7 +38,7 @@ pub struct DataReference { mapping_id: Option<String>, /// The actual content data - content_type: Content, + content: Content, } impl DataReference { @@ -47,7 +47,7 @@ Self { source: ContentSource::new(), mapping_id: None, - content_type: content, + content, } } @@ -56,7 +56,7 @@ Self { source, mapping_id: None, - content_type: content, + content, } } @@ -79,39 +79,41 @@ /// Get a reference to the content pub fn content(&self) -> &Content { - &self.content_type + &self.content } - /// Get the content type name - pub fn content_type_name(&self) -> &'static str { - self.content_type.type_name() + /// Check if the content is text-based + pub fn is_text(&self) -> bool { + self.content.is_text() } - /// Get the estimated size of the content in bytes - pub fn estimated_size(&self) -> usize { - self.content_type.estimated_size() + /// Get the size of the content in bytes + pub fn size(&self) -> usize { + self.content.size() } } #[cfg(test)] mod tests { + use crate::io::ContentData; + use super::*; #[test] fn test_data_reference_creation() { - let content = Content::text("Hello, world!"); + let content = Content::new(ContentData::from("Hello, world!")); let data_ref = DataReference::new(content); - assert_eq!(data_ref.content_type_name(), "text"); + assert!(data_ref.is_text()); assert!(data_ref.mapping_id().is_none()); - assert_eq!(data_ref.estimated_size(), 13); + assert_eq!(data_ref.size(), 13); // Verify UUIDv7 is used assert_eq!(data_ref.source().as_uuid().get_version_num(), 7); } #[test] fn test_data_reference_with_mapping() { - let content = Content::text("Test content"); + let content = Content::new(ContentData::from("Test content")); let data_ref = DataReference::new(content).with_mapping_id("line-42"); assert_eq!(data_ref.mapping_id(), Some("line-42")); @@ -120,7 +122,7 @@ #[test] fn test_data_reference_with_source() { let source = ContentSource::new(); - let content = Content::text("Test content"); + let content = Content::new(ContentData::from("Test content")); let data_ref = DataReference::with_source(source, content); assert_eq!(data_ref.source(), source); @@ -128,7 +130,7 @@ #[test] fn test_serialization() { - let content = Content::text("Test content"); + let content = Content::new(ContentData::from("Test content")); let data_ref = DataReference::new(content).with_mapping_id("test-mapping"); let json = serde_json::to_string(&data_ref).unwrap(); diff --git a/crates/nvisy-core/src/io/mod.rs index e0f3c44..aa33482 100644 --- a/crates/nvisy-core/src/io/mod.rs +++ b/crates/nvisy-core/src/io/mod.rs @@ -20,7 +20,7 @@ mod data_reference; // Re-export core types and traits pub use content::Content; -pub use content_data::ContentData; +pub use content_data::{ContentBytes, ContentData}; pub use content_read::AsyncContentRead; pub use content_write::AsyncContentWrite; pub use data_reference::DataReference; diff --git a/crates/nvisy-core/src/lib.rs index b166bd9..4d0dd3e 100644 ---
a/crates/nvisy-core/src/lib.rs +++ b/crates/nvisy-core/src/lib.rs @@ -3,24 +3,6 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -//! # Nvisy Core -//! -//! Core types and enums for data categorization in the Nvisy content processing system. -//! -//! This crate provides the fundamental data classification system used throughout -//! the Nvisy ecosystem to identify and categorize different types of sensitive data, -//! as well as structured error handling. -//! -//! ## Core Types -//! -//! - [`fs::DataSensitivity`]: Sensitivity levels for risk assessment -//! - [`io::Content`]: Content types and data structures -//! - [`io::DataReference`]: Data references with source tracking -//! - [`fs::DataStructureKind`]: Classification of data structure types -//! - [`fs::ContentFile`]: File operations with content tracking -//! - [`io::ContentData`]: Container for content data with metadata -//! - [`error::Error`]: Structured error handling with source classification - pub mod error; pub mod fs; pub mod io; diff --git a/crates/nvisy-core/src/prelude.rs b/crates/nvisy-core/src/prelude.rs index f39f7e6..b369d58 100644 --- a/crates/nvisy-core/src/prelude.rs +++ b/crates/nvisy-core/src/prelude.rs @@ -6,9 +6,7 @@ // Error handling pub use crate::error::{BoxError, Error, ErrorResource, ErrorType, Result}; // File system types -pub use crate::fs::{ - ContentFile, ContentKind, ContentMetadata, DataSensitivity, DataStructureKind, -}; +pub use crate::fs::{ContentFile, ContentKind, ContentMetadata, DataSensitivity}; // I/O types pub use crate::io::{AsyncContentRead, AsyncContentWrite, Content, ContentData, DataReference}; // Path types diff --git a/crates/nvisy-document/README.md b/crates/nvisy-document/README.md index 0a793b6..9b1d780 100644 --- a/crates/nvisy-document/README.md +++ b/crates/nvisy-document/README.md @@ -1,18 +1,42 @@ # nvisy-document -Document manipulation traits and types for the Nvisy system. +Document manipulation library for VLM-driven editing workflows. -This crate provides a unified interface for working with different document -formats, enabling semantic editing operations driven by VLM (Vision Language -Model) understanding. +This crate provides a format-agnostic abstraction for document editing, +designed to support Vision Language Model (VLM) function calls for +operations like redaction, text replacement, splitting, and merging. + +## Core Concepts + +- **[`DocumentFormat`]** - A format handler that can load and create documents. + Implementations know about format capabilities and how to parse/serialize + documents. + +- **[`Document`]** - A loaded document instance for reading document content. + +- **[`EditableDocument`]** - Extension trait for documents that support editing. + +- **[`Region`]** - Semantic units within a document (text blocks, images, + tables) with stable IDs that persist across edit sessions. + +- **[`EditOperation`]** - Edit commands that target regions by ID, + supporting undo/redo and batch operations. 
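+
+A minimal sketch of how these concepts compose (the `PdfFormat` handler and
+its `load` method are illustrative placeholders, not a confirmed public API):
+
+```rust,ignore
+let format = PdfFormat::default();
+let doc = format.load(bytes).await?;
+
+// Regions carry stable IDs, so later edit operations can target them by ID.
+for region in doc.regions() {
+    println!("{:?}: {:?}", region.id, region.kind);
+}
+```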
+ +## Extension Traits + +Document implementations can optionally implement these extension traits: + +- [`Conversion`] - Convert documents to other formats +- [`Metadata`] - Extract and modify document metadata +- [`ThumbnailGenerator`] - Generate thumbnail images ## Features -- **Document Format Trait**: Common interface for PDF, DOCX, and other formats -- **Format Registry**: Register and look up formats by MIME type or extension -- **Region-based Editing**: Reference and modify document regions with stable IDs -- **Edit Operations**: Redaction, text replacement, structural changes -- **Streaming Support**: Handle large documents with pagination +- **Document Format Trait** - Common interface for PDF, DOCX, and other formats +- **Format Registry** - Register and look up formats by MIME type or extension +- **Region-based Editing** - Reference and modify document regions with stable IDs +- **Edit Operations** - Redaction, text replacement, structural changes +- **Streaming Support** - Handle large documents with pagination ## Architecture @@ -31,7 +55,7 @@ Model) understanding. ┌─────────────────┼─────────────────┐ ▼ ▼ ▼ ┌──────────┐ ┌──────────┐ ┌──────────┐ - │nvisy-pdf │ │nvisy-docx│ │nvisy-txt │ + │nvisy-pdf │ │nvisy-docx│ │nvisy-text│ └──────────┘ └──────────┘ └──────────┘ ``` diff --git a/crates/nvisy-document/src/lib.rs b/crates/nvisy-document/src/lib.rs index f85af03..34435f8 100644 --- a/crates/nvisy-document/src/lib.rs +++ b/crates/nvisy-document/src/lib.rs @@ -2,39 +2,6 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -//! # nvisy-document -//! -//! Document manipulation library for VLM-driven editing workflows. -//! -//! This crate provides a format-agnostic abstraction for document editing, -//! designed to support Vision Language Model (VLM) function calls for -//! operations like redaction, text replacement, splitting, and merging. -//! -//! ## Core Concepts -//! -//! - **[`DocumentFormat`]**: A format handler (like a class) that can load -//! and create documents. Implementations know about format capabilities -//! and how to parse/serialize documents. -//! -//! - **[`Document`]**: A loaded document instance for reading document content. -//! Think of this as an instance of a DocumentFormat. -//! -//! - **[`EditableDocument`]**: Extension trait for documents that support editing. -//! -//! - **[`Region`]**: Semantic units within a document (text blocks, images, -//! tables) with stable IDs that persist across edit sessions. -//! -//! - **[`EditOperation`]**: Edit commands that target regions by ID, -//! supporting undo/redo and batch operations. -//! -//! ## Extension Traits -//! -//! Document implementations can optionally implement these extension traits: -//! -//! - [`Conversion`]: Convert documents to other formats -//! - [`Metadata`]: Extract and modify document metadata -//! 
- [`ThumbnailGenerator`]: Generate thumbnail images - // Core modules pub mod error; pub mod format; @@ -43,6 +10,8 @@ pub mod operation; // Extension trait modules pub mod conversion; pub mod metadata; +pub mod table; +pub mod text; pub mod thumbnail; // Error re-exports @@ -73,5 +42,9 @@ pub use operation::{ MergeOrder, MetadataOperation, PageOperation, RedactStyle, SplitBoundary, StructuralOperation, TextStyle, }; +// Table re-exports +pub use table::{CellDataType, NormalizedCell, NormalizedRow, NormalizedTable, TableExtractor}; +// Text re-exports +pub use text::{ExtractedText, TextExtractor}; // Thumbnail re-exports pub use thumbnail::{ImageFormat, Thumbnail, ThumbnailGenerator, ThumbnailOptions, ThumbnailSize}; diff --git a/crates/nvisy-document/src/table/mod.rs new file mode 100644 index 0000000..44b24f9 --- /dev/null +++ b/crates/nvisy-document/src/table/mod.rs @@ -0,0 +1,86 @@ +//! Table extraction and normalization traits and types. +//! +//! This module defines the [`TableExtractor`] trait for extracting and +//! normalizing tables from documents. + +mod types; + +use async_trait::async_trait; +pub use types::{CellDataType, NormalizedCell, NormalizedRow, NormalizedTable}; + +use crate::error::Result; +use crate::format::{Document, Region, RegionKind}; + +/// Trait for document table extraction and normalization. +/// +/// This trait is implemented by [`Document`] types that contain tabular data. +/// Tables are extracted as [`NormalizedTable`] structures with consistent +/// representation across formats. +/// +/// # Example +/// +/// ```ignore +/// use nvisy_document::{Document, TableExtractor, NormalizedTable}; +/// +/// async fn process_tables<D>(doc: &D) -> Result<Vec<NormalizedTable>> +/// where +/// D: TableExtractor, +/// { +/// let tables = doc.extract_tables().await?; +/// +/// for table in &tables { +/// println!("Table with {} rows, {} columns", +/// table.row_count(), +/// table.column_count +/// ); +/// +/// if table.has_header { +/// println!("Headers: {:?}", table.column_headers()); +/// } +/// } +/// +/// Ok(tables) +/// } +/// ``` +#[async_trait] +pub trait TableExtractor: Document { + /// Extracts and normalizes all tables from the document. + /// + /// Tables are identified from the document's regions and converted + /// to a normalized format with: + /// - Consistent cell structure + /// - Resolved merged cells + /// - Inferred column types + /// - Detected headers + async fn extract_tables(&self) -> Result<Vec<NormalizedTable>>; + /// Extracts and normalizes a specific table by its region ID. + /// + /// # Arguments + /// + /// * `region_id` - The ID of the table region + /// + /// # Returns + /// + /// The normalized table, or `None` if the region is not a table. + async fn extract_table( + &self, + region_id: crate::format::RegionId, + ) -> Result<Option<NormalizedTable>>; + /// Returns the table regions in the document without normalizing them. + /// + /// This is a quick way to check how many tables exist without + /// performing full extraction. + fn table_regions(&self) -> Vec<&Region> { + self.regions() + .iter() + .filter(|r| r.kind == RegionKind::Table) + .collect() + } + + /// Returns the number of tables in the document. + fn table_count(&self) -> usize { + self.table_regions().len() + } +} diff --git a/crates/nvisy-document/src/table/types.rs new file mode 100644 index 0000000..f5d4594 --- /dev/null +++ b/crates/nvisy-document/src/table/types.rs @@ -0,0 +1,446 @@ +//! Table extraction and normalization types.
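+//!
+//! Building a table by hand, mirroring the unit tests below (a minimal sketch):
+//!
+//! ```ignore
+//! use nvisy_document::{NormalizedRow, NormalizedTable};
+//! use nvisy_document::format::RegionId;
+//!
+//! let mut table = NormalizedTable::new(RegionId::new()).with_column_count(2);
+//! let mut row = NormalizedRow::new();
+//! row.add_text("Alice");
+//! row.add_text("30");
+//! table.add_row(row);
+//! assert_eq!(table.row_count(), 1);
+//! ```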
+ +use serde::{Deserialize, Serialize}; + +use crate::format::RegionId; + +/// A normalized table structure. +/// +/// Tables from different formats (PDF, DOCX, XLSX, HTML) are converted +/// to this common representation for consistent processing. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NormalizedTable { + /// Reference to the table's region in the document. + pub id: RegionId, + + /// Table rows. + pub rows: Vec<NormalizedRow>, + + /// Total number of columns. + pub column_count: usize, + + /// Whether the table has a header row. + pub has_header: bool, + + /// Number of header rows (0 if no header). + pub header_row_count: usize, + + /// Optional table caption or title. + pub caption: Option<String>, +} + +impl NormalizedTable { + /// Creates a new normalized table. + #[must_use] + pub fn new(id: RegionId) -> Self { + Self { + id, + rows: Vec::new(), + column_count: 0, + has_header: false, + header_row_count: 0, + caption: None, + } + } + + /// Sets the column count. + #[must_use] + pub fn with_column_count(mut self, count: usize) -> Self { + self.column_count = count; + self + } + + /// Sets the header row count. + #[must_use] + pub fn with_header_rows(mut self, count: usize) -> Self { + self.header_row_count = count; + self.has_header = count > 0; + self + } + + /// Sets the caption. + #[must_use] + pub fn with_caption(mut self, caption: impl Into<String>) -> Self { + self.caption = Some(caption.into()); + self + } + + /// Adds a row to the table. + pub fn add_row(&mut self, row: NormalizedRow) { + self.rows.push(row); + } + + /// Returns the number of rows. + #[must_use] + pub fn row_count(&self) -> usize { + self.rows.len() + } + + /// Returns the header rows. + #[must_use] + pub fn header_rows(&self) -> &[NormalizedRow] { + &self.rows[..self.header_row_count.min(self.rows.len())] + } + + /// Returns the data rows (non-header). + #[must_use] + pub fn data_rows(&self) -> &[NormalizedRow] { + let start = self.header_row_count.min(self.rows.len()); + &self.rows[start..] + } + + /// Returns a cell at the given position. + #[must_use] + pub fn cell(&self, row: usize, col: usize) -> Option<&NormalizedCell> { + self.rows.get(row).and_then(|r| r.cells.get(col)) + } + + /// Returns the column headers as strings. + #[must_use] + pub fn column_headers(&self) -> Vec<&str> { + if !self.has_header || self.rows.is_empty() { + return Vec::new(); + } + self.rows[0].cells.iter().map(|c| c.text.as_str()).collect() + } +} + +/// A row within a normalized table. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NormalizedRow { + /// Cells in this row. + pub cells: Vec<NormalizedCell>, + + /// Whether this is a header row. + pub is_header: bool, +} + +impl NormalizedRow { + /// Creates a new row. + #[must_use] + pub fn new() -> Self { + Self { + cells: Vec::new(), + is_header: false, + } + } + + /// Creates a new header row. + #[must_use] + pub fn header() -> Self { + Self { + cells: Vec::new(), + is_header: true, + } + } + + /// Adds a cell to the row. + pub fn add_cell(&mut self, cell: NormalizedCell) { + self.cells.push(cell); + } + + /// Adds a text cell to the row. + pub fn add_text(&mut self, text: impl Into<String>) { + self.cells.push(NormalizedCell::text(text)); + } + + /// Returns the number of cells. + #[must_use] + pub fn cell_count(&self) -> usize { + self.cells.len() + } +} + +impl Default for NormalizedRow { + fn default() -> Self { + Self::new() + } +} + +/// A cell within a normalized table. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct NormalizedCell { + /// Text content of the cell.
+ pub text: String, + + /// Number of columns this cell spans. + pub col_span: usize, + + /// Number of rows this cell spans. + pub row_span: usize, + + /// Inferred data type of the cell content. + pub data_type: CellDataType, + + /// Whether this cell is a continuation of a merged cell. + /// + /// True for cells that are covered by a cell with col_span > 1 or row_span > 1. + pub is_merged_continuation: bool, +} + +impl NormalizedCell { + /// Creates a new cell with text content. + #[must_use] + pub fn new(text: impl Into<String>) -> Self { + let text = text.into(); + let data_type = CellDataType::infer(&text); + Self { + text, + col_span: 1, + row_span: 1, + data_type, + is_merged_continuation: false, + } + } + + /// Creates a text cell. + #[must_use] + pub fn text(text: impl Into<String>) -> Self { + Self::new(text) + } + + /// Creates an empty cell. + #[must_use] + pub fn empty() -> Self { + Self { + text: String::new(), + col_span: 1, + row_span: 1, + data_type: CellDataType::Empty, + is_merged_continuation: false, + } + } + + /// Creates a merged continuation cell. + #[must_use] + pub fn merged_continuation() -> Self { + Self { + text: String::new(), + col_span: 1, + row_span: 1, + data_type: CellDataType::Empty, + is_merged_continuation: true, + } + } + + /// Sets the column span. + #[must_use] + pub fn with_col_span(mut self, span: usize) -> Self { + self.col_span = span; + self + } + + /// Sets the row span. + #[must_use] + pub fn with_row_span(mut self, span: usize) -> Self { + self.row_span = span; + self + } + + /// Sets the data type explicitly. + #[must_use] + pub fn with_data_type(mut self, data_type: CellDataType) -> Self { + self.data_type = data_type; + self + } + + /// Returns whether the cell is empty. + #[must_use] + pub fn is_empty(&self) -> bool { + self.text.is_empty() || self.data_type == CellDataType::Empty + } + + /// Returns whether the cell spans multiple columns or rows. + #[must_use] + pub fn is_merged(&self) -> bool { + self.col_span > 1 || self.row_span > 1 + } +} + +/// Inferred data type of cell content. +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + Hash, + Default, + Serialize, + Deserialize +)] +#[serde(rename_all = "snake_case")] +pub enum CellDataType { + /// Plain text content. + #[default] + Text, + + /// Numeric value (integer or float). + Number, + + /// Date value. + Date, + + /// Date and time value. + DateTime, + + /// Boolean value. + Boolean, + + /// Formula (spreadsheet). + Formula, + + /// Empty cell. + Empty, +} + +impl CellDataType { + /// Infers the data type from a string value.
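+    ///
+    /// # Example
+    ///
+    /// A few representative inferences, mirroring the unit tests below:
+    ///
+    /// ```
+    /// use nvisy_document::CellDataType;
+    ///
+    /// assert_eq!(CellDataType::infer("123.45"), CellDataType::Number);
+    /// assert_eq!(CellDataType::infer("2024-01-15"), CellDataType::Date);
+    /// assert_eq!(CellDataType::infer("=SUM(A1:A10)"), CellDataType::Formula);
+    /// assert_eq!(CellDataType::infer(""), CellDataType::Empty);
+    /// ```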
+    #[must_use]
+    pub fn infer(text: &str) -> Self {
+        let trimmed = text.trim();
+
+        if trimmed.is_empty() {
+            return Self::Empty;
+        }
+
+        // Check for boolean
+        match trimmed.to_lowercase().as_str() {
+            "true" | "false" | "yes" | "no" => return Self::Boolean,
+            _ => {}
+        }
+
+        // Check for formula (starts with =)
+        if trimmed.starts_with('=') {
+            return Self::Formula;
+        }
+
+        // Check for number
+        if Self::is_number(trimmed) {
+            return Self::Number;
+        }
+
+        // Check for date patterns
+        if Self::is_date(trimmed) {
+            return Self::Date;
+        }
+
+        if Self::is_datetime(trimmed) {
+            return Self::DateTime;
+        }
+
+        Self::Text
+    }
+
+    fn is_number(s: &str) -> bool {
+        // Remove common number formatting
+        let cleaned: String = s
+            .chars()
+            .filter(|c| *c != ',' && *c != ' ' && *c != '$' && *c != '€' && *c != '%')
+            .collect();
+
+        cleaned.parse::<f64>().is_ok()
+    }
+
+    fn is_date(s: &str) -> bool {
+        // Simple date pattern detection without regex
+        // Matches: YYYY-MM-DD, MM/DD/YYYY, DD.MM.YYYY, DD-MM-YYYY
+        let chars: Vec<char> = s.chars().collect();
+
+        // Check for ISO format: YYYY-MM-DD (10 chars)
+        if chars.len() == 10 {
+            let is_iso = chars[0..4].iter().all(|c| c.is_ascii_digit())
+                && chars[4] == '-'
+                && chars[5..7].iter().all(|c| c.is_ascii_digit())
+                && chars[7] == '-'
+                && chars[8..10].iter().all(|c| c.is_ascii_digit());
+
+            if is_iso {
+                return true;
+            }
+
+            // Check for other formats: XX/XX/XXXX, XX.XX.XXXX, XX-XX-XXXX
+            let sep = chars[2];
+            if (sep == '/' || sep == '.' || sep == '-')
+                && chars[5] == sep
+                && chars[0..2].iter().all(|c| c.is_ascii_digit())
+                && chars[3..5].iter().all(|c| c.is_ascii_digit())
+                && chars[6..10].iter().all(|c| c.is_ascii_digit())
+            {
+                return true;
+            }
+        }
+
+        false
+    }
+
+    fn is_datetime(s: &str) -> bool {
+        // Contains date-like pattern and time-like pattern
+        s.contains(':') && (s.contains('-') || s.contains('/'))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_normalized_table() {
+        let id = RegionId::new();
+        let mut table = NormalizedTable::new(id)
+            .with_column_count(3)
+            .with_header_rows(1);
+
+        let mut header = NormalizedRow::header();
+        header.add_text("Name");
+        header.add_text("Age");
+        header.add_text("City");
+        table.add_row(header);
+
+        let mut row1 = NormalizedRow::new();
+        row1.add_text("Alice");
+        row1.add_text("30");
+        row1.add_text("NYC");
+        table.add_row(row1);
+
+        assert_eq!(table.row_count(), 2);
+        assert_eq!(table.column_count, 3);
+        assert!(table.has_header);
+        assert_eq!(table.header_rows().len(), 1);
+        assert_eq!(table.data_rows().len(), 1);
+        assert_eq!(table.column_headers(), vec!["Name", "Age", "City"]);
+    }
+
+    #[test]
+    fn test_cell_data_type_inference() {
+        assert_eq!(CellDataType::infer(""), CellDataType::Empty);
+        assert_eq!(CellDataType::infer("   "), CellDataType::Empty);
+        assert_eq!(CellDataType::infer("Hello"), CellDataType::Text);
+        assert_eq!(CellDataType::infer("123"), CellDataType::Number);
+        assert_eq!(CellDataType::infer("123.45"), CellDataType::Number);
+        assert_eq!(CellDataType::infer("$1,234.56"), CellDataType::Number);
+        assert_eq!(CellDataType::infer("true"), CellDataType::Boolean);
+        assert_eq!(CellDataType::infer("YES"), CellDataType::Boolean);
+        assert_eq!(CellDataType::infer("=SUM(A1:A10)"), CellDataType::Formula);
+        assert_eq!(CellDataType::infer("2024-01-15"), CellDataType::Date);
+        assert_eq!(CellDataType::infer("01/15/2024"), CellDataType::Date);
+    }
+
+    #[test]
+    fn test_merged_cell() {
+        let cell = NormalizedCell::text("Merged")
+            .with_col_span(2)
+            .with_row_span(3);
+
+        assert!(cell.is_merged());
+        assert_eq!(cell.col_span, 2);
+        assert_eq!(cell.row_span, 3);
+    }
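+
+    // Illustrative sketch (hypothetical test, not from the original patch):
+    // exercises the builder flow end-to-end with a merged header cell.
+    #[test]
+    fn test_merged_header_sketch() {
+        let mut table = NormalizedTable::new(RegionId::new()).with_column_count(2);
+
+        let mut header = NormalizedRow::header();
+        header.add_cell(NormalizedCell::text("Q1 / Q2").with_col_span(2));
+        header.add_cell(NormalizedCell::merged_continuation());
+        table.add_row(header);
+
+        assert_eq!(table.rows[0].cell_count(), 2);
+        assert!(table.rows[0].cells[1].is_merged_continuation);
+    }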
+
+    #[test]
+    fn test_merged_continuation() {
+        let cell = NormalizedCell::merged_continuation();
+        assert!(cell.is_merged_continuation);
+        assert!(cell.is_empty());
+    }
+}
diff --git a/crates/nvisy-document/src/text/mod.rs b/crates/nvisy-document/src/text/mod.rs
new file mode 100644
index 0000000..cfcdc25
--- /dev/null
+++ b/crates/nvisy-document/src/text/mod.rs
@@ -0,0 +1,69 @@
+//! Text extraction traits and types.
+//!
+//! This module defines the [`TextExtractor`] trait for extracting text
+//! content from documents.
+
+mod types;
+
+use async_trait::async_trait;
+pub use types::ExtractedText;
+
+use crate::error::Result;
+use crate::format::Document;
+
+/// Trait for document text extraction.
+///
+/// This trait is implemented by [`Document`] types that support extracting
+/// native text content. Documents that are image-based (scanned PDFs, images)
+/// should not implement this trait; they require OCR, which is handled externally.
+///
+/// # Example
+///
+/// ```ignore
+/// use nvisy_document::{Document, TextExtractor, ExtractedText};
+///
+/// async fn extract_document_text<D>(doc: &D) -> Result<ExtractedText>
+/// where
+///     D: TextExtractor,
+/// {
+///     let text = doc.extract_text().await?;
+///
+///     if text.needs_ocr {
+///         println!("Document may need OCR for complete extraction");
+///     }
+///
+///     println!("Extracted {} words", text.word_count());
+///     Ok(text)
+/// }
+/// ```
+#[async_trait]
+pub trait TextExtractor: Document {
+    /// Extracts all text from the document.
+    ///
+    /// Returns [`ExtractedText`] containing:
+    /// - Raw concatenated text
+    /// - Text organized by page
+    /// - Text mapped to regions
+    /// - Whether OCR might be needed for complete extraction
+    async fn extract_text(&self) -> Result<ExtractedText>;
+
+    /// Extracts text from a specific page.
+    ///
+    /// # Arguments
+    ///
+    /// * `page` - The page number (1-indexed)
+    ///
+    /// # Returns
+    ///
+    /// The text content of the page, or `None` if the page doesn't exist.
+    async fn extract_text_for_page(&self, page: u32) -> Result<Option<String>>;
+
+    /// Returns whether this document likely needs OCR for text extraction.
+    ///
+    /// This is a quick heuristic check without performing full extraction.
+    /// Returns `true` if:
+    /// - Document appears to be a scanned image
+    /// - Document has no extractable text layer
+    /// - Document is an image format
+    fn needs_ocr(&self) -> bool;
+}
diff --git a/crates/nvisy-document/src/text/types.rs b/crates/nvisy-document/src/text/types.rs
new file mode 100644
index 0000000..30039e4
--- /dev/null
+++ b/crates/nvisy-document/src/text/types.rs
@@ -0,0 +1,162 @@
+//! Text extraction types.
+
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+
+use crate::format::RegionId;
+
+/// Result of text extraction from a document.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ExtractedText {
+    /// Full document text concatenated.
+    pub raw: String,
+
+    /// Text grouped by page number (1-indexed).
+    pub by_page: HashMap<u32, String>,
+
+    /// Text mapped to region IDs.
+    pub by_region: HashMap<RegionId, String>,
+
+    /// Whether the document likely needs OCR for full text extraction.
+    ///
+    /// True if:
+    /// - Document appears to be scanned (images with no text layer)
+    /// - Text extraction yielded very little content relative to page count
+    /// - Document contains primarily images
+    pub needs_ocr: bool,
+
+    /// Extraction warnings or issues encountered.
+    pub warnings: Vec<String>,
+}
+
+impl ExtractedText {
+    /// Creates a new empty extracted text result.
+    #[must_use]
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Creates an extracted text result indicating OCR is needed.
+    #[must_use]
+    pub fn needs_ocr() -> Self {
+        Self {
+            needs_ocr: true,
+            ..Default::default()
+        }
+    }
+
+    /// Creates an extracted text result from raw text.
+    #[must_use]
+    pub fn from_raw(text: impl Into<String>) -> Self {
+        Self {
+            raw: text.into(),
+            ..Default::default()
+        }
+    }
+
+    /// Sets the raw text.
+    #[must_use]
+    pub fn with_raw(mut self, text: impl Into<String>) -> Self {
+        self.raw = text.into();
+        self
+    }
+
+    /// Adds text for a specific page.
+    #[must_use]
+    pub fn with_page(mut self, page: u32, text: impl Into<String>) -> Self {
+        self.by_page.insert(page, text.into());
+        self
+    }
+
+    /// Adds text for a specific region.
+    #[must_use]
+    pub fn with_region(mut self, region_id: RegionId, text: impl Into<String>) -> Self {
+        self.by_region.insert(region_id, text.into());
+        self
+    }
+
+    /// Adds a warning message.
+    #[must_use]
+    pub fn with_warning(mut self, warning: impl Into<String>) -> Self {
+        self.warnings.push(warning.into());
+        self
+    }
+
+    /// Returns the text for a specific page.
+    #[must_use]
+    pub fn page_text(&self, page: u32) -> Option<&str> {
+        self.by_page.get(&page).map(String::as_str)
+    }
+
+    /// Returns the text for a specific region.
+    #[must_use]
+    pub fn region_text(&self, region_id: RegionId) -> Option<&str> {
+        self.by_region.get(&region_id).map(String::as_str)
+    }
+
+    /// Returns the total character count.
+    #[must_use]
+    pub fn char_count(&self) -> usize {
+        self.raw.chars().count()
+    }
+
+    /// Returns an approximate word count.
+    #[must_use]
+    pub fn word_count(&self) -> usize {
+        self.raw.split_whitespace().count()
+    }
+
+    /// Returns whether any text was extracted.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.raw.is_empty()
+    }
+
+    /// Returns the number of pages with extracted text.
+    #[must_use]
+    pub fn page_count(&self) -> usize {
+        self.by_page.len()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_extracted_text_builder() {
+        let region_id = RegionId::new();
+        let text = ExtractedText::new()
+            .with_raw("Hello world")
+            .with_page(1, "Hello")
+            .with_page(2, "world")
+            .with_region(region_id, "Hello");
+
+        assert_eq!(text.raw, "Hello world");
+        assert_eq!(text.page_text(1), Some("Hello"));
+        assert_eq!(text.page_text(2), Some("world"));
+        assert_eq!(text.region_text(region_id), Some("Hello"));
+        assert!(!text.needs_ocr);
+    }
+
+    #[test]
+    fn test_needs_ocr() {
+        let text = ExtractedText::needs_ocr();
+        assert!(text.needs_ocr);
+        assert!(text.is_empty());
+    }
+
+    #[test]
+    fn test_word_count() {
+        let text = ExtractedText::from_raw("Hello world, this is a test.");
+        assert_eq!(text.word_count(), 6);
+    }
+
+    #[test]
+    fn test_from_raw() {
+        let text = ExtractedText::from_raw("Simple text");
+        assert_eq!(text.raw, "Simple text");
+        assert!(text.by_page.is_empty());
+    }
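+
+    // Illustrative sketch (hypothetical test, not from the original patch):
+    // warnings travel together with the needs_ocr flag on the result.
+    #[test]
+    fn test_needs_ocr_with_warning_sketch() {
+        let text = ExtractedText::needs_ocr()
+            .with_warning("no text layer found on pages 1-3");
+        assert!(text.needs_ocr);
+        assert_eq!(text.warnings.len(), 1);
+    }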
+}
diff --git a/crates/nvisy-docx/src/lib.rs b/crates/nvisy-docx/src/lib.rs
index 40b31c4..0a0c451 100644
--- a/crates/nvisy-docx/src/lib.rs
+++ b/crates/nvisy-docx/src/lib.rs
@@ -1,20 +1,6 @@
-//! DOCX document format support for nvisy.
-//!
-//! This crate provides a `DocumentFormat` implementation for Microsoft Word
-//! DOCX files (.docx).
-//!
-//! # Example
-//!
-//! ```ignore
-//! use nvisy_docx::DocxFormat;
-//! use nvisy_engine::Engine;
-//!
-//! let engine = Engine::new();
-//! let doc = engine.load_docx(data).await?;
-//! ```
-
 #![forbid(unsafe_code)]
 #![cfg_attr(docsrs, feature(doc_cfg))]
+#![doc = include_str!("../README.md")]
 
 mod document;
 mod format;
diff --git a/crates/nvisy-pdf/src/lib.rs b/crates/nvisy-pdf/src/lib.rs
index 5011638..4b72b88 100644
--- a/crates/nvisy-pdf/src/lib.rs
+++ b/crates/nvisy-pdf/src/lib.rs
@@ -1,18 +1,6 @@
-//! PDF document format support for nvisy.
-//!
-//! This crate provides a `DocumentFormat` implementation for PDF files (.pdf).
-//!
-//! # Example
-//!
-//! ```ignore
-//! use nvisy_pdf::PdfFormat;
-//! use nvisy_engine::Engine;
-//!
-//! let engine = Engine::new().with_pdf(PdfFormat::new());
-//! ```
-
 #![forbid(unsafe_code)]
 #![cfg_attr(docsrs, feature(doc_cfg))]
+#![doc = include_str!("../README.md")]
 
 mod document;
 mod format;
diff --git a/crates/nvisy-text/src/lib.rs b/crates/nvisy-text/src/lib.rs
index 5c5f5c4..b8b6981 100644
--- a/crates/nvisy-text/src/lib.rs
+++ b/crates/nvisy-text/src/lib.rs
@@ -1,19 +1,6 @@
-//! Plain text document format support for nvisy.
-//!
-//! This crate provides a `DocumentFormat` implementation for plain text
-//! files (.txt, .md, .rst, etc.).
-//!
-//! # Example
-//!
-//! ```ignore
-//! use nvisy_text::TextFormat;
-//! use nvisy_engine::Engine;
-//!
-//! let engine = Engine::new().with_text(TextFormat::new());
-//! ```
-
 #![forbid(unsafe_code)]
 #![cfg_attr(docsrs, feature(doc_cfg))]
+#![doc = include_str!("../README.md")]
 
 mod document;
 mod format;
diff --git a/docs/DATATYPES.md b/docs/DATATYPES.md
new file mode 100644
index 0000000..f3d5954
--- /dev/null
+++ b/docs/DATATYPES.md
@@ -0,0 +1,445 @@
+# Data Types
+
+This document defines the core data structures used throughout the processing pipeline.
+
+---
+
+## Format Detection
+
+### FormatResult
+
+Result of format detection and validation.
+
+```
+FormatResult {
+    format: Format              # Detected file format
+    mime_type: String           # MIME type (e.g., "application/pdf")
+    extension_matches: Bool     # True if claimed extension matches content
+    is_valid: Bool              # True if file passed integrity check
+    errors: List<String>        # Validation errors, if any
+}
+```
+
+### Format
+
+Enumeration of supported file formats.
+
+```
+Format =
+    # Documents
+    | PDF
+    | DOCX
+    | DOC
+    | RTF
+    | ODT
+
+    # Spreadsheets
+    | XLSX
+    | XLS
+    | CSV
+    | ODS
+
+    # Text
+    | TEXT
+    | MARKDOWN
+    | JSON
+    | XML
+    | HTML
+    | CODE
+
+    # Images
+    | PNG
+    | JPEG
+    | WEBP
+    | GIF
+    | TIFF
+    | BMP
+
+    # Archives
+    | ZIP
+    | TAR
+    | GZIP
+    | SEVENZ
+    | RAR
+
+    | UNKNOWN
+```
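+
+A minimal consumer sketch in Rust (the struct mirrors the pseudocode above;
+none of these names are published nvisy APIs):
+
+```rust
+struct FormatResult {
+    extension_matches: bool,
+    is_valid: bool,
+    errors: Vec<String>,
+}
+
+fn should_quarantine(result: &FormatResult) -> bool {
+    // Reject files that failed the integrity check or whose claimed
+    // extension disagrees with the detected magic bytes.
+    !result.is_valid || !result.extension_matches
+}
+```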
+
+---
+
+## Metadata
+
+### FileMetadata
+
+Document properties and embedded metadata.
+
+```
+FileMetadata {
+    # Basic properties
+    title: String?
+    author: String?
+    subject: String?
+    keywords: List<String>
+
+    # Timestamps
+    created_at: DateTime?
+    modified_at: DateTime?
+
+    # Document-specific
+    page_count: Int?
+    word_count: Int?
+    character_count: Int?
+
+    # Image-specific
+    dimensions: Dimensions?
+    color_space: String?
+    bit_depth: Int?
+
+    # Media-specific
+    duration: Duration?
+
+    # Embedded metadata
+    exif: Map<String, String>?
+    xmp: Map<String, String>?
+
+    # Archive-specific
+    entry_count: Int?
+    total_uncompressed_size: Int?
+}
+```
+
+### Dimensions
+
+Width and height in pixels.
+
+```
+Dimensions {
+    width: Int
+    height: Int
+}
+```
+
+---
+
+## Regions
+
+### Region
+
+A semantic segment of a document with position information.
+
+```
+Region {
+    id: RegionId              # Unique identifier (UUID)
+    page: Int?                # Page number (1-indexed), if applicable
+    bounds: BoundingBox       # Position in normalized coordinates
+    text: String?             # Text content, if extractable
+    kind: RegionKind          # Semantic type
+    status: RegionStatus?     # Processing status
+    source: RegionSource      # How region was identified
+    parent: RegionId?         # Parent region, if nested
+    children: List<RegionId>  # Child regions, if container
+}
+```
+
+### RegionId
+
+Stable unique identifier for a region.
+
+```
+RegionId = UUID
+
+# Display format: "region_" + first 8 chars of UUID
+# Example: "region_a1b2c3d4"
+```
+
+### RegionKind
+
+Classification of region by semantic type.
+
+```
+RegionKind =
+    | Text        # Paragraphs, sentences
+    | Heading     # Titles, section headers
+    | Table       # Tabular data container
+    | TableRow    # Row within table
+    | TableCell   # Cell within row
+    | Image       # Embedded graphics
+    | List        # Bulleted/numbered list
+    | ListItem    # Item within list
+    | Header      # Page header
+    | Footer      # Page footer
+    | Footnote    # Footnotes/endnotes
+    | Code        # Code blocks
+    | Quote       # Block quotes
+    | Formula     # Math equations
+    | Link        # Hyperlinks
+    | FormField   # Interactive form elements
+    | Annotation  # Comments, annotations
+    | Unknown     # Unclassified content
+```
+
+### RegionSource
+
+How the region was identified.
+
+```
+RegionSource =
+    | Parser  # Extracted by document parser
+    | Layout  # Detected by layout analysis
+    | OCR     # Identified by OCR
+    | User    # Manually defined by user
+    | VLM     # Identified by vision-language model
+```
+
+### RegionStatus
+
+Current state of the region in an editing session.
+
+```
+RegionStatus =
+    | Active    # Normal, editable state
+    | Modified  # Content has been changed
+    | Deleted   # Marked for deletion
+    | Locked    # Cannot be modified
+```
+
+### BoundingBox
+
+Position in normalized coordinates (0.0 to 1.0 relative to page/container).
+
+```
+BoundingBox {
+    x: Float       # Left edge (0.0 = left of page)
+    y: Float       # Top edge (0.0 = top of page)
+    width: Float   # Width as fraction of page width
+    height: Float  # Height as fraction of page height
+}
+
+# Coordinate system:
+#   Origin (0,0) is top-left
+#   X increases left to right
+#   Y increases top to bottom
+#   Full page = {x: 0, y: 0, width: 1, height: 1}
+```
+
+### Point
+
+A 2D coordinate.
+
+```
+Point {
+    x: Float
+    y: Float
+}
+```
+
+---
+
+## Tables
+
+### NormalizedTable
+
+Standardized table structure across all formats.
+
+```
+NormalizedTable {
+    id: RegionId      # Reference to table region
+    rows: List<NormalizedRow>
+    column_count: Int
+    has_header: Bool
+    header_row_count: Int
+    caption: String?  # Optional table caption or title
+}
+```
+
+### NormalizedRow
+
+A row within a normalized table.
+
+```
+NormalizedRow {
+    cells: List<NormalizedCell>
+    is_header: Bool
+}
+```
+
+### NormalizedCell
+
+A cell within a normalized table.
+
+```
+NormalizedCell {
+    text: String
+    col_span: Int  # Number of columns this cell spans
+    row_span: Int  # Number of rows this cell spans
+    data_type: CellDataType
+    is_merged_continuation: Bool  # True if this is a continuation of a merged cell
+}
+```
+
+### CellDataType
+
+Inferred data type of cell content.
+
+```
+CellDataType =
+    | Text
+    | Number
+    | Date
+    | DateTime
+    | Boolean
+    | Formula
+    | Empty
+```
+
+---
+
+## Text Extraction
+
+### ExtractedText
+
+Result of native text extraction.
+
+```
+ExtractedText {
+    raw: String                       # All text concatenated
+    by_page: Map<Int, String>         # Text grouped by page number
+    by_region: Map<RegionId, String>  # Text mapped to regions
+    needs_ocr: Bool                   # True if document appears scanned
+    warnings: List<String>            # Extraction warnings, if any
+}
+```
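+
+A sketch of the "very little text relative to page count" signal that drives
+`needs_ocr` (the threshold here is illustrative, not from the codebase):
+
+```rust
+fn likely_needs_ocr(raw: &str, page_count: u32) -> bool {
+    let pages = page_count.max(1) as usize;
+    // Scanned documents typically yield almost no native text per page.
+    raw.chars().count() / pages < 64
+}
+```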
+
+---
+
+## Archives
+
+### ArchiveContents
+
+Contents of an unpacked archive.
+
+```
+ArchiveContents {
+    entries: List<ArchiveEntry>
+    total_size: Int       # Total uncompressed size in bytes
+    compressed_size: Int  # Compressed size in bytes
+}
+```
+
+### ArchiveEntry
+
+A single entry within an archive.
+
+```
+ArchiveEntry {
+    path: String               # Relative path within archive
+    size: Int                  # Uncompressed size in bytes
+    is_directory: Bool
+    content_kind: ContentKind  # Detected content type
+    nested: ArchiveContents?   # If entry is itself an archive
+}
+```
+
+### ContentKind
+
+High-level content classification.
+
+```
+ContentKind =
+    | Text
+    | Document
+    | Spreadsheet
+    | Image
+    | Archive
+    | Unknown
+```
+
+---
+
+## Thumbnails
+
+### Thumbnail
+
+A generated preview image.
+
+```
+Thumbnail {
+    data: Bytes  # Encoded image data
+    width: Int
+    height: Int
+    format: ImageFormat
+    page: Int?   # Page number for multi-page documents
+}
+```
+
+### ImageFormat
+
+Output format for thumbnails.
+
+```
+ImageFormat =
+    | PNG
+    | JPEG
+    | WEBP
+```
+
+### ThumbnailConfig
+
+Configuration for thumbnail generation.
+
+```
+ThumbnailConfig {
+    max_width: Int   # Maximum width in pixels
+    max_height: Int  # Maximum height in pixels
+    format: ImageFormat
+    quality: Int     # 1-100, for lossy formats
+    pages: PageSelection
+}
+```
+
+### PageSelection
+
+Which pages to generate thumbnails for.
+
+```
+PageSelection =
+    | First              # Only first page
+    | Range(start, end)  # Specific page range
+    | All                # All pages
+```
+
+---
+
+## Processing Result
+
+### ProcessingResult
+
+Complete result of the file processing pipeline.
+
+```
+ProcessingResult {
+    format: Result<FormatResult>
+    metadata: Result<FileMetadata>?
+    regions: Result<List<Region>>?
+    text: Result<ExtractedText>?
+    normalized_tables: Result<List<NormalizedTable>>?
+    thumbnails: Result<List<Thumbnail>>?
+    archive_contents: Result<ArchiveContents>?
+}
+```
+
+### ProcessingOptions
+
+Configuration for the processing pipeline.
+
+```
+ProcessingOptions {
+    claimed_extension: String?
+    extract_metadata: Bool
+    extract_regions: Bool
+    extract_text: Bool
+    normalize_tables: Bool
+    generate_thumbnails: Bool
+    unpack_archive: Bool
+    thumbnail_config: ThumbnailConfig?
+    max_archive_depth: Int
+}
+```
diff --git a/docs/PIPELINE.md b/docs/PIPELINE.md
new file mode 100644
index 0000000..e9c9da3
--- /dev/null
+++ b/docs/PIPELINE.md
@@ -0,0 +1,635 @@
+# Processing Pipeline
+
+This document describes the file processing pipeline implemented by the runtime crates.
+
+## Pipeline Overview
+
+```
+                    ┌─────────────────┐
+                    │   Input File    │
+                    └────────┬────────┘
+                             │
+                             ▼
+                    ┌─────────────────┐
+                    │ Format Detection│
+                    │  & Validation   │
+                    └────────┬────────┘
+                             │
+              ┌──────────────┼──────────────┐
+              │              │              │
+              ▼              ▼              ▼
+       ┌────────────┐ ┌────────────┐ ┌────────────┐
+       │  Metadata  │ │   Region   │ │  Archive   │
+       │ Extraction │ │ Extraction │ │ Unpacking  │
+       └────────────┘ └─────┬──────┘ └────────────┘
+                            │
+               ┌────────────┼────────────┐
+               │            │            │
+               ▼            ▼            ▼
+        ┌────────────┐ ┌────────────┐ ┌────────────┐
+        │    Text    │ │   Table    │ │ Thumbnail  │
+        │ Extraction │ │Normalizatn │ │ Generation │
+        └────────────┘ └────────────┘ └────────────┘
+```
+
+---
+
+## 1. Format Detection & Validation
+
+Validates file integrity and identifies the actual format regardless of file extension.
+
+### Algorithm
+
+```
+function detect_format(file_bytes, claimed_extension):
+    # Read magic bytes (first 16-32 bytes)
+    magic = file_bytes[0:32]
+
+    # Match against known signatures
+    detected_format = match magic:
+        [0x25, 0x50, 0x44, 0x46] -> PDF
+        [0x50, 0x4B, 0x03, 0x04] -> ZIP_BASED   # Could be DOCX, XLSX, ODT, etc.
+        [0x89, 0x50, 0x4E, 0x47] -> PNG
+        [0xFF, 0xD8, 0xFF] -> JPEG
+        [0x52, 0x49, 0x46, 0x46] -> RIFF_BASED  # Could be WEBP, AVI, etc.
+ ... + _ -> UNKNOWN + + # For container formats, inspect contents + if detected_format == ZIP_BASED: + detected_format = inspect_zip_contents(file_bytes) + + # Validate extension matches content + is_extension_valid = matches(detected_format, claimed_extension) + + # Attempt to parse to verify integrity + integrity_check = try_parse(file_bytes, detected_format) + + return FormatResult { + format: detected_format, + mime_type: get_mime_type(detected_format), + extension_matches: is_extension_valid, + is_valid: integrity_check.success, + errors: integrity_check.errors + } +``` + +### ZIP-Based Format Detection + +``` +function inspect_zip_contents(zip_bytes): + entries = list_zip_entries(zip_bytes) + + if contains(entries, "[Content_Types].xml"): + if contains(entries, "word/document.xml"): + return DOCX + if contains(entries, "xl/workbook.xml"): + return XLSX + if contains(entries, "ppt/presentation.xml"): + return PPTX + + if contains(entries, "mimetype"): + mimetype = read_entry(zip_bytes, "mimetype") + if mimetype == "application/vnd.oasis.opendocument.text": + return ODT + if mimetype == "application/vnd.oasis.opendocument.spreadsheet": + return ODS + + return ZIP +``` + +--- + +## 2. Metadata Extraction + +Extracts document properties and embedded metadata. + +### Algorithm + +``` +function extract_metadata(file_bytes, format): + metadata = Metadata {} + + match format: + PDF: + info_dict = parse_pdf_info_dictionary(file_bytes) + metadata.title = info_dict["Title"] + metadata.author = info_dict["Author"] + metadata.created = parse_pdf_date(info_dict["CreationDate"]) + metadata.modified = parse_pdf_date(info_dict["ModDate"]) + metadata.page_count = count_pdf_pages(file_bytes) + + DOCX: + core_xml = extract_zip_entry(file_bytes, "docProps/core.xml") + app_xml = extract_zip_entry(file_bytes, "docProps/app.xml") + metadata.title = xpath(core_xml, "//dc:title") + metadata.author = xpath(core_xml, "//dc:creator") + metadata.created = xpath(core_xml, "//dcterms:created") + metadata.page_count = xpath(app_xml, "//Pages") + metadata.word_count = xpath(app_xml, "//Words") + + IMAGE: + exif = parse_exif(file_bytes) + metadata.dimensions = get_image_dimensions(file_bytes) + metadata.created = exif["DateTimeOriginal"] + metadata.camera = exif["Make"] + " " + exif["Model"] + metadata.gps = extract_gps_coordinates(exif) + + ARCHIVE: + entries = list_archive_entries(file_bytes) + metadata.entry_count = length(entries) + metadata.total_uncompressed_size = sum(entry.size for entry in entries) + + return metadata +``` + +--- + +## 3. Region Extraction + +Parses documents into semantic regions with positions. 
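+
+The pseudocode below leans on a `normalize_bounds` helper that converts
+absolute page coordinates into the 0.0-1.0 space defined in DATATYPES.md.
+A minimal Rust sketch (names and signature are illustrative):
+
+```rust
+struct BoundingBox {
+    x: f64,
+    y: f64,
+    width: f64,
+    height: f64,
+}
+
+/// `bbox` is (x, y, width, height) in absolute units with a top-left origin;
+/// `page` is (page_width, page_height) in the same units.
+fn normalize_bounds(bbox: (f64, f64, f64, f64), page: (f64, f64)) -> BoundingBox {
+    let (x, y, w, h) = bbox;
+    let (pw, ph) = page;
+    BoundingBox {
+        x: x / pw,
+        y: y / ph,
+        width: w / pw,
+        height: h / ph,
+    }
+}
+```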
+ +### Algorithm + +``` +function extract_regions(file_bytes, format): + regions = [] + + match format: + PDF: + for page_num, page in enumerate_pages(file_bytes): + page_regions = extract_pdf_page_regions(page, page_num) + regions.extend(page_regions) + + DOCX: + document_xml = extract_zip_entry(file_bytes, "word/document.xml") + body = parse_xml(document_xml).body + regions = extract_docx_regions(body) + + XLSX: + workbook = parse_xlsx(file_bytes) + for sheet in workbook.sheets: + table_region = create_table_region(sheet) + regions.append(table_region) + + # Build hierarchy + regions = build_region_tree(regions) + + return regions +``` + +### PDF Region Extraction + +``` +function extract_pdf_page_regions(page, page_num): + regions = [] + content_stream = page.content_stream + + # Parse text blocks with positions + text_blocks = extract_text_blocks(content_stream) + for block in text_blocks: + region = Region { + id: generate_uuid(), + page: page_num, + bounds: normalize_bounds(block.bbox, page.dimensions), + text: block.text, + kind: classify_text_block(block) # Heading, Text, etc. + } + regions.append(region) + + # Detect tables using layout analysis + tables = detect_tables_from_layout(text_blocks) + for table in tables: + table_region = Region { + id: generate_uuid(), + page: page_num, + bounds: table.bounds, + kind: TABLE, + children: [] + } + + for row in table.rows: + row_region = create_row_region(row, table_region.id) + table_region.children.append(row_region.id) + regions.append(row_region) + + regions.append(table_region) + + # Extract images + images = extract_images(content_stream) + for image in images: + regions.append(Region { + id: generate_uuid(), + page: page_num, + bounds: normalize_bounds(image.bbox, page.dimensions), + kind: IMAGE + }) + + return regions +``` + +### DOCX Region Extraction + +``` +function extract_docx_regions(body): + regions = [] + position = 0 + + for element in body.children: + match element.tag: + "w:p": # Paragraph + style = get_paragraph_style(element) + kind = match style: + "Heading1", "Heading2", ... -> HEADING + "ListParagraph" -> LIST_ITEM + _ -> TEXT + + region = Region { + id: generate_uuid(), + bounds: estimate_bounds(position), + text: extract_paragraph_text(element), + kind: kind + } + regions.append(region) + position += 1 + + "w:tbl": # Table + table_region = extract_docx_table(element, position) + regions.append(table_region) + regions.extend(table_region.all_descendants()) + position += 1 + + return regions +``` + +--- + +## 4. Table Normalization + +Converts tables from various formats into a consistent structure. 
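+
+The heart of the normalization below is the occupancy grid that resolves
+merged cells. A Rust sketch of just that step (types and names are
+illustrative, not the crate's API):
+
+```rust
+/// One grid slot: which source cell covers it, and whether this slot is the
+/// cell's top-left origin (non-origin slots become merged continuations).
+#[derive(Clone, Copy)]
+struct Slot {
+    source: usize,
+    is_origin: bool,
+}
+
+/// `cells` holds `(row, col, row_span, col_span)` tuples, indexed by position.
+fn build_grid(cells: &[(usize, usize, usize, usize)], rows: usize, cols: usize) -> Vec<Vec<Option<Slot>>> {
+    let mut grid = vec![vec![None; cols]; rows];
+    for (i, &(r0, c0, rs, cs)) in cells.iter().enumerate() {
+        for r in r0..r0 + rs {
+            for c in c0..c0 + cs {
+                grid[r][c] = Some(Slot { source: i, is_origin: r == r0 && c == c0 });
+            }
+        }
+    }
+    grid
+}
+```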
+ +### Algorithm + +``` +function normalize_table(table_region, source_format): + raw_cells = extract_raw_cells(table_region, source_format) + + # Step 1: Determine grid dimensions + max_row = max(cell.row for cell in raw_cells) + max_col = max(cell.col + cell.col_span - 1 for cell in raw_cells) + + # Step 2: Build occupancy grid for merged cells + grid = Grid(max_row + 1, max_col + 1) + for cell in raw_cells: + for r in range(cell.row, cell.row + cell.row_span): + for c in range(cell.col, cell.col + cell.col_span): + grid[r][c] = CellRef { + source_cell: cell, + is_origin: (r == cell.row and c == cell.col) + } + + # Step 3: Detect header rows + header_row_count = detect_header_rows(raw_cells, source_format) + + # Step 4: Infer column types + column_types = [] + for col in range(max_col + 1): + col_values = [grid[r][col].source_cell.text for r in range(header_row_count, max_row + 1)] + column_types.append(infer_data_type(col_values)) + + # Step 5: Build normalized structure + normalized = NormalizedTable { + id: table_region.id, + column_count: max_col + 1, + has_header: header_row_count > 0, + header_row_count: header_row_count, + rows: [] + } + + for r in range(max_row + 1): + row = NormalizedRow { + is_header: r < header_row_count, + cells: [] + } + for c in range(max_col + 1): + cell_ref = grid[r][c] + row.cells.append(NormalizedCell { + text: cell_ref.source_cell.text if cell_ref.is_origin else "", + col_span: cell_ref.source_cell.col_span if cell_ref.is_origin else 1, + row_span: cell_ref.source_cell.row_span if cell_ref.is_origin else 1, + data_type: column_types[c], + is_merged_continuation: not cell_ref.is_origin + }) + normalized.rows.append(row) + + return normalized +``` + +### Header Detection + +``` +function detect_header_rows(cells, format): + # Format-specific hints + match format: + XLSX: + # Check for explicit header style + if has_header_style(cells[0]): + return 1 + DOCX: + # Check for tblHeader property + if has_table_header_property(cells): + return count_header_rows() + + # Heuristic detection + first_row_cells = [c for c in cells if c.row == 0] + + # Check if first row is bold + if all(cell.is_bold for cell in first_row_cells): + return 1 + + # Check if first row has different background + if has_distinct_background(first_row_cells, cells): + return 1 + + # Check if first row contains no numeric data + if all(not is_numeric(cell.text) for cell in first_row_cells): + data_rows = [c for c in cells if c.row > 0] + if any(is_numeric(cell.text) for cell in data_rows): + return 1 + + return 0 +``` + +--- + +## 5. Text Extraction + +Extracts native text content without OCR. 
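+
+The reading-order sort described further below reduces to: group text objects
+by baseline, order the groups top-to-bottom, then order each line
+left-to-right. A Rust sketch (the `(x, y, text)` tuples are illustrative):
+
+```rust
+fn sort_by_reading_order(mut objs: Vec<(f64, f64, String)>, tolerance: f64) -> Vec<(f64, f64, String)> {
+    // Sort by vertical position first so line grouping is a single pass.
+    objs.sort_by(|a, b| a.1.total_cmp(&b.1));
+
+    let mut lines: Vec<Vec<(f64, f64, String)>> = Vec::new();
+    for obj in objs {
+        let same_line = lines
+            .last()
+            .is_some_and(|line| (obj.1 - line[0].1).abs() < tolerance);
+        if same_line {
+            lines.last_mut().unwrap().push(obj);
+        } else {
+            lines.push(vec![obj]);
+        }
+    }
+
+    // Within each visual line, order left-to-right, then flatten.
+    for line in &mut lines {
+        line.sort_by(|a, b| a.0.total_cmp(&b.0));
+    }
+    lines.into_iter().flatten().collect()
+}
+```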
+ +### Algorithm + +``` +function extract_text(file_bytes, format, regions): + match format: + PDF: + return extract_pdf_text(file_bytes, regions) + DOCX: + return extract_docx_text(file_bytes) + XLSX: + return extract_xlsx_text(file_bytes) + TEXT, MARKDOWN: + return decode_text(file_bytes) + HTML: + return strip_html_tags(decode_text(file_bytes)) + _: + return null # Requires OCR +``` + +### PDF Text Extraction + +``` +function extract_pdf_text(file_bytes, regions): + result = ExtractedText { + raw: "", + by_page: {}, + by_region: {} + } + + for page_num, page in enumerate_pages(file_bytes): + page_text = "" + + # Extract text following reading order + text_objects = extract_text_objects(page.content_stream) + text_objects = sort_by_reading_order(text_objects) + + for obj in text_objects: + page_text += obj.text + " " + + # Map to region if available + matching_region = find_region_containing(obj.position, regions) + if matching_region: + result.by_region[matching_region.id] += obj.text + " " + + result.by_page[page_num] = page_text.trim() + result.raw += page_text + + # Check if text extraction yielded results + if is_mostly_empty(result.raw): + result.needs_ocr = true + + return result +``` + +### Reading Order Detection + +``` +function sort_by_reading_order(text_objects): + # Group by approximate Y position (same line) + lines = group_by_y_position(text_objects, tolerance=5) + + # Sort lines top to bottom + lines = sort_by_y(lines) + + # Within each line, sort left to right + result = [] + for line in lines: + line = sort_by_x(line) + result.extend(line) + + return result +``` + +--- + +## 6. Archive Unpacking + +Extracts and processes files within archives. + +### Algorithm + +``` +function unpack_archive(file_bytes, format, max_depth=3): + if max_depth <= 0: + return ArchiveResult { error: "Max nesting depth exceeded" } + + entries = [] + + match format: + ZIP, DOCX, XLSX, ODT: + entries = list_zip_entries(file_bytes) + TAR: + entries = list_tar_entries(file_bytes) + GZIP: + decompressed = gunzip(file_bytes) + # Check if it's a tar inside + if starts_with_tar_magic(decompressed): + return unpack_archive(decompressed, TAR, max_depth) + return SingleFileResult { data: decompressed } + SEVENZ: + entries = list_7z_entries(file_bytes) + + result = ArchiveContents { + entries: [], + total_size: 0, + compressed_size: length(file_bytes) + } + + for entry in entries: + entry_info = ArchiveEntry { + path: entry.path, + size: entry.uncompressed_size, + is_directory: entry.is_directory, + content_kind: detect_content_kind(entry.path) + } + + # Check for nested archives + if is_archive_format(entry_info.content_kind): + entry_bytes = extract_entry(file_bytes, entry.path) + entry_info.nested = unpack_archive(entry_bytes, entry_info.content_kind, max_depth - 1) + + result.entries.append(entry_info) + result.total_size += entry.uncompressed_size + + return result +``` + +--- + +## 7. Thumbnail Generation + +Generates preview images for display. 
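+
+All of the renderers below share one resize rule: scale down to fit within
+`max_width` x `max_height` while preserving aspect ratio, and never scale up.
+A Rust sketch of that rule (illustrative, not the crate's API):
+
+```rust
+fn fit_within(width: u32, height: u32, max_w: u32, max_h: u32) -> (u32, u32) {
+    // Pick the tighter of the two constraints; cap at 1.0 so small images
+    // are never upscaled.
+    let scale = (max_w as f64 / width as f64)
+        .min(max_h as f64 / height as f64)
+        .min(1.0);
+    (
+        ((width as f64) * scale).round() as u32,
+        ((height as f64) * scale).round() as u32,
+    )
+}
+```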
+ +### Algorithm + +``` +function generate_thumbnails(file_bytes, format, config): + thumbnails = [] + + match format: + PDF: + thumbnails = generate_pdf_thumbnails(file_bytes, config) + DOCX: + # Convert to PDF first, then render + pdf_bytes = convert_docx_to_pdf(file_bytes) + thumbnails = generate_pdf_thumbnails(pdf_bytes, config) + XLSX: + thumbnails = generate_spreadsheet_thumbnail(file_bytes, config) + IMAGE: + thumbnails = generate_image_thumbnails(file_bytes, config) + TEXT, MARKDOWN, CODE: + thumbnails = generate_text_thumbnail(file_bytes, format, config) + ARCHIVE: + thumbnails = [generate_archive_icon()] + + return thumbnails +``` + +### PDF Thumbnail Generation + +``` +function generate_pdf_thumbnails(pdf_bytes, config): + thumbnails = [] + + pages_to_render = match config.pages: + FIRST -> [0] + RANGE(start, end) -> range(start, end) + ALL -> range(0, count_pages(pdf_bytes)) + + for page_num in pages_to_render: + # Render page to image at appropriate DPI + target_width = config.max_width + page_dims = get_page_dimensions(pdf_bytes, page_num) + dpi = calculate_dpi_for_width(page_dims, target_width) + + image = render_pdf_page(pdf_bytes, page_num, dpi) + + # Resize if needed + if image.width > config.max_width or image.height > config.max_height: + image = resize_preserving_aspect(image, config.max_width, config.max_height) + + # Encode to output format + encoded = encode_image(image, config.format, config.quality) + + thumbnails.append(Thumbnail { + data: encoded, + width: image.width, + height: image.height, + format: config.format, + page: page_num + }) + + return thumbnails +``` + +### Text/Code Thumbnail Generation + +``` +function generate_text_thumbnail(file_bytes, format, config): + text = decode_text(file_bytes) + + # Limit to visible portion + lines = split_lines(text)[:50] + + # Apply syntax highlighting if code + if format == CODE: + language = detect_language(text) + highlighted = apply_syntax_highlighting(lines, language) + else: + highlighted = lines + + # Render to image + image = render_text_to_image(highlighted, { + font: "monospace", + font_size: 12, + padding: 16, + max_width: config.max_width, + background: "#ffffff" + }) + + encoded = encode_image(image, config.format, config.quality) + + return [Thumbnail { + data: encoded, + width: image.width, + height: image.height, + format: config.format + }] +``` + +--- + +## Error Handling + +Each pipeline stage operates independently. Failures are captured but don't block other stages. 
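+
+In Rust terms, the "capture, don't abort" rule in the pseudocode below means
+each optional stage stores its own `Result` rather than propagating failures
+with `?`. A sketch (names are illustrative):
+
+```rust
+fn run_stage<T>(
+    enabled: bool,
+    stage: impl FnOnce() -> Result<T, String>,
+) -> Option<Result<T, String>> {
+    // `None` when the stage is disabled; `Some(Err(..))` when it ran and
+    // failed, without aborting the rest of the pipeline.
+    enabled.then(stage)
+}
+```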
+ +``` +function process_file(file_bytes, options): + result = ProcessingResult {} + + # Format detection is required + result.format = detect_format(file_bytes, options.claimed_extension) + if not result.format.is_valid: + return result # Cannot proceed with invalid file + + # Run remaining stages in parallel where possible + if options.extract_metadata: + result.metadata = try { extract_metadata(file_bytes, result.format) } + + if options.extract_regions: + result.regions = try { extract_regions(file_bytes, result.format) } + + if options.extract_text: + result.text = try { extract_text(file_bytes, result.format, result.regions) } + + if options.normalize_tables and result.regions: + tables = filter(result.regions, r -> r.kind == TABLE) + result.normalized_tables = try { normalize_tables(tables, result.format) } + + if options.generate_thumbnails: + result.thumbnails = try { generate_thumbnails(file_bytes, result.format, options.thumbnail_config) } + + if options.unpack_archive and is_archive(result.format): + result.archive_contents = try { unpack_archive(file_bytes, result.format) } + + return result +``` diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..5f5b8dd --- /dev/null +++ b/docs/README.md @@ -0,0 +1,31 @@ +# Runtime + +The runtime crates provide file processing capabilities for the collaboration platform. These crates handle local file operations before content is passed to external services (OCR, embeddings). + +## Overview + +When a user uploads a file, the runtime processes it through a pipeline that: + +1. Validates format and integrity +2. Extracts metadata +3. Identifies regions (paragraphs, tables, images, etc.) +4. Normalizes tabular data +5. Extracts native text +6. Unpacks archives +7. Generates thumbnails + +## Documentation + +- [Pipeline](./PIPELINE.md) — Processing stages and algorithms +- [Data Types](./DATATYPES.md) — Core data structures + +## Crate Structure + +| Crate | Responsibility | +|-------|----------------| +| `nvisy-core` | ContentData, ContentSource, errors | +| `nvisy-archive` | ZIP, TAR, 7Z handling | +| `nvisy-document` | PDF, DOCX parsing and region extraction | +| `nvisy-spreadsheet` | XLSX, CSV parsing | +| `nvisy-metadata` | Unified metadata extraction | +| `nvisy-thumbnail` | Preview image generation | From 43e655f04c2d90349aa738c6e886ac91796098b8 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Fri, 16 Jan 2026 07:18:41 +0100 Subject: [PATCH 2/5] feat: add nvisy-image crate, dynamic registry, and reformat Cargo.toml files - Add nvisy-image stub crate with ImageDocument and ImageFormat - Add dynamic FormatRegistry with type erasure for runtime format selection - Split nvisy-text structured formats into separate modules (xml, yaml, toml, ini) - Use csv 1.4 crate for CSV parsing - Use markdown 1.0 crate for Markdown parsing - Remove write operations from nvisy-document (operation module) - Reformat all Cargo.toml files with grouped dependencies and comments - Standardize package section format across all crates --- Cargo.lock | 58 ++ Cargo.toml | 35 +- crates/nvisy-archive/Cargo.toml | 42 +- crates/nvisy-core/Cargo.toml | 26 +- crates/nvisy-document/Cargo.toml | 10 +- crates/nvisy-document/src/error.rs | 56 +- .../nvisy-document/src/format/capabilities.rs | 482 +++------------- crates/nvisy-document/src/format/mod.rs | 36 +- crates/nvisy-document/src/format/page.rs | 2 +- .../nvisy-document/src/format/region/core.rs | 77 +-- .../nvisy-document/src/format/region/kind.rs | 34 +- 
.../nvisy-document/src/format/region/mod.rs | 10 +- .../src/format/region/status.rs | 60 +- crates/nvisy-document/src/lib.rs | 29 +- crates/nvisy-document/src/operation/insert.rs | 160 ------ crates/nvisy-document/src/operation/mod.rs | 542 ------------------ crates/nvisy-document/src/operation/redact.rs | 103 ---- crates/nvisy-document/src/operation/result.rs | 136 ----- crates/nvisy-document/src/operation/split.rs | 105 ---- crates/nvisy-docx/Cargo.toml | 6 +- crates/nvisy-docx/src/document.rs | 35 +- crates/nvisy-docx/src/format.rs | 28 +- crates/nvisy-engine/Cargo.toml | 11 +- crates/nvisy-engine/src/engine/mod.rs | 297 +++++----- crates/nvisy-engine/src/lib.rs | 16 +- crates/nvisy-engine/src/registry/mod.rs | 375 ++++++++++++ crates/nvisy-engine/src/session/history.rs | 213 ++----- crates/nvisy-engine/src/session/mod.rs | 238 ++------ crates/nvisy-image/Cargo.toml | 30 + crates/nvisy-image/README.md | 13 + crates/nvisy-image/src/document.rs | 56 ++ crates/nvisy-image/src/format.rs | 82 +++ crates/nvisy-image/src/lib.rs | 9 + crates/nvisy-pdf/Cargo.toml | 6 +- crates/nvisy-pdf/src/document.rs | 33 +- crates/nvisy-pdf/src/format.rs | 20 +- crates/nvisy-text/Cargo.toml | 11 +- crates/nvisy-text/README.md | 100 +++- crates/nvisy-text/src/document.rs | 79 --- crates/nvisy-text/src/documents/csv.rs | 355 ++++++++++++ crates/nvisy-text/src/documents/ini.rs | 229 ++++++++ crates/nvisy-text/src/documents/json.rs | 261 +++++++++ crates/nvisy-text/src/documents/markdown.rs | 343 +++++++++++ crates/nvisy-text/src/documents/mod.rs | 19 + crates/nvisy-text/src/documents/plain.rs | 207 +++++++ crates/nvisy-text/src/documents/toml.rs | 210 +++++++ crates/nvisy-text/src/documents/xml.rs | 174 ++++++ crates/nvisy-text/src/documents/yaml.rs | 189 ++++++ crates/nvisy-text/src/format.rs | 70 --- crates/nvisy-text/src/formats/csv.rs | 114 ++++ crates/nvisy-text/src/formats/ini.rs | 98 ++++ crates/nvisy-text/src/formats/json.rs | 98 ++++ crates/nvisy-text/src/formats/markdown.rs | 99 ++++ crates/nvisy-text/src/formats/mod.rs | 19 + crates/nvisy-text/src/formats/plain.rs | 74 +++ crates/nvisy-text/src/formats/toml.rs | 97 ++++ crates/nvisy-text/src/formats/xml.rs | 99 ++++ crates/nvisy-text/src/formats/yaml.rs | 98 ++++ crates/nvisy-text/src/lib.rs | 26 +- 59 files changed, 4074 insertions(+), 2466 deletions(-) delete mode 100644 crates/nvisy-document/src/operation/insert.rs delete mode 100644 crates/nvisy-document/src/operation/mod.rs delete mode 100644 crates/nvisy-document/src/operation/redact.rs delete mode 100644 crates/nvisy-document/src/operation/result.rs delete mode 100644 crates/nvisy-document/src/operation/split.rs create mode 100644 crates/nvisy-engine/src/registry/mod.rs create mode 100644 crates/nvisy-image/Cargo.toml create mode 100644 crates/nvisy-image/README.md create mode 100644 crates/nvisy-image/src/document.rs create mode 100644 crates/nvisy-image/src/format.rs create mode 100644 crates/nvisy-image/src/lib.rs delete mode 100644 crates/nvisy-text/src/document.rs create mode 100644 crates/nvisy-text/src/documents/csv.rs create mode 100644 crates/nvisy-text/src/documents/ini.rs create mode 100644 crates/nvisy-text/src/documents/json.rs create mode 100644 crates/nvisy-text/src/documents/markdown.rs create mode 100644 crates/nvisy-text/src/documents/mod.rs create mode 100644 crates/nvisy-text/src/documents/plain.rs create mode 100644 crates/nvisy-text/src/documents/toml.rs create mode 100644 crates/nvisy-text/src/documents/xml.rs create mode 100644 crates/nvisy-text/src/documents/yaml.rs 
delete mode 100644 crates/nvisy-text/src/format.rs create mode 100644 crates/nvisy-text/src/formats/csv.rs create mode 100644 crates/nvisy-text/src/formats/ini.rs create mode 100644 crates/nvisy-text/src/formats/json.rs create mode 100644 crates/nvisy-text/src/formats/markdown.rs create mode 100644 crates/nvisy-text/src/formats/mod.rs create mode 100644 crates/nvisy-text/src/formats/plain.rs create mode 100644 crates/nvisy-text/src/formats/toml.rs create mode 100644 crates/nvisy-text/src/formats/xml.rs create mode 100644 crates/nvisy-text/src/formats/yaml.rs diff --git a/Cargo.lock b/Cargo.lock index 0fc610e..ccd2d32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -206,6 +206,27 @@ dependencies = [ "typenum", ] +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + [[package]] name = "deflate64" version = "0.1.10" @@ -551,6 +572,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "markdown" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5cab8f2cadc416a82d2e783a1946388b31654d391d1c7d92cc1f03e295b1deb" +dependencies = [ + "unicode-id", +] + [[package]] name = "matchers" version = "0.2.0" @@ -687,9 +717,20 @@ dependencies = [ "nvisy-text", "serde", "serde_json", + "tokio", "uuid", ] +[[package]] +name = "nvisy-image" +version = "0.1.0" +dependencies = [ + "async-trait", + "bytes", + "nvisy-document", + "thiserror", +] + [[package]] name = "nvisy-pdf" version = "0.1.0" @@ -706,8 +747,13 @@ version = "0.1.0" dependencies = [ "async-trait", "bytes", + "csv", + "markdown", "nvisy-document", + "serde_json", "thiserror", + "tokio", + "tokio-test", ] [[package]] @@ -843,6 +889,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + [[package]] name = "scoped-tls" version = "1.0.1" @@ -1183,6 +1235,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "unicode-id" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ba288e709927c043cbe476718d37be306be53fb1fafecd0dbe36d072be2580" + [[package]] name = "unicode-ident" version = "1.0.19" diff --git a/Cargo.toml b/Cargo.toml index c81f9cc..60a8c09 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ members = [ "./crates/nvisy-docx", "./crates/nvisy-document", "./crates/nvisy-engine", + "./crates/nvisy-image", "./crates/nvisy-pdf", "./crates/nvisy-text", ] @@ -24,57 +25,53 @@ repository = "https://github.com/nvisycom/core" homepage = "https://github.com/nvisycom/core" documentation = "https://docs.rs/nvisy" -[workspace.dependencies] # Default features are disabled for certain dependencies to allow # downstream workspaces/crates to selectively enable them as needed. 
# # See for more details: https://github.com/rust-lang/cargo/issues/11329 +[workspace.dependencies] # Internal crates nvisy-archive = { path = "./crates/nvisy-archive", version = "0.1.0", features = [] } nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0", features = [] } nvisy-docx = { path = "./crates/nvisy-docx", version = "0.1.0", features = [] } nvisy-document = { path = "./crates/nvisy-document", version = "0.1.0", features = [] } nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0", features = [] } +nvisy-image = { path = "./crates/nvisy-image", version = "0.1.0", features = [] } nvisy-pdf = { path = "./crates/nvisy-pdf", version = "0.1.0", features = [] } nvisy-text = { path = "./crates/nvisy-text", version = "0.1.0", features = [] } -# Multithreading -rayon = { version = "1.11", default-features = false, features = [] } - -# Async I/O and file handling +# Async runtime and I/O tokio = { version = "1.49", default-features = false, features = [] } tokio-stream = { version = "0.1", default-features = false, features = [] } tokio-util = { version = "0.7", default-features = false, features = [] } futures = { version = "0.3", default-features = false, features = [] } async-trait = { version = "0.1", default-features = false, features = [] } + +# File system utilities walkdir = { version = "2.5", default-features = false, features = [] } memmap2 = { version = "0.9", default-features = false, features = [] } tempfile = { version = "3.24", default-features = false, features = [] } -# Tracing and observability -tracing = { version = "0.1", features = [] } -tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } - -# Error handling -thiserror = { version = "2.0", features = [] } -anyhow = { version = "1.0", features = ["backtrace"] } +# Multithreading +rayon = { version = "1.11", default-features = false, features = [] } # Serialization serde = { version = "1.0", features = [] } serde_json = { version = "1.0", features = [] } +csv = { version = "1.4", default-features = false, features = [] } # Data types and utilities uuid = { version = "1.19", features = [] } jiff = { version = "0.2", default-features = false, features = [] } size = { version = "0.5", default-features = false, features = [] } bytes = { version = "1.11", default-features = false, features = [] } - rust_decimal = { version = "1.36", default-features = false, features = [] } semver = { version = "1.0", default-features = false, features = [] } isolang = { version = "2.4", default-features = false, features = ["english_names"] } # Text processing and pattern matching +markdown = { version = "1.0.0", default-features = false, features = [] } regex = { version = "1.11", default-features = false, features = [] } regex-lite = { version = "0.1", default-features = false, features = ["std"] } fancy-regex = { version = "0.16", default-features = false, features = [] } @@ -82,7 +79,7 @@ aho-corasick = { version = "1.1", default-features = false, features = [] } unicode-segmentation = { version = "1.10", default-features = false, features = [] } hipstr = { version = "0.8", default-features = false, features = [] } -# Crypto and hashing +# Cryptography and hashing sha2 = { version = "0.10", default-features = false, features = [] } blake3 = { version = "1.8", default-features = false, features = [] } base64 = { version = "0.22", default-features = false, features = [] } @@ -90,7 +87,15 @@ hex = { version = "0.4", features = [] } zeroize = { version = "1.7", default-features = false, features 
= [] } rand = { version = "0.9", default-features = false, features = [] } -# Macros +# Error handling +thiserror = { version = "2.0", features = [] } +anyhow = { version = "1.0", features = ["backtrace"] } + +# Tracing and observability +tracing = { version = "0.1", features = [] } +tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } + +# Macros and derive utilities derive_more = { version = "2.0", default-features = false, features = [] } strum = { version = "0.27", default-features = false, features = [] } const_format = { version = "0.2", default-features = false, features = [] } diff --git a/crates/nvisy-archive/Cargo.toml b/crates/nvisy-archive/Cargo.toml index 706468b..2c49250 100644 --- a/crates/nvisy-archive/Cargo.toml +++ b/crates/nvisy-archive/Cargo.toml @@ -1,20 +1,24 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + [package] name = "nvisy-archive" +description = "Archive handling library for nvisy (ZIP, TAR, 7z, etc.)" +readme = "./README.md" + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } documentation = { workspace = true } -description = "Archive handling library for Nvisy, supports ZIP, TAR, 7z, and other archive formats" -keywords = ["archive", "zip", "tar", "7z", "compression", "extraction"] -categories = ["compression", "filesystem"] +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] [features] default = ["zip", "tar", "gzip", "bzip2", "xz"] @@ -26,26 +30,28 @@ bzip2 = ["dep:bzip2"] xz = ["dep:xz2"] [dependencies] -# Core dependencies +# Internal crates nvisy-core = { workspace = true } -bytes = { workspace = true } -# Utilities -strum = { workspace = true, features = ["derive"] } +# Data types +bytes = { workspace = true } -# Async and I/O +# Async runtime and I/O tokio = { workspace = true, features = ["fs", "io-util", "rt"] } -tempfile = { workspace = true, features = [] } +tempfile = { workspace = true } + +# Macros +strum = { workspace = true, features = ["derive"] } -# Archive formats -tar = { version = "0.4", optional = true, features = [] } -zip = { version = "7.1", optional = true, features = [] } -sevenz-rust = { version = "0.6", optional = true, features = [] } +# Archive formats (optional) +tar = { version = "0.4", optional = true } +zip = { version = "7.1", optional = true } +sevenz-rust = { version = "0.6", optional = true } -# Compression formats (all optional) -flate2 = { version = "1.1", optional = true, features = [] } -bzip2 = { version = "0.6", optional = true, features = [] } -xz2 = { version = "0.1", optional = true, features = [] } +# Compression formats (optional) +flate2 = { version = "1.1", optional = true } +bzip2 = { version = "0.6", optional = true } +xz2 = { version = "0.1", optional = true } [dev-dependencies] tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml index 390a46a..46029ed 100644 --- a/crates/nvisy-core/Cargo.toml +++ b/crates/nvisy-core/Cargo.toml @@ -2,12 +2,14 @@ [package] name = "nvisy-core" +description = "Core types and utilities for nvisy" +readme = "./README.md" + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme 
= "./README.md" authors = { workspace = true } repository = { workspace = true } @@ -22,26 +24,26 @@ rustdoc-args = ["--cfg", "docsrs"] # Async runtime and I/O tokio = { workspace = true, features = ["fs", "io-util", "rt", "macros"] } -# Data structures and utilities +# Data types uuid = { workspace = true, features = ["v4", "v7", "serde"] } jiff = { workspace = true, features = ["std", "serde"] } bytes = { workspace = true, features = ["serde"] } -# Cryptography -sha2 = { workspace = true, features = [] } -hex = { workspace = true, features = [] } - -# (De)serialization +# Serialization serde = { workspace = true, features = ["derive"] } -# Utilities -strum = { workspace = true, features = ["derive"] } -derive_more = { workspace = true, features = ["as_ref", "deref"] } +# Cryptography +sha2 = { workspace = true } +hex = { workspace = true } -# Error handling (moved from nvisy-error crate) +# Error handling thiserror = { workspace = true, features = ["std"] } hipstr = { workspace = true, features = ["std", "serde"] } +# Macros +strum = { workspace = true, features = ["derive"] } +derive_more = { workspace = true, features = ["as_ref", "deref"] } + [dev-dependencies] serde_json = { workspace = true, features = ["std"] } -tempfile = { workspace = true, features = [] } +tempfile = { workspace = true } diff --git a/crates/nvisy-document/Cargo.toml b/crates/nvisy-document/Cargo.toml index 3d6ec4b..4351dea 100644 --- a/crates/nvisy-document/Cargo.toml +++ b/crates/nvisy-document/Cargo.toml @@ -2,12 +2,14 @@ [package] name = "nvisy-document" +description = "Document abstraction layer for nvisy" +readme = "./README.md" + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } @@ -19,10 +21,10 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -# Core nvisy types +# Internal crates nvisy-core = { workspace = true } -# Async runtime +# Async runtime and I/O tokio = { workspace = true, features = ["sync", "io-util"] } async-trait = { workspace = true } @@ -39,7 +41,7 @@ base64 = { workspace = true, features = ["std"] } # Error handling thiserror = { workspace = true, features = ["std"] } -# Utilities +# Macros derive_more = { workspace = true, features = ["display", "from", "into", "deref", "deref_mut", "as_ref", "constructor"] } [dev-dependencies] diff --git a/crates/nvisy-document/src/error.rs b/crates/nvisy-document/src/error.rs index b6b5788..d5a2d77 100644 --- a/crates/nvisy-document/src/error.rs +++ b/crates/nvisy-document/src/error.rs @@ -1,4 +1,4 @@ -//! Error types for document operations. +//! Error types for document processing. use std::fmt; @@ -7,17 +7,17 @@ use crate::format::region::RegionId; /// A boxed error type for wrapping source errors. pub type BoxError = Box; -/// Result type for document operations. +/// Result type for document processing. pub type Result = std::result::Result; -/// The error type for document operations. +/// The error type for document processing. #[derive(Debug)] pub struct Error { kind: ErrorKind, source: Option, } -/// The kind of error that occurred during a document operation. +/// The kind of error that occurred during document processing. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ErrorKind { /// The document format is not supported. @@ -26,25 +26,19 @@ pub enum ErrorKind { /// The document could not be parsed. 
Parse { message: String }, - /// The requested operation is not supported by this format. - OperationNotSupported { operation: String }, - /// A referenced region was not found. RegionNotFound { id: RegionId }, /// A referenced page was not found. PageNotFound { page: u32 }, - /// An operation would result in invalid document state. - InvalidOperation { message: String }, - /// An I/O error occurred. Io { message: String }, /// Serialization/deserialization error. Serialization { message: String }, - /// The operation was cancelled. + /// The processing was cancelled. Cancelled, /// A timeout occurred. @@ -53,13 +47,10 @@ pub enum ErrorKind { /// Resource limit exceeded. ResourceLimit { resource: String }, - /// Session error (e.g., invalid session state). - Session { message: String }, - /// Conversion error. Conversion { message: String }, - /// Metadata error. + /// Metadata extraction error. Metadata { message: String }, /// Thumbnail generation error. @@ -108,10 +99,7 @@ impl Error { pub fn is_user_error(&self) -> bool { matches!( self.kind, - ErrorKind::RegionNotFound { .. } - | ErrorKind::PageNotFound { .. } - | ErrorKind::InvalidOperation { .. } - | ErrorKind::OperationNotSupported { .. } + ErrorKind::RegionNotFound { .. } | ErrorKind::PageNotFound { .. } ) } @@ -144,13 +132,6 @@ impl Error { }) } - /// Creates an operation not supported error. - pub fn operation_not_supported(operation: impl Into) -> Self { - Self::new(ErrorKind::OperationNotSupported { - operation: operation.into(), - }) - } - /// Creates a region not found error. pub fn region_not_found(id: RegionId) -> Self { Self::new(ErrorKind::RegionNotFound { id }) @@ -161,13 +142,6 @@ impl Error { Self::new(ErrorKind::PageNotFound { page }) } - /// Creates an invalid operation error. - pub fn invalid_operation(message: impl Into) -> Self { - Self::new(ErrorKind::InvalidOperation { - message: message.into(), - }) - } - /// Creates an I/O error. pub fn io(message: impl Into) -> Self { Self::new(ErrorKind::Io { @@ -195,13 +169,6 @@ impl Error { }) } - /// Creates a session error. - pub fn session(message: impl Into) -> Self { - Self::new(ErrorKind::Session { - message: message.into(), - }) - } - /// Creates a timeout error. 
pub fn timeout(duration_ms: u64) -> Self { Self::new(ErrorKind::Timeout { duration_ms }) @@ -292,22 +259,17 @@ impl fmt::Display for Error { match &self.kind { ErrorKind::UnsupportedFormat { format } => write!(f, "unsupported format: {format}"), ErrorKind::Parse { message } => write!(f, "parse error: {message}"), - ErrorKind::OperationNotSupported { operation } => { - write!(f, "operation not supported: {operation}") - } ErrorKind::RegionNotFound { id } => write!(f, "region not found: {id}"), ErrorKind::PageNotFound { page } => write!(f, "page not found: {page}"), - ErrorKind::InvalidOperation { message } => write!(f, "invalid operation: {message}"), ErrorKind::Io { message } => write!(f, "I/O error: {message}"), ErrorKind::Serialization { message } => write!(f, "serialization error: {message}"), - ErrorKind::Cancelled => write!(f, "operation cancelled"), + ErrorKind::Cancelled => write!(f, "processing cancelled"), ErrorKind::Timeout { duration_ms } => { - write!(f, "operation timed out after {duration_ms}ms") + write!(f, "processing timed out after {duration_ms}ms") } ErrorKind::ResourceLimit { resource } => { write!(f, "resource limit exceeded: {resource}") } - ErrorKind::Session { message } => write!(f, "session error: {message}"), ErrorKind::Conversion { message } => write!(f, "conversion error: {message}"), ErrorKind::Metadata { message } => write!(f, "metadata error: {message}"), ErrorKind::Thumbnail { message } => write!(f, "thumbnail error: {message}"), diff --git a/crates/nvisy-document/src/format/capabilities.rs b/crates/nvisy-document/src/format/capabilities.rs index a8983e9..3d8f248 100644 --- a/crates/nvisy-document/src/format/capabilities.rs +++ b/crates/nvisy-document/src/format/capabilities.rs @@ -1,80 +1,44 @@ //! Document format capabilities. //! -//! Different document formats support different operations. This module -//! defines a capability matrix that allows querying what operations -//! are supported by a given format. +//! Different document formats support different features. This module +//! defines a capability matrix for querying what a format supports. use serde::{Deserialize, Serialize}; -use crate::operation::{ - ContentOperation, DocumentOperation, EditOperation, InsertOperation, MetadataOperation, - PageOperation, RedactStyle, StructuralOperation, -}; - /// Describes the capabilities of a document format. #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct Capabilities { - /// Text editing capabilities. + /// Text extraction capabilities. pub text: TextCapabilities, - /// Image handling capabilities. - pub image: ImageCapabilities, - /// Structural capabilities. pub structure: StructureCapabilities, - /// Page-level capabilities. - pub page: PageCapabilities, - /// Metadata capabilities. pub metadata: MetadataCapabilities, } -/// Text editing capabilities. +/// Text extraction capabilities. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct TextCapabilities { - /// Can read/extract text content. - pub can_read: bool, - - /// Can replace text while preserving formatting. - pub can_replace_preserving_format: bool, - - /// Can replace text (may lose formatting). - pub can_replace: bool, - - /// Can insert new text. - pub can_insert: bool, - - /// Can delete text regions. - pub can_delete: bool, - - /// Supports rich text formatting. - pub supports_rich_text: bool, - - /// Supports font embedding. - pub supports_font_embedding: bool, -} - -/// Image handling capabilities. 
-#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct ImageCapabilities { - /// Can extract images. + /// Can extract native text content. pub can_extract: bool, - /// Can replace images. - pub can_replace: bool, - - /// Can insert new images. - pub can_insert: bool, - - /// Can redact images with blur. - pub can_blur: bool, + /// Supports rich text (formatting, styles). + pub has_rich_text: bool, - /// Can redact images with pixelation. - pub can_pixelate: bool, + /// May require OCR for text extraction. + pub may_need_ocr: bool, +} - /// Supported image formats for insertion. - pub supported_formats: Vec, +impl Default for TextCapabilities { + fn default() -> Self { + Self { + can_extract: true, + has_rich_text: false, + may_need_ocr: false, + } + } } /// Structural capabilities. @@ -86,356 +50,102 @@ pub struct StructureCapabilities { /// Can detect tables. pub can_detect_tables: bool, - /// Can modify table structure. - pub can_modify_tables: bool, - - /// Can merge regions. - pub can_merge: bool, - - /// Can split regions. - pub can_split: bool, - - /// Can move regions. - pub can_move: bool, - - /// Can copy regions. - pub can_copy: bool, -} - -/// Page-level capabilities. -#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct PageCapabilities { - /// Document has pages (vs. flowing text). + /// Has page-based layout. pub has_pages: bool, - - /// Can delete pages. - pub can_delete: bool, - - /// Can reorder pages. - pub can_reorder: bool, - - /// Can rotate pages. - pub can_rotate: bool, - - /// Can extract pages to new document. - pub can_extract: bool, - - /// Can split document at page boundaries. - pub can_split: bool, - - /// Can merge multiple documents. - pub can_merge: bool, } /// Metadata capabilities. #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct MetadataCapabilities { - /// Can read document metadata. - pub can_read: bool, - - /// Can modify document metadata. - pub can_modify: bool, - - /// Can add annotations/comments. - pub can_annotate: bool, + /// Can extract document metadata. + pub can_extract: bool, - /// Annotations are preserved in output. - pub annotations_preserved: bool, + /// Has embedded annotations/comments. + pub has_annotations: bool, } impl Capabilities { - /// Returns capabilities for a format that supports everything. + /// Returns capabilities for a simple text format. 
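+    ///
+    /// A short sketch of querying the matrix (mirrors the unit tests
+    /// below):
+    ///
+    /// ```
+    /// use nvisy_document::Capabilities;
+    ///
+    /// let caps = Capabilities::text();
+    /// assert!(caps.text.can_extract);
+    /// assert!(!caps.text.has_rich_text);
+    /// assert!(!caps.structure.can_detect_tables);
+    /// ```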
#[must_use] - pub fn full() -> Self { + pub fn text() -> Self { Self { text: TextCapabilities { - can_read: true, - can_replace_preserving_format: true, - can_replace: true, - can_insert: true, - can_delete: true, - supports_rich_text: true, - supports_font_embedding: true, - }, - image: ImageCapabilities { can_extract: true, - can_replace: true, - can_insert: true, - can_blur: true, - can_pixelate: true, - supported_formats: vec![ - "image/png".to_string(), - "image/jpeg".to_string(), - "image/gif".to_string(), - ], + has_rich_text: false, + may_need_ocr: false, }, structure: StructureCapabilities { can_detect_structure: true, - can_detect_tables: true, - can_modify_tables: true, - can_merge: true, - can_split: true, - can_move: true, - can_copy: true, - }, - page: PageCapabilities { - has_pages: true, - can_delete: true, - can_reorder: true, - can_rotate: true, - can_extract: true, - can_split: true, - can_merge: true, + can_detect_tables: false, + has_pages: false, }, metadata: MetadataCapabilities { - can_read: true, - can_modify: true, - can_annotate: true, - annotations_preserved: true, + can_extract: false, + has_annotations: false, }, } } - /// Returns capabilities for a read-only format. + /// Returns capabilities for a rich document format (PDF, DOCX). #[must_use] - pub fn read_only() -> Self { + pub fn rich_document() -> Self { Self { text: TextCapabilities { - can_read: true, - can_replace_preserving_format: false, - can_replace: false, - can_insert: false, - can_delete: false, - supports_rich_text: false, - supports_font_embedding: false, - }, - image: ImageCapabilities { can_extract: true, - ..Default::default() + has_rich_text: true, + may_need_ocr: false, }, structure: StructureCapabilities { can_detect_structure: true, can_detect_tables: true, - ..Default::default() - }, - page: PageCapabilities { has_pages: true, - ..Default::default() }, metadata: MetadataCapabilities { - can_read: true, - ..Default::default() + can_extract: true, + has_annotations: true, }, } } - /// Checks if the format supports a specific operation. + /// Returns capabilities for an image format. #[must_use] - pub fn supports(&self, operation: &EditOperation) -> OperationSupport { - match operation { - EditOperation::Content(op) => self.supports_content(op), - EditOperation::Insert(op) => self.supports_insert(op), - EditOperation::Structural(op) => self.supports_structural(op), - EditOperation::Page(op) => self.supports_page(op), - EditOperation::Document(op) => self.supports_document(op), - EditOperation::Metadata(op) => self.supports_metadata(op), - } - } - - fn supports_content(&self, op: &ContentOperation) -> OperationSupport { - match op { - ContentOperation::Redact { style, .. } => { - if !self.text.can_delete && !self.text.can_replace { - return OperationSupport::NotSupported; - } - match style { - RedactStyle::Blur { .. } if !self.image.can_blur => { - OperationSupport::Degraded("Blur not supported, will use black box") - } - RedactStyle::Pixelate { .. } if !self.image.can_pixelate => { - OperationSupport::Degraded("Pixelate not supported, will use black box") - } - _ => OperationSupport::Full, - } - } - - ContentOperation::ReplaceText { - preserve_formatting, - .. - } => { - if !self.text.can_replace { - OperationSupport::NotSupported - } else if *preserve_formatting && !self.text.can_replace_preserving_format { - OperationSupport::Degraded("Formatting may not be fully preserved") - } else { - OperationSupport::Full - } - } - - ContentOperation::ReplaceSubstring { .. 
} => { - if self.text.can_replace { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - - ContentOperation::Delete { .. } => { - if self.text.can_delete { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - } - } - - fn supports_insert(&self, _op: &InsertOperation) -> OperationSupport { - if self.text.can_insert { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - - fn supports_structural(&self, op: &StructuralOperation) -> OperationSupport { - match op { - StructuralOperation::Move { .. } => { - if self.structure.can_move { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - - StructuralOperation::Copy { .. } => { - if self.structure.can_copy { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - - StructuralOperation::Merge { .. } => { - if self.structure.can_merge { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - - StructuralOperation::SplitRegion { .. } => { - if self.structure.can_split { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - } - } - - fn supports_page(&self, op: &PageOperation) -> OperationSupport { - match op { - PageOperation::DeletePages { .. } => { - if self.page.has_pages && self.page.can_delete { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - - PageOperation::ReorderPages { .. } => { - if self.page.has_pages && self.page.can_reorder { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - - PageOperation::RotatePages { .. } => { - if self.page.has_pages && self.page.can_rotate { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - - PageOperation::ExtractPages { .. } => { - if self.page.has_pages && self.page.can_extract { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - } - } - - fn supports_document(&self, op: &DocumentOperation) -> OperationSupport { - match op { - DocumentOperation::Split { .. } => { - if self.page.can_split { - OperationSupport::Full - } else { - OperationSupport::NotSupported - } - } - } - } - - fn supports_metadata(&self, op: &MetadataOperation) -> OperationSupport { - match op { - MetadataOperation::Reclassify { .. } | MetadataOperation::UpdateBounds { .. } => { - OperationSupport::Full - } - - MetadataOperation::Annotate { .. } => { - if self.metadata.can_annotate { - OperationSupport::Full - } else { - OperationSupport::Degraded("Annotations won't be persisted in output") - } - } + pub fn image() -> Self { + Self { + text: TextCapabilities { + can_extract: false, + has_rich_text: false, + may_need_ocr: true, + }, + structure: StructureCapabilities { + can_detect_structure: false, + can_detect_tables: false, + has_pages: false, + }, + metadata: MetadataCapabilities { + can_extract: true, // EXIF + has_annotations: false, + }, } } -} - -/// Result of checking operation support. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum OperationSupport { - /// Operation is fully supported. - Full, - - /// Operation is supported but may not work perfectly. - Degraded(&'static str), - - /// Operation is not supported. - NotSupported, -} - -impl OperationSupport { - /// Returns true if the operation can be attempted. - #[must_use] - pub const fn is_supported(&self) -> bool { - !matches!(self, Self::NotSupported) - } - /// Returns true if the operation is fully supported. + /// Returns capabilities for a spreadsheet format. 
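+    ///
+    /// A short sketch (sheets are modeled as pages, per the field
+    /// comment below):
+    ///
+    /// ```
+    /// use nvisy_document::Capabilities;
+    ///
+    /// let caps = Capabilities::spreadsheet();
+    /// assert!(caps.structure.can_detect_tables);
+    /// assert!(caps.structure.has_pages);
+    /// ```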
#[must_use] - pub const fn is_full(&self) -> bool { - matches!(self, Self::Full) - } -} - -impl Default for TextCapabilities { - fn default() -> Self { + pub fn spreadsheet() -> Self { Self { - can_read: true, - can_replace_preserving_format: false, - can_replace: false, - can_insert: false, - can_delete: false, - supports_rich_text: false, - supports_font_embedding: false, + text: TextCapabilities { + can_extract: true, + has_rich_text: false, + may_need_ocr: false, + }, + structure: StructureCapabilities { + can_detect_structure: true, + can_detect_tables: true, + has_pages: true, // Sheets as pages + }, + metadata: MetadataCapabilities { + can_extract: true, + has_annotations: false, + }, } } } @@ -443,44 +153,36 @@ impl Default for TextCapabilities { #[cfg(test)] mod tests { use super::*; - use crate::format::region::RegionId; #[test] - fn test_full_capabilities() { - let caps = Capabilities::full(); - let region = RegionId::new(); - - assert!(caps.supports(&EditOperation::redact(region)).is_full()); - assert!(caps.supports(&EditOperation::delete(region)).is_full()); + fn test_text_capabilities() { + let caps = Capabilities::text(); + assert!(caps.text.can_extract); + assert!(!caps.text.has_rich_text); + assert!(!caps.structure.can_detect_tables); } #[test] - fn test_read_only_capabilities() { - let caps = Capabilities::read_only(); - let region = RegionId::new(); - - assert!(!caps.supports(&EditOperation::delete(region)).is_supported()); - assert!(!caps - .supports(&EditOperation::replace_text(region, "test")) - .is_supported()); + fn test_rich_document_capabilities() { + let caps = Capabilities::rich_document(); + assert!(caps.text.can_extract); + assert!(caps.text.has_rich_text); + assert!(caps.structure.can_detect_tables); + assert!(caps.structure.has_pages); } #[test] - fn test_degraded_support() { - let mut caps = Capabilities::full(); - caps.text.can_replace_preserving_format = false; - - let region = RegionId::new(); - let op: EditOperation = ContentOperation::ReplaceText { - target: region, - new_text: "test".to_string(), - preserve_formatting: true, - } - .into(); + fn test_image_capabilities() { + let caps = Capabilities::image(); + assert!(!caps.text.can_extract); + assert!(caps.text.may_need_ocr); + assert!(caps.metadata.can_extract); + } - let support = caps.supports(&op); - assert!(support.is_supported()); - assert!(!support.is_full()); - assert!(matches!(support, OperationSupport::Degraded(_))); + #[test] + fn test_spreadsheet_capabilities() { + let caps = Capabilities::spreadsheet(); + assert!(caps.text.can_extract); + assert!(caps.structure.can_detect_tables); } } diff --git a/crates/nvisy-document/src/format/mod.rs b/crates/nvisy-document/src/format/mod.rs index 58d6296..58b1fa1 100644 --- a/crates/nvisy-document/src/format/mod.rs +++ b/crates/nvisy-document/src/format/mod.rs @@ -2,11 +2,8 @@ //! //! This module defines the core traits for document handling: //! -//! - [`DocumentFormat`]: A format handler (class/factory) that can load and create documents +//! - [`DocumentFormat`]: A format handler (class/factory) that can load documents //! - [`Document`]: A loaded document instance for reading document content -//! - [`EditableDocument`]: Extension trait for documents that support editing -//! -//! Think of `DocumentFormat` as a class and `Document` as an instance of that class. 
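+//!
+//! A sketch of the intended flow (hypothetical `bytes: Bytes` input; marked
+//! `ignore` because `DocxFormat::load` is not implemented yet):
+//!
+//! ```ignore
+//! let format = DocxFormat::new();
+//! let doc = format.load(bytes).await?;
+//! let round_trip = doc.to_bytes().await?;
+//! ```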
mod capabilities; mod info; @@ -19,24 +16,22 @@ use std::future::Future; use async_trait::async_trait; use bytes::Bytes; pub use capabilities::{ - Capabilities, ImageCapabilities, MetadataCapabilities, OperationSupport, PageCapabilities, - StructureCapabilities, TextCapabilities, + Capabilities, MetadataCapabilities, StructureCapabilities, TextCapabilities, }; pub use info::DocumentInfo; pub use page::PageOptions; pub use region::{BoundingBox, Point, Region, RegionId, RegionKind, RegionSource, RegionStatus}; use crate::error::Result; -use crate::operation::{EditOperation, EditResult}; /// Trait for document format handlers with an associated Document type. /// -/// A `DocumentFormat` is like a class that knows how to load and create +/// A `DocumentFormat` is like a class that knows how to load /// documents of a specific format. Each format implementation provides /// a concrete `Document` type. pub trait DocumentFormat: Send + Sync { /// The concrete document type produced by this format. - type Document: EditableDocument; + type Document: Document; /// Returns the format name (e.g., "pdf", "docx"). fn name(&self) -> &'static str; @@ -52,15 +47,11 @@ pub trait DocumentFormat: Send + Sync { /// Loads a document from bytes. fn load(&self, data: Bytes) -> impl Future<Output = Result<Self::Document>> + Send; - - /// Creates a new empty document. - fn create_empty(&self) -> impl Future<Output = Result<Self::Document>> + Send; } /// A loaded document instance (read-only access). /// /// Documents provide read access to document content and structure. -/// For editing capabilities, see [`EditableDocument`]. #[async_trait] pub trait Document: Send + Sync { /// Returns document information. @@ -76,22 +67,5 @@ pub trait Document: Send + Sync { fn find_region(&self, id: RegionId) -> Option<&Region>; /// Serializes the document to bytes. - async fn serialize(&self) -> Result<Bytes>; -} - -/// Extension trait for documents that support editing. -/// -/// This trait extends [`Document`] with mutation capabilities. -/// Not all document formats support editing - check the format's -/// [`Capabilities`] to determine what operations are supported. -#[async_trait] -pub trait EditableDocument: Document { - /// Applies an edit operation to the document. - async fn apply(&mut self, operation: &EditOperation) -> Result<EditResult>; - - /// Returns whether the document has unsaved changes. - fn is_modified(&self) -> bool; - - /// Extracts regions for specific pages (for streaming/pagination). - async fn extract_page_regions(&mut self, options: &PageOptions) -> Result<Vec<Region>>; + async fn to_bytes(&self) -> Result<Bytes>; } diff --git a/crates/nvisy-document/src/format/page.rs b/crates/nvisy-document/src/format/page.rs index ac11c74..bd8e129 100644 --- a/crates/nvisy-document/src/format/page.rs +++ b/crates/nvisy-document/src/format/page.rs @@ -1,4 +1,4 @@ -//! Page-related types for document operations. +//! Page-related types for document processing. /// Page extraction options. #[derive(Debug, Clone, Default)] diff --git a/crates/nvisy-document/src/format/region/core.rs b/crates/nvisy-document/src/format/region/core.rs index 2e10c4e..46a26c5 100644 --- a/crates/nvisy-document/src/format/region/core.rs +++ b/crates/nvisy-document/src/format/region/core.rs @@ -6,10 +6,10 @@ use serde::{Deserialize, Serialize}; use super::{BoundingBox, RegionId, RegionKind, RegionSource, RegionStatus}; -/// A region within a document that can be referenced and modified. /// -/// Regions are the fundamental unit for VLM-driven document editing.
-/// Each region has a stable ID, spatial bounds, and optional text content. +/// Regions represent semantically meaningful parts of a document +/// (paragraphs, tables, images, etc.) with stable IDs and spatial bounds. #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct Region { /// Unique identifier for this region. @@ -27,10 +27,10 @@ pub struct Region { /// Semantic type of this region. pub kind: RegionKind, - /// Current status within the edit session (None means Active). + /// Current status of the region. pub status: Option<RegionStatus>, - /// How this region was identified/created. + /// How this region was identified. pub source: RegionSource, /// Parent region ID, if this is a nested region. @@ -118,8 +118,10 @@ impl Region { } - /// Adds a child region ID. - pub fn add_child(&mut self, child: RegionId) { + /// Adds a child region ID, returning the updated region. + #[must_use] + pub fn with_child(mut self, child: RegionId) -> Self { self.children.push(child); + self } /// Returns the effective status (defaults to Active if None). @@ -128,12 +130,6 @@ impl Region { self.status.unwrap_or(RegionStatus::Active) } - /// Returns true if this region is still valid for operations. - #[must_use] - pub fn is_valid(&self) -> bool { - self.effective_status().is_valid() - } - /// Returns true if this region has text content. #[must_use] pub fn has_text(&self) -> bool { @@ -146,34 +142,10 @@ impl Region { self.kind.is_container() || !self.children.is_empty() } - /// Returns true if this region can have its text edited. + /// Returns true if this region kind typically contains text. #[must_use] - pub fn is_text_editable(&self) -> bool { - self.kind.is_text_editable() && self.is_valid() - } - - /// Marks the region as modified. - pub fn mark_modified(&mut self) { - if self.effective_status() == RegionStatus::Active { - self.status = Some(RegionStatus::Modified); - } - } - - /// Marks the region as deleted. - pub fn mark_deleted(&mut self) { - self.status = Some(RegionStatus::Deleted); - } - - /// Updates the text content and marks as modified. - pub fn update_text(&mut self, new_text: String) { - self.text = Some(new_text); - self.mark_modified(); - } - - /// Updates the bounds and marks as modified.
- pub fn update_bounds(&mut self, new_bounds: BoundingBox) { - self.bounds = new_bounds; - self.mark_modified(); + pub fn has_text_content(&self) -> bool { + self.kind.has_text_content() } } @@ -192,7 +164,6 @@ mod tests { let bounds = BoundingBox::new(0.1, 0.2, 0.3, 0.4); let region = Region::new(bounds); - assert!(region.is_valid()); assert!(!region.has_text()); assert_eq!(region.kind, RegionKind::Unknown); assert_eq!(region.effective_status(), RegionStatus::Active); @@ -212,27 +183,15 @@ mod tests { } #[test] - fn test_region_modification() { - let mut region = Region::new(BoundingBox::default()).with_text("Original"); - - assert!(region.status.is_none()); - assert_eq!(region.effective_status(), RegionStatus::Active); - - region.update_text("Modified".to_string()); - - assert_eq!(region.status, Some(RegionStatus::Modified)); - assert_eq!(region.text.as_deref(), Some("Modified")); - } - - #[test] - fn test_region_deletion() { - let mut region = Region::new(BoundingBox::default()); - assert!(region.is_valid()); + fn test_region_has_text() { + let region = Region::new(BoundingBox::default()).with_text("Some text"); + assert!(region.has_text()); - region.mark_deleted(); + let empty_region = Region::new(BoundingBox::default()); + assert!(!empty_region.has_text()); - assert!(!region.is_valid()); - assert_eq!(region.status, Some(RegionStatus::Deleted)); + let empty_text_region = Region::new(BoundingBox::default()).with_text(""); + assert!(!empty_text_region.has_text()); } #[test] diff --git a/crates/nvisy-document/src/format/region/kind.rs b/crates/nvisy-document/src/format/region/kind.rs index 2d5182d..1cae528 100644 --- a/crates/nvisy-document/src/format/region/kind.rs +++ b/crates/nvisy-document/src/format/region/kind.rs @@ -4,8 +4,7 @@ use serde::{Deserialize, Serialize}; /// Classification of a document region by its semantic type. /// -/// This helps VLMs understand the context of each region and -/// guides appropriate editing operations. +/// Helps understand the context of each region and what type of content it contains. #[derive( Debug, Default, @@ -76,9 +75,9 @@ pub enum RegionKind { } impl RegionKind { - /// Returns true if this region typically contains editable text. + /// Returns true if this region typically contains extractable text. #[must_use] - pub const fn is_text_editable(&self) -> bool { + pub const fn has_text_content(&self) -> bool { matches!( self, Self::Text @@ -99,16 +98,13 @@ impl RegionKind { matches!(self, Self::Table | Self::TableRow | Self::List) } - /// Returns true if this region can be redacted. + /// Returns true if this region represents structural content. #[must_use] - pub const fn is_redactable(&self) -> bool { - !matches!(self, Self::Unknown) - } - - /// Returns true if this region can be deleted. 
- #[must_use] - pub const fn is_deletable(&self) -> bool { - true + pub const fn is_structural(&self) -> bool { + matches!( + self, + Self::Table | Self::TableRow | Self::List | Self::Header | Self::Footer + ) } } @@ -117,12 +113,12 @@ mod tests { use super::*; #[test] - fn test_text_editable() { - assert!(RegionKind::Text.is_text_editable()); - assert!(RegionKind::Heading.is_text_editable()); - assert!(RegionKind::TableCell.is_text_editable()); - assert!(!RegionKind::Image.is_text_editable()); - assert!(!RegionKind::Table.is_text_editable()); + fn test_has_text_content() { + assert!(RegionKind::Text.has_text_content()); + assert!(RegionKind::Heading.has_text_content()); + assert!(RegionKind::TableCell.has_text_content()); + assert!(!RegionKind::Image.has_text_content()); + assert!(!RegionKind::Table.has_text_content()); } #[test] diff --git a/crates/nvisy-document/src/format/region/mod.rs b/crates/nvisy-document/src/format/region/mod.rs index adf896e..e9bde8e 100644 --- a/crates/nvisy-document/src/format/region/mod.rs +++ b/crates/nvisy-document/src/format/region/mod.rs @@ -1,8 +1,7 @@ -//! Region types for document manipulation. +//! Region types for document structure. //! -//! Regions are the fundamental unit for VLM-driven document editing. -//! Each region represents a semantically meaningful part of a document -//! (paragraph, table, image, etc.) that can be referenced and modified. +//! Regions represent semantically meaningful parts of a document +//! (paragraphs, tables, images, etc.) that can be referenced and extracted. mod bounds; mod core; @@ -11,9 +10,8 @@ mod kind; mod source; mod status; -pub use core::Region; - pub use bounds::{BoundingBox, Point}; +pub use core::Region; pub use id::RegionId; pub use kind::RegionKind; pub use source::RegionSource; diff --git a/crates/nvisy-document/src/format/region/status.rs b/crates/nvisy-document/src/format/region/status.rs index 7402926..1a3b0d5 100644 --- a/crates/nvisy-document/src/format/region/status.rs +++ b/crates/nvisy-document/src/format/region/status.rs @@ -2,10 +2,9 @@ use serde::{Deserialize, Serialize}; -/// Status of a region within an edit session. +/// Status of a region within a document. /// -/// Tracks the lifecycle of regions as edits are applied, -/// enabling stable references across multi-turn VLM interactions. +/// Describes how a region was detected or its state in the document. #[derive( Debug, Default, @@ -19,43 +18,28 @@ use serde::{Deserialize, Serialize}; )] #[serde(rename_all = "snake_case")] pub enum RegionStatus { - /// Region is active and unchanged from its original state. + /// Region is active and valid. #[default] Active, - /// Region content has been modified. - Modified, + /// Region is hidden or collapsed. + Hidden, - /// Region has been deleted. - Deleted, - - /// Region was split into multiple regions. - Split, - - /// Region was merged with another region. - Merged, - - /// Region was created during this session (not in original document). - Created, + /// Region content is empty. + Empty, } impl RegionStatus { - /// Returns true if the region is still valid for operations. - #[must_use] - pub const fn is_valid(&self) -> bool { - matches!(self, Self::Active | Self::Modified | Self::Created) - } - - /// Returns true if the region has been removed. + /// Returns true if the region is visible. 
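+    ///
+    /// Mirrors `test_status_visibility` below:
+    ///
+    /// ```
+    /// use nvisy_document::RegionStatus;
+    ///
+    /// assert!(RegionStatus::Active.is_visible());
+    /// assert!(RegionStatus::Empty.is_visible());
+    /// assert!(!RegionStatus::Hidden.is_visible());
+    /// ```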
#[must_use] - pub const fn is_removed(&self) -> bool { - matches!(self, Self::Deleted | Self::Merged) + pub const fn is_visible(&self) -> bool { + matches!(self, Self::Active | Self::Empty) } - /// Returns true if the region was changed from its original state. + /// Returns true if the region has content. #[must_use] - pub const fn is_changed(&self) -> bool { - !matches!(self, Self::Active) + pub const fn has_content(&self) -> bool { + matches!(self, Self::Active) } } @@ -64,19 +48,17 @@ mod tests { use super::*; #[test] - fn test_status_validity() { - assert!(RegionStatus::Active.is_valid()); - assert!(RegionStatus::Modified.is_valid()); - assert!(RegionStatus::Created.is_valid()); - assert!(!RegionStatus::Deleted.is_valid()); - assert!(!RegionStatus::Merged.is_valid()); + fn test_status_visibility() { + assert!(RegionStatus::Active.is_visible()); + assert!(RegionStatus::Empty.is_visible()); + assert!(!RegionStatus::Hidden.is_visible()); } #[test] - fn test_status_removed() { - assert!(!RegionStatus::Active.is_removed()); - assert!(RegionStatus::Deleted.is_removed()); - assert!(RegionStatus::Merged.is_removed()); + fn test_status_has_content() { + assert!(RegionStatus::Active.has_content()); + assert!(!RegionStatus::Empty.has_content()); + assert!(!RegionStatus::Hidden.has_content()); } #[test] diff --git a/crates/nvisy-document/src/lib.rs b/crates/nvisy-document/src/lib.rs index 34435f8..ec53667 100644 --- a/crates/nvisy-document/src/lib.rs +++ b/crates/nvisy-document/src/lib.rs @@ -5,7 +5,6 @@ // Core modules pub mod error; pub mod format; -pub mod operation; // Extension trait modules pub mod conversion; @@ -15,36 +14,36 @@ pub mod text; pub mod thumbnail; // Error re-exports -// Conversion re-exports -pub use conversion::{ - Conversion, ConversionOptions, ConversionPath, ConversionResult, ConversionStep, FormatPair, - HtmlOptions, PageMargins, PageOrientation, PdfOptions, SkippedElement, -}; pub use error::{BoxError, Error, ErrorKind, Result}; + // Region re-exports (from format::region) pub use format::region::{ BoundingBox, Point, Region, RegionId, RegionKind, RegionSource, RegionStatus, }; + // Format re-exports pub use format::{ - Capabilities, Document, DocumentFormat, DocumentInfo, EditableDocument, ImageCapabilities, - MetadataCapabilities, OperationSupport, PageCapabilities, PageOptions, StructureCapabilities, - TextCapabilities, + Capabilities, Document, DocumentFormat, DocumentInfo, MetadataCapabilities, PageOptions, + StructureCapabilities, TextCapabilities, +}; + +// Conversion re-exports +pub use conversion::{ + Conversion, ConversionOptions, ConversionPath, ConversionResult, ConversionStep, FormatPair, + HtmlOptions, PageMargins, PageOrientation, PdfOptions, SkippedElement, }; + // Metadata re-exports pub use metadata::{ CustomProperty, DocumentMetadata, Metadata, MetadataExtractOptions, MetadataField, PropertyValue, }; -// Operation re-exports -pub use operation::{ - ContentOperation, DocumentOperation, EditOperation, EditResult, InsertContent, InsertOperation, - MergeOrder, MetadataOperation, PageOperation, RedactStyle, SplitBoundary, StructuralOperation, - TextStyle, -}; + // Table re-exports pub use table::{CellDataType, NormalizedCell, NormalizedRow, NormalizedTable, TableExtractor}; + // Text re-exports pub use text::{ExtractedText, TextExtractor}; + // Thumbnail re-exports pub use thumbnail::{ImageFormat, Thumbnail, ThumbnailGenerator, ThumbnailOptions, ThumbnailSize}; diff --git a/crates/nvisy-document/src/operation/insert.rs 
b/crates/nvisy-document/src/operation/insert.rs deleted file mode 100644 index 40636b5..0000000 --- a/crates/nvisy-document/src/operation/insert.rs +++ /dev/null @@ -1,160 +0,0 @@ -//! Insert content types. - -use bytes::Bytes; -use serde::{Deserialize, Serialize}; - -use crate::format::region::RegionKind; - -/// Content to insert into a document. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "type")] -pub enum InsertContent { - /// Plain text content. - Text { - /// The text to insert. - content: String, - - /// Optional style hint. - style: Option, - }, - - /// Image content. - Image { - /// Image data. - #[serde(with = "bytes_serde")] - data: Bytes, - - /// MIME type (e.g., "image/png"). - mime_type: String, - - /// Optional alt text. - alt_text: Option, - }, - - /// Page break. - PageBreak, - - /// Section break. - SectionBreak, - - /// Horizontal rule/divider. - HorizontalRule, -} - -/// Text style hints for insertion. -#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum TextStyle { - /// Normal paragraph text. - #[default] - Normal, - - /// Heading level 1-6. - Heading(u8), - - /// Bold text. - Bold, - - /// Italic text. - Italic, - - /// Code/monospace text. - Code, - - /// Block quote. - Quote, -} - -impl InsertContent { - /// Creates a text insert with the given content. - #[must_use] - pub fn text(content: impl Into) -> Self { - Self::Text { - content: content.into(), - style: None, - } - } - - /// Creates a text insert with style. - #[must_use] - pub fn styled_text(content: impl Into, style: TextStyle) -> Self { - Self::Text { - content: content.into(), - style: Some(style), - } - } - - /// Creates an image insert. - #[must_use] - pub fn image(data: Bytes, mime_type: impl Into) -> Self { - Self::Image { - data, - mime_type: mime_type.into(), - alt_text: None, - } - } - - /// Returns the region kind this content would create. - #[must_use] - pub fn region_kind(&self) -> RegionKind { - match self { - Self::Text { style, .. } => match style { - Some(TextStyle::Heading(_)) => RegionKind::Heading, - Some(TextStyle::Code) => RegionKind::Code, - Some(TextStyle::Quote) => RegionKind::Quote, - _ => RegionKind::Text, - }, - Self::Image { .. } => RegionKind::Image, - Self::PageBreak | Self::SectionBreak | Self::HorizontalRule => RegionKind::Unknown, - } - } -} - -/// Serde helper for Bytes. -mod bytes_serde { - use bytes::Bytes; - use serde::{Deserialize, Deserializer, Serialize, Serializer}; - - pub fn serialize(bytes: &Bytes, serializer: S) -> Result - where - S: Serializer, - { - base64::Engine::encode(&base64::engine::general_purpose::STANDARD, bytes) - .serialize(serializer) - } - - pub fn deserialize<'de, D>(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - base64::Engine::decode(&base64::engine::general_purpose::STANDARD, &s) - .map(Bytes::from) - .map_err(serde::de::Error::custom) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_text_insert() { - let content = InsertContent::text("Hello, world!"); - assert!(matches!(content, InsertContent::Text { .. 
})); - assert_eq!(content.region_kind(), RegionKind::Text); - } - - #[test] - fn test_styled_text() { - let content = InsertContent::styled_text("Title", TextStyle::Heading(1)); - assert_eq!(content.region_kind(), RegionKind::Heading); - } - - #[test] - fn test_image_insert() { - let data = Bytes::from(vec![0u8; 10]); - let content = InsertContent::image(data, "image/png"); - assert_eq!(content.region_kind(), RegionKind::Image); - } -} diff --git a/crates/nvisy-document/src/operation/mod.rs b/crates/nvisy-document/src/operation/mod.rs deleted file mode 100644 index b523ecd..0000000 --- a/crates/nvisy-document/src/operation/mod.rs +++ /dev/null @@ -1,542 +0,0 @@ -//! Document edit operations. -//! -//! This module defines all the operations that can be performed on a document. -//! Operations are designed to be: -//! - Reversible (for undo/redo support) -//! - Serializable (for persistence and VLM communication) -//! - Format-agnostic (implementations handle format-specific details) - -mod insert; -mod redact; -mod result; -mod split; - -use derive_more::From; -pub use insert::{InsertContent, TextStyle}; -pub use redact::RedactStyle; -pub use result::EditResult; -use serde::{Deserialize, Serialize}; -pub use split::{MergeOrder, SplitBoundary}; - -use crate::format::region::{BoundingBox, RegionId, RegionKind}; - -/// Content modification operations. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "operation")] -pub enum ContentOperation { - /// Redact content within a region. - Redact { - /// Target region to redact. - target: RegionId, - - /// Redaction style. - #[serde(default)] - style: RedactStyle, - }, - - /// Replace text content in a region. - ReplaceText { - /// Target region. - target: RegionId, - - /// New text content. - new_text: String, - - /// Whether to preserve original formatting. - #[serde(default = "default_true")] - preserve_formatting: bool, - }, - - /// Replace a substring within a region's text. - ReplaceSubstring { - /// Target region. - target: RegionId, - - /// Text to find (first occurrence). - find: String, - - /// Text to replace with. - replace: String, - - /// Replace all occurrences vs just the first. - #[serde(default)] - replace_all: bool, - }, - - /// Delete a region entirely. - Delete { - /// Target region to delete. - target: RegionId, - - /// Whether to collapse space left by deletion. - #[serde(default = "default_true")] - collapse_space: bool, - }, -} - -/// Insertion operations. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "operation")] -pub enum InsertOperation { - /// Insert content before a region. - InsertBefore { - /// Region to insert before. - target: RegionId, - - /// Content to insert. - content: InsertContent, - }, - - /// Insert content after a region. - InsertAfter { - /// Region to insert after. - target: RegionId, - - /// Content to insert. - content: InsertContent, - }, - - /// Insert content at the start of a region (for containers). - InsertStart { - /// Container region. - target: RegionId, - - /// Content to insert. - content: InsertContent, - }, - - /// Insert content at the end of a region (for containers). - InsertEnd { - /// Container region. - target: RegionId, - - /// Content to insert. - content: InsertContent, - }, -} - -/// Structural operations for moving, copying, merging, and splitting. 
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "operation")] -pub enum StructuralOperation { - /// Move a region to a new location. - Move { - /// Region to move. - source: RegionId, - - /// Target location (insert after this region). - target: RegionId, - }, - - /// Copy a region to a new location. - Copy { - /// Region to copy. - source: RegionId, - - /// Target location (insert after this region). - target: RegionId, - }, - - /// Merge multiple regions into one. - Merge { - /// Regions to merge (in order). - regions: Vec, - - /// Separator between merged content. - separator: Option, - }, - - /// Split a region at a specific point. - SplitRegion { - /// Region to split. - target: RegionId, - - /// Character offset to split at. - at_offset: usize, - }, -} - -/// Page-level operations. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "operation")] -pub enum PageOperation { - /// Delete specific pages. - DeletePages { - /// Page numbers to delete (0-indexed). - pages: Vec, - }, - - /// Reorder pages. - ReorderPages { - /// New page order (each value is the old page index). - new_order: Vec, - }, - - /// Rotate pages. - RotatePages { - /// Page numbers to rotate (0-indexed). - pages: Vec, - - /// Rotation in degrees (90, 180, 270). - degrees: i16, - }, - - /// Extract pages to a new document. - ExtractPages { - /// Page numbers to extract (0-indexed). - pages: Vec, - }, -} - -/// Document-level operations. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "operation")] -pub enum DocumentOperation { - /// Split document at specified boundaries. - Split { - /// Split boundary definitions. - boundaries: Vec, - }, -} - -/// Metadata operations for classification, bounds, and annotations. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "operation")] -pub enum MetadataOperation { - /// Change region kind/classification. - Reclassify { - /// Target region. - target: RegionId, - - /// New region kind. - new_kind: RegionKind, - }, - - /// Update region bounds (for layout adjustments). - UpdateBounds { - /// Target region. - target: RegionId, - - /// New bounding box. - new_bounds: BoundingBox, - }, - - /// Add annotation/comment to a region. - Annotate { - /// Target region. - target: RegionId, - - /// Annotation text. - annotation: String, - - /// Annotation author (optional). - author: Option, - }, -} - -/// An edit operation to be applied to a document. -/// -/// Operations target specific regions by their stable IDs, allowing -/// VLM-driven workflows to reference regions across multiple turns. -#[derive(Debug, Clone, PartialEq, From, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", untagged)] -pub enum EditOperation { - /// Content modification operations. - Content(ContentOperation), - - /// Insertion operations. - Insert(InsertOperation), - - /// Structural operations. - Structural(StructuralOperation), - - /// Page-level operations. - Page(PageOperation), - - /// Document-level operations. - Document(DocumentOperation), - - /// Metadata operations. - Metadata(MetadataOperation), -} - -fn default_true() -> bool { - true -} - -impl EditOperation { - /// Returns the primary target region of this operation, if any. 
- #[must_use] - pub fn target(&self) -> Option { - match self { - Self::Content(op) => op.target(), - Self::Insert(op) => op.target(), - Self::Structural(op) => op.target(), - Self::Page(_) => None, - Self::Document(_) => None, - Self::Metadata(op) => op.target(), - } - } - - /// Returns all region IDs referenced by this operation. - #[must_use] - pub fn referenced_regions(&self) -> Vec { - match self { - Self::Content(op) => op.referenced_regions(), - Self::Insert(op) => op.referenced_regions(), - Self::Structural(op) => op.referenced_regions(), - Self::Page(_) => vec![], - Self::Document(op) => op.referenced_regions(), - Self::Metadata(op) => op.referenced_regions(), - } - } - - /// Returns true if this operation modifies content (vs. metadata only). - #[must_use] - pub const fn modifies_content(&self) -> bool { - match self { - Self::Content(_) - | Self::Insert(_) - | Self::Structural(_) - | Self::Page(_) - | Self::Document(_) => true, - Self::Metadata(_) => false, - } - } - - /// Returns true if this operation is reversible. - #[must_use] - pub const fn is_reversible(&self) -> bool { - true - } - - /// Creates a redact operation with default style. - #[must_use] - pub fn redact(target: RegionId) -> Self { - ContentOperation::Redact { - target, - style: RedactStyle::default(), - } - .into() - } - - /// Creates a redact operation with custom style. - #[must_use] - pub fn redact_with_style(target: RegionId, style: RedactStyle) -> Self { - ContentOperation::Redact { target, style }.into() - } - - /// Creates a replace text operation. - #[must_use] - pub fn replace_text(target: RegionId, new_text: impl Into) -> Self { - ContentOperation::ReplaceText { - target, - new_text: new_text.into(), - preserve_formatting: true, - } - .into() - } - - /// Creates a delete operation. - #[must_use] - pub fn delete(target: RegionId) -> Self { - ContentOperation::Delete { - target, - collapse_space: true, - } - .into() - } - - /// Creates an insert after operation. - #[must_use] - pub fn insert_after(target: RegionId, content: InsertContent) -> Self { - InsertOperation::InsertAfter { target, content }.into() - } - - /// Creates an insert before operation. - #[must_use] - pub fn insert_before(target: RegionId, content: InsertContent) -> Self { - InsertOperation::InsertBefore { target, content }.into() - } -} - -impl ContentOperation { - /// Returns the target region of this operation. - #[must_use] - pub fn target(&self) -> Option { - match self { - Self::Redact { target, .. } - | Self::ReplaceText { target, .. } - | Self::ReplaceSubstring { target, .. } - | Self::Delete { target, .. } => Some(*target), - } - } - - /// Returns all region IDs referenced by this operation. - #[must_use] - pub fn referenced_regions(&self) -> Vec { - self.target().into_iter().collect() - } -} - -impl InsertOperation { - /// Returns the target region of this operation. - #[must_use] - pub fn target(&self) -> Option { - match self { - Self::InsertBefore { target, .. } - | Self::InsertAfter { target, .. } - | Self::InsertStart { target, .. } - | Self::InsertEnd { target, .. } => Some(*target), - } - } - - /// Returns all region IDs referenced by this operation. - #[must_use] - pub fn referenced_regions(&self) -> Vec { - self.target().into_iter().collect() - } -} - -impl StructuralOperation { - /// Returns the primary target region of this operation. - #[must_use] - pub fn target(&self) -> Option { - match self { - Self::Move { source, .. } | Self::Copy { source, .. } => Some(*source), - Self::Merge { regions, .. 
} => regions.first().copied(), - Self::SplitRegion { target, .. } => Some(*target), - } - } - - /// Returns all region IDs referenced by this operation. - #[must_use] - pub fn referenced_regions(&self) -> Vec { - match self { - Self::Move { source, target } | Self::Copy { source, target } => vec![*source, *target], - Self::Merge { regions, .. } => regions.clone(), - Self::SplitRegion { target, .. } => vec![*target], - } - } -} - -impl DocumentOperation { - /// Returns all region IDs referenced by this operation. - #[must_use] - pub fn referenced_regions(&self) -> Vec { - match self { - Self::Split { boundaries } => boundaries - .iter() - .filter_map(|b| match b { - SplitBoundary::AfterRegion { region } => Some(*region), - _ => None, - }) - .collect(), - } - } -} - -impl MetadataOperation { - /// Returns the target region of this operation. - #[must_use] - pub fn target(&self) -> Option { - match self { - Self::Reclassify { target, .. } - | Self::UpdateBounds { target, .. } - | Self::Annotate { target, .. } => Some(*target), - } - } - - /// Returns all region IDs referenced by this operation. - #[must_use] - pub fn referenced_regions(&self) -> Vec { - self.target().into_iter().collect() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_target_extraction() { - let region = RegionId::new(); - let op = EditOperation::redact(region); - assert_eq!(op.target(), Some(region)); - } - - #[test] - fn test_referenced_regions() { - let r1 = RegionId::new(); - let r2 = RegionId::new(); - - let op: EditOperation = StructuralOperation::Move { - source: r1, - target: r2, - } - .into(); - let refs = op.referenced_regions(); - assert_eq!(refs.len(), 2); - assert!(refs.contains(&r1)); - assert!(refs.contains(&r2)); - } - - #[test] - fn test_modifies_content() { - let region = RegionId::new(); - - assert!(EditOperation::redact(region).modifies_content()); - assert!(EditOperation::delete(region).modifies_content()); - - let annotate: EditOperation = MetadataOperation::Annotate { - target: region, - annotation: "test".to_string(), - author: None, - } - .into(); - assert!(!annotate.modifies_content()); - } - - #[test] - fn test_from_impls() { - let region = RegionId::new(); - - let _: EditOperation = ContentOperation::Delete { - target: region, - collapse_space: true, - } - .into(); - - let _: EditOperation = InsertOperation::InsertAfter { - target: region, - content: InsertContent::text("test"), - } - .into(); - - let _: EditOperation = StructuralOperation::SplitRegion { - target: region, - at_offset: 10, - } - .into(); - - let _: EditOperation = PageOperation::DeletePages { pages: vec![0] }.into(); - - let _: EditOperation = DocumentOperation::Split { boundaries: vec![] }.into(); - - let _: EditOperation = MetadataOperation::Reclassify { - target: region, - new_kind: RegionKind::Text, - } - .into(); - } - - #[test] - fn test_serde() { - let region = RegionId::new(); - let op = EditOperation::replace_text(region, "Hello, world!"); - - let json = serde_json::to_string_pretty(&op).unwrap(); - let parsed: EditOperation = serde_json::from_str(&json).unwrap(); - assert_eq!(op, parsed); - } -} diff --git a/crates/nvisy-document/src/operation/redact.rs b/crates/nvisy-document/src/operation/redact.rs deleted file mode 100644 index 9776971..0000000 --- a/crates/nvisy-document/src/operation/redact.rs +++ /dev/null @@ -1,103 +0,0 @@ -//! Redaction styles and options. - -use serde::{Deserialize, Serialize}; - -/// Style for redacting content. 
-#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum RedactStyle { - /// Black box overlay (content hidden but space preserved). - #[default] - BlackBox, - - /// White box overlay (content hidden, blends with background). - WhiteBox, - - /// Replace with placeholder text. - Placeholder { - /// The placeholder text to show. - text: String, - }, - - /// Blur effect (for images, if supported). - Blur { - /// Blur intensity (1-10). - intensity: u8, - }, - - /// Pixelate effect (for images, if supported). - Pixelate { - /// Block size in pixels. - block_size: u8, - }, - - /// Complete removal (content and space removed). - Remove, -} - -impl RedactStyle { - /// Creates a placeholder redaction with the given text. - #[must_use] - pub fn placeholder(text: impl Into) -> Self { - Self::Placeholder { text: text.into() } - } - - /// Creates a blur redaction with the given intensity. - #[must_use] - pub fn blur(intensity: u8) -> Self { - Self::Blur { - intensity: intensity.clamp(1, 10), - } - } - - /// Creates a pixelate redaction with the given block size. - #[must_use] - pub fn pixelate(block_size: u8) -> Self { - Self::Pixelate { - block_size: block_size.max(1), - } - } - - /// Returns true if this style preserves the original space. - #[must_use] - pub const fn preserves_space(&self) -> bool { - !matches!(self, Self::Remove) - } - - /// Returns true if this style is suitable for images. - #[must_use] - pub const fn is_image_style(&self) -> bool { - matches!(self, Self::Blur { .. } | Self::Pixelate { .. }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_redact_style_default() { - assert_eq!(RedactStyle::default(), RedactStyle::BlackBox); - } - - #[test] - fn test_placeholder() { - let style = RedactStyle::placeholder("[REDACTED]"); - assert!(matches!(style, RedactStyle::Placeholder { text } if text == "[REDACTED]")); - } - - #[test] - fn test_preserves_space() { - assert!(RedactStyle::BlackBox.preserves_space()); - assert!(RedactStyle::placeholder("X").preserves_space()); - assert!(!RedactStyle::Remove.preserves_space()); - } - - #[test] - fn test_serde() { - let style = RedactStyle::Blur { intensity: 5 }; - let json = serde_json::to_string(&style).unwrap(); - let parsed: RedactStyle = serde_json::from_str(&json).unwrap(); - assert_eq!(style, parsed); - } -} diff --git a/crates/nvisy-document/src/operation/result.rs b/crates/nvisy-document/src/operation/result.rs deleted file mode 100644 index 92d083e..0000000 --- a/crates/nvisy-document/src/operation/result.rs +++ /dev/null @@ -1,136 +0,0 @@ -//! Edit operation result types. - -use super::EditOperation; -use crate::format::region::{Region, RegionId}; - -/// Result of applying an edit operation. -#[derive(Debug, Clone)] -pub struct EditResult { - /// Whether the operation succeeded. - pub success: bool, - - /// New regions created by the operation. - pub created_regions: Vec, - - /// Regions modified by the operation. - pub modified_regions: Vec, - - /// Regions deleted by the operation. - pub deleted_region_ids: Vec, - - /// Reverse operation for undo support. - pub reverse_operation: Option, - - /// Warnings generated during the operation. - pub warnings: Vec, -} - -impl EditResult { - /// Creates a successful edit result with no changes. 
- #[must_use] - pub fn success() -> Self { - Self { - success: true, - created_regions: vec![], - modified_regions: vec![], - deleted_region_ids: vec![], - reverse_operation: None, - warnings: vec![], - } - } - - /// Creates a failed edit result. - #[must_use] - pub fn failed() -> Self { - Self { - success: false, - created_regions: vec![], - modified_regions: vec![], - deleted_region_ids: vec![], - reverse_operation: None, - warnings: vec![], - } - } - - /// Adds a created region. - #[must_use] - pub fn with_created(mut self, region: Region) -> Self { - self.created_regions.push(region); - self - } - - /// Adds a modified region. - #[must_use] - pub fn with_modified(mut self, region: Region) -> Self { - self.modified_regions.push(region); - self - } - - /// Adds a deleted region ID. - #[must_use] - pub fn with_deleted(mut self, id: RegionId) -> Self { - self.deleted_region_ids.push(id); - self - } - - /// Sets the reverse operation. - #[must_use] - pub fn with_reverse(mut self, op: EditOperation) -> Self { - self.reverse_operation = Some(op); - self - } - - /// Adds a warning. - #[must_use] - pub fn with_warning(mut self, warning: impl Into) -> Self { - self.warnings.push(warning.into()); - self - } - - /// Returns true if any regions were affected. - #[must_use] - pub fn has_changes(&self) -> bool { - !self.created_regions.is_empty() - || !self.modified_regions.is_empty() - || !self.deleted_region_ids.is_empty() - } - - /// Returns the total number of affected regions. - #[must_use] - pub fn affected_count(&self) -> usize { - self.created_regions.len() + self.modified_regions.len() + self.deleted_region_ids.len() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_edit_result_success() { - let result = EditResult::success(); - assert!(result.success); - assert!(!result.has_changes()); - assert_eq!(result.affected_count(), 0); - } - - #[test] - fn test_edit_result_failed() { - let result = EditResult::failed(); - assert!(!result.success); - } - - #[test] - fn test_edit_result_builder() { - let region = Region::text("test"); - let result = EditResult::success() - .with_created(region) - .with_warning("Minor issue"); - - assert!(result.success); - assert_eq!(result.created_regions.len(), 1); - assert_eq!(result.warnings.len(), 1); - assert!(result.has_changes()); - assert_eq!(result.affected_count(), 1); - } -} diff --git a/crates/nvisy-document/src/operation/split.rs b/crates/nvisy-document/src/operation/split.rs deleted file mode 100644 index db7eb29..0000000 --- a/crates/nvisy-document/src/operation/split.rs +++ /dev/null @@ -1,105 +0,0 @@ -//! Split operation types. - -use serde::{Deserialize, Serialize}; - -use crate::format::region::RegionId; - -/// Defines where to split a document. -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(rename_all = "snake_case", tag = "type")] -pub enum SplitBoundary { - /// Split after a specific page. - AfterPage { - /// Page number (0-indexed). - page: u32, - }, - - /// Split after a specific region. - AfterRegion { - /// Region ID to split after. - region: RegionId, - }, - - /// Split at page intervals. - EveryNPages { - /// Number of pages per split. - n: u32, - }, - - /// Split by heading level (each heading starts a new document). - ByHeading { - /// Heading level to split on (1-6). - level: u8, - }, -} - -impl SplitBoundary { - /// Creates a split after a specific page. - #[must_use] - pub fn after_page(page: u32) -> Self { - Self::AfterPage { page } - } - - /// Creates a split after a specific region. 
- #[must_use] - pub fn after_region(region: RegionId) -> Self { - Self::AfterRegion { region } - } - - /// Creates splits every N pages. - #[must_use] - pub fn every_n_pages(n: u32) -> Self { - Self::EveryNPages { n: n.max(1) } - } - - /// Creates splits at heading level. - #[must_use] - pub fn by_heading(level: u8) -> Self { - Self::ByHeading { - level: level.clamp(1, 6), - } - } -} - -/// Order for merging documents. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum MergeOrder { - /// Merge in the order provided. - #[default] - Sequential, - - /// Interleave pages from each document. - Interleaved, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_split_boundary() { - let split = SplitBoundary::after_page(5); - assert!(matches!(split, SplitBoundary::AfterPage { page: 5 })); - } - - #[test] - fn test_every_n_pages_minimum() { - let split = SplitBoundary::every_n_pages(0); - assert!(matches!(split, SplitBoundary::EveryNPages { n: 1 })); - } - - #[test] - fn test_heading_level_clamped() { - let split = SplitBoundary::by_heading(10); - assert!(matches!(split, SplitBoundary::ByHeading { level: 6 })); - } - - #[test] - fn test_serde() { - let split = SplitBoundary::after_page(3); - let json = serde_json::to_string(&split).unwrap(); - let parsed: SplitBoundary = serde_json::from_str(&json).unwrap(); - assert_eq!(split, parsed); - } -} diff --git a/crates/nvisy-docx/Cargo.toml b/crates/nvisy-docx/Cargo.toml index 871f217..f4b66cd 100644 --- a/crates/nvisy-docx/Cargo.toml +++ b/crates/nvisy-docx/Cargo.toml @@ -2,20 +2,20 @@ [package] name = "nvisy-docx" +description = "DOCX document format support for nvisy" +readme = "./README.md" + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } documentation = { workspace = true } -description = "DOCX document format support for nvisy" - [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] diff --git a/crates/nvisy-docx/src/document.rs b/crates/nvisy-docx/src/document.rs index d8ad4a4..cee2059 100644 --- a/crates/nvisy-docx/src/document.rs +++ b/crates/nvisy-docx/src/document.rs @@ -2,28 +2,26 @@ use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{ - Document, DocumentInfo, EditOperation, EditResult, EditableDocument, Error, PageOptions, - Region, RegionId, Result, -}; +use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; /// A loaded DOCX document. #[derive(Debug)] pub struct DocxDocument { info: DocumentInfo, regions: Vec, - modified: bool, + #[allow(dead_code)] + data: Bytes, } impl DocxDocument { /// Creates a new DOCX document (internal use). 
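+    ///
+    /// `data` retains the original DOCX bytes; it is currently unused
+    /// (hence the `#[allow(dead_code)]` on the field) pending `to_bytes`
+    /// serialization support.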
#[must_use] #[allow(dead_code)] // Will be used when load() is implemented - pub(crate) fn new(info: DocumentInfo) -> Self { + pub(crate) fn new(info: DocumentInfo, data: Bytes) -> Self { Self { info, regions: Vec::new(), - modified: false, + data, } } } @@ -49,31 +47,10 @@ impl Document for DocxDocument { self.regions.iter().find(|r| r.id == id) } - async fn serialize(&self) -> Result { + async fn to_bytes(&self) -> Result { // TODO: Implement DOCX serialization Err(Error::unsupported_format( "DOCX serialization not yet implemented", )) } } - -#[async_trait] -impl EditableDocument for DocxDocument { - async fn apply(&mut self, _operation: &EditOperation) -> Result { - // TODO: Implement DOCX editing - Err(Error::unsupported_format( - "DOCX editing not yet implemented", - )) - } - - fn is_modified(&self) -> bool { - self.modified - } - - async fn extract_page_regions(&mut self, _options: &PageOptions) -> Result> { - // TODO: Implement page region extraction - Err(Error::unsupported_format( - "DOCX page extraction not yet implemented", - )) - } -} diff --git a/crates/nvisy-docx/src/format.rs b/crates/nvisy-docx/src/format.rs index e378bcd..b4a1287 100644 --- a/crates/nvisy-docx/src/format.rs +++ b/crates/nvisy-docx/src/format.rs @@ -16,7 +16,7 @@ impl DocxFormat { #[must_use] pub fn new() -> Self { Self { - capabilities: Capabilities::read_only(), + capabilities: Capabilities::rich_document(), } } } @@ -46,13 +46,6 @@ impl DocumentFormat for DocxFormat { "DOCX loading not yet implemented", )) } - - async fn create_empty(&self) -> Result { - // TODO: Implement empty DOCX creation - Err(Error::unsupported_format( - "DOCX creation not yet implemented", - )) - } } #[cfg(test)] @@ -63,9 +56,22 @@ mod tests { fn test_format_metadata() { let format = DocxFormat::new(); assert_eq!(format.name(), "docx"); - assert!(format - .mime_types() - .contains(&"application/vnd.openxmlformats-officedocument.wordprocessingml.document")); + assert!( + format.mime_types().contains( + &"application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ) + ); assert!(format.extensions().contains(&"docx")); } + + #[test] + fn test_capabilities() { + let format = DocxFormat::new(); + let caps = format.capabilities(); + + assert!(caps.text.can_extract); + assert!(caps.text.has_rich_text); + assert!(caps.structure.can_detect_tables); + assert!(caps.structure.has_pages); + } } diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml index 2073535..f22977a 100644 --- a/crates/nvisy-engine/Cargo.toml +++ b/crates/nvisy-engine/Cargo.toml @@ -2,12 +2,14 @@ [package] name = "nvisy-engine" +description = "Document processing engine for nvisy" +readme = "./README.md" + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } @@ -25,16 +27,21 @@ docx = ["dep:nvisy-docx"] text = ["dep:nvisy-text"] [dependencies] +# Internal crates nvisy-archive = { workspace = true } nvisy-document = { workspace = true } nvisy-docx = { workspace = true, optional = true } nvisy-pdf = { workspace = true, optional = true } nvisy-text = { workspace = true, optional = true } +# Data types bytes = { workspace = true } +uuid = { workspace = true, features = ["v4"] } jiff = { workspace = true, features = ["std"] } + +# Serialization serde = { workspace = true, features = ["std", "derive"] } -uuid = { workspace = true, features = ["v4"] 
} [dev-dependencies] serde_json = { workspace = true, features = ["std"] } +tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/nvisy-engine/src/engine/mod.rs b/crates/nvisy-engine/src/engine/mod.rs index 4f27ddc..f16805d 100644 --- a/crates/nvisy-engine/src/engine/mod.rs +++ b/crates/nvisy-engine/src/engine/mod.rs @@ -9,20 +9,16 @@ use std::path::Path; use bytes::Bytes; pub use config::EngineConfig; -use nvisy_document::{DocumentFormat, Error, Result}; -#[cfg(feature = "docx")] -use nvisy_docx::{DocxDocument, DocxFormat}; -#[cfg(feature = "pdf")] -use nvisy_pdf::{PdfDocument, PdfFormat}; -#[cfg(feature = "text")] -use nvisy_text::{TextDocument, TextFormat}; +use nvisy_document::Result; + +use crate::registry::{BoxDocument, FormatRegistry}; /// The central document processing engine. /// /// `Engine` provides a unified interface for: /// - Loading documents from various formats (PDF, DOCX, plain text, etc.) -/// - Managing format handlers -/// - Processing archives containing documents +/// - Managing format handlers via a dynamic registry +/// - Auto-detecting formats from file extensions or MIME types /// /// # Example /// @@ -30,38 +26,32 @@ use nvisy_text::{TextDocument, TextFormat}; /// use nvisy_engine::Engine; /// /// let engine = Engine::new(); -/// let doc = engine.load_pdf(data).await?; +/// +/// // Load by file path (auto-detect format) +/// let doc = engine.load_file("document.pdf").await?; +/// +/// // Load by extension +/// let doc = engine.load_by_extension("json", data).await?; +/// +/// // Load with specific format (when you need the concrete type) +/// let doc = engine.pdf().load(data).await?; /// ``` -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct Engine { /// Configuration for the engine. config: EngineConfig, - /// PDF format handler. - #[cfg(feature = "pdf")] - pdf: PdfFormat, - - /// DOCX format handler. - #[cfg(feature = "docx")] - docx: DocxFormat, - - /// Plain text format handler. - #[cfg(feature = "text")] - text: TextFormat, + /// Format registry for dynamic loading. + registry: FormatRegistry, } impl Engine { - /// Creates a new engine with default configuration. + /// Creates a new engine with default configuration and all default formats. #[must_use] pub fn new() -> Self { Self { config: EngineConfig::default(), - #[cfg(feature = "pdf")] - pdf: PdfFormat::new(), - #[cfg(feature = "docx")] - docx: DocxFormat::new(), - #[cfg(feature = "text")] - text: TextFormat::new(), + registry: FormatRegistry::with_defaults(), } } @@ -70,165 +60,99 @@ impl Engine { pub fn with_config(config: EngineConfig) -> Self { Self { config, - #[cfg(feature = "pdf")] - pdf: PdfFormat::new(), - #[cfg(feature = "docx")] - docx: DocxFormat::new(), - #[cfg(feature = "text")] - text: TextFormat::new(), + registry: FormatRegistry::with_defaults(), } } - /// Returns a reference to the engine configuration. + /// Creates a new engine with a custom registry. #[must_use] - pub fn config(&self) -> &EngineConfig { - &self.config + pub fn with_registry(registry: FormatRegistry) -> Self { + Self { + config: EngineConfig::default(), + registry, + } } - /// Returns the PDF format handler. - #[cfg(feature = "pdf")] - #[cfg_attr(docsrs, doc(cfg(feature = "pdf")))] + /// Creates a new engine with custom configuration and registry. #[must_use] - pub fn pdf(&self) -> &PdfFormat { - &self.pdf + pub fn with_config_and_registry(config: EngineConfig, registry: FormatRegistry) -> Self { + Self { config, registry } } - /// Returns the DOCX format handler. 
- #[cfg(feature = "docx")] - #[cfg_attr(docsrs, doc(cfg(feature = "docx")))] + /// Returns a reference to the engine configuration. #[must_use] - pub fn docx(&self) -> &DocxFormat { - &self.docx + pub fn config(&self) -> &EngineConfig { + &self.config } - /// Returns the text format handler. - #[cfg(feature = "text")] - #[cfg_attr(docsrs, doc(cfg(feature = "text")))] + /// Returns a reference to the format registry. #[must_use] - pub fn text(&self) -> &TextFormat { - &self.text + pub fn registry(&self) -> &FormatRegistry { + &self.registry } - /// Loads a PDF document from bytes. - #[cfg(feature = "pdf")] - #[cfg_attr(docsrs, doc(cfg(feature = "pdf")))] - pub async fn load_pdf(&self, data: Bytes) -> Result { - self.pdf.load(data).await + /// Returns a mutable reference to the format registry. + /// + /// Use this to register custom formats. + pub fn registry_mut(&mut self) -> &mut FormatRegistry { + &mut self.registry } - /// Loads a DOCX document from bytes. - #[cfg(feature = "docx")] - #[cfg_attr(docsrs, doc(cfg(feature = "docx")))] - pub async fn load_docx(&self, data: Bytes) -> Result { - self.docx.load(data).await + /// Loads a document from a file path. + /// + /// The format is automatically detected from the file extension. + /// + /// # Errors + /// + /// Returns an error if: + /// - The file cannot be read + /// - The file has no extension + /// - The extension is not supported + /// - The document fails to load + pub async fn load_file>(&self, path: P) -> Result { + self.registry.load_file(path).await } - /// Loads a text document from bytes. - #[cfg(feature = "text")] - #[cfg_attr(docsrs, doc(cfg(feature = "text")))] - pub async fn load_text(&self, data: Bytes) -> Result { - self.text.load(data).await + /// Loads a document by file extension. + /// + /// # Errors + /// + /// Returns an error if the extension is not supported or loading fails. + pub async fn load_by_extension(&self, ext: &str, data: Bytes) -> Result { + self.registry.load_by_extension(ext, data).await } - /// Reads a file and returns its contents along with the file extension. + /// Loads a document by MIME type. /// /// # Errors /// - /// Returns an error if: - /// - The file cannot be read - /// - The file has no extension - pub fn read_file>(&self, path: P) -> Result<(Bytes, String)> { - let path = path.as_ref(); - let data = std::fs::read(path) - .map_err(|e| Error::io(format!("Failed to read file '{}': {}", path.display(), e)))?; - - let ext = path - .extension() - .and_then(|e| e.to_str()) - .ok_or_else(|| Error::unsupported_format("No file extension"))? - .to_owned(); - - Ok((Bytes::from(data), ext)) + /// Returns an error if the MIME type is not supported or loading fails. + pub async fn load_by_mime(&self, mime: &str, data: Bytes) -> Result { + self.registry.load_by_mime(mime, data).await } /// Checks if a file extension is supported. #[must_use] pub fn supports_extension(&self, ext: &str) -> bool { - let ext = ext.trim_start_matches('.').to_lowercase(); - - #[cfg(feature = "pdf")] - if self.pdf.extensions().contains(&ext.as_str()) { - return true; - } - - #[cfg(feature = "docx")] - if self.docx.extensions().contains(&ext.as_str()) { - return true; - } - - #[cfg(feature = "text")] - if self.text.extensions().contains(&ext.as_str()) { - return true; - } - - false + self.registry.supports_extension(ext) } /// Checks if a MIME type is supported. 
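+    ///
+    /// # Example
+    ///
+    /// A usage sketch; which MIME types are accepted depends on the
+    /// format features enabled at build time.
+    ///
+    /// ```ignore
+    /// let engine = Engine::new();
+    /// assert!(engine.supports_mime("application/pdf"));
+    /// ```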
#[must_use] - pub fn supports_mime(&self, mime_type: &str) -> bool { - let mime = mime_type.to_lowercase(); - - #[cfg(feature = "pdf")] - if self.pdf.mime_types().contains(&mime.as_str()) { - return true; - } - - #[cfg(feature = "docx")] - if self.docx.mime_types().contains(&mime.as_str()) { - return true; - } - - #[cfg(feature = "text")] - if self.text.mime_types().contains(&mime.as_str()) { - return true; - } - - false + pub fn supports_mime(&self, mime: &str) -> bool { + self.registry.supports_mime(mime) } /// Returns all supported file extensions. #[must_use] pub fn supported_extensions(&self) -> Vec<&'static str> { - let mut exts = Vec::new(); - - #[cfg(feature = "pdf")] - exts.extend(self.pdf.extensions()); - - #[cfg(feature = "docx")] - exts.extend(self.docx.extensions()); - - #[cfg(feature = "text")] - exts.extend(self.text.extensions()); - - exts + self.registry.supported_extensions() } /// Returns all supported MIME types. #[must_use] pub fn supported_mime_types(&self) -> Vec<&'static str> { - let mut mimes = Vec::new(); - - #[cfg(feature = "pdf")] - mimes.extend(self.pdf.mime_types()); - - #[cfg(feature = "docx")] - mimes.extend(self.docx.mime_types()); - - #[cfg(feature = "text")] - mimes.extend(self.text.mime_types()); - - mimes + self.registry.supported_mime_types() } } @@ -238,6 +162,15 @@ impl Default for Engine { } } +impl Clone for Engine { + fn clone(&self) -> Self { + Self { + config: self.config.clone(), + registry: FormatRegistry::with_defaults(), + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -272,6 +205,12 @@ mod tests { { assert!(engine.supports_extension("txt")); assert!(engine.supports_extension("md")); + assert!(engine.supports_extension("json")); + assert!(engine.supports_extension("csv")); + assert!(engine.supports_extension("xml")); + assert!(engine.supports_extension("yaml")); + assert!(engine.supports_extension("toml")); + assert!(engine.supports_extension("ini")); } assert!(!engine.supports_extension("xyz")); @@ -285,8 +224,74 @@ mod tests { assert!(engine.supports_mime("application/pdf")); #[cfg(feature = "text")] - assert!(engine.supports_mime("text/plain")); + { + assert!(engine.supports_mime("text/plain")); + assert!(engine.supports_mime("text/markdown")); + assert!(engine.supports_mime("application/json")); + assert!(engine.supports_mime("text/csv")); + assert!(engine.supports_mime("application/xml")); + assert!(engine.supports_mime("application/x-yaml")); + assert!(engine.supports_mime("application/toml")); + } assert!(!engine.supports_mime("application/unknown")); } + + #[cfg(feature = "text")] + #[tokio::test] + async fn test_load_by_extension() { + let engine = Engine::new(); + + let doc = engine + .load_by_extension("json", Bytes::from(r#"{"key": "value"}"#)) + .await + .unwrap(); + assert!(!doc.regions().is_empty()); + + let doc = engine + .load_by_extension("md", Bytes::from("# Title\n\nParagraph")) + .await + .unwrap(); + assert!(!doc.regions().is_empty()); + } + + #[cfg(feature = "text")] + #[tokio::test] + async fn test_load_by_mime() { + let engine = Engine::new(); + + let doc = engine + .load_by_mime("application/json", Bytes::from(r#"{"key": "value"}"#)) + .await + .unwrap(); + assert!(!doc.regions().is_empty()); + } + + #[test] + fn test_registry_access() { + let engine = Engine::new(); + let registry = engine.registry(); + + #[cfg(feature = "text")] + { + let format = registry.get_by_extension("json").unwrap(); + assert_eq!(format.name(), "json"); + } + } + + #[test] + fn test_custom_registry() { + let mut registry = 
FormatRegistry::new();
+
+        #[cfg(feature = "text")]
+        registry.register(nvisy_text::JsonFormat::new());
+
+        let engine = Engine::with_registry(registry);
+
+        #[cfg(feature = "text")]
+        {
+            assert!(engine.supports_extension("json"));
+            assert!(!engine.supports_extension("xml")); // Not registered
+        }
+    }
 }
diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs
index 1093cf1..c4d83a1 100644
--- a/crates/nvisy-engine/src/lib.rs
+++ b/crates/nvisy-engine/src/lib.rs
@@ -3,21 +3,13 @@
 #![doc = include_str!("../README.md")]
 
 pub mod engine;
+pub mod registry;
 pub mod session;
 
 pub use engine::{Engine, EngineConfig};
 pub use nvisy_document::{
-    self as doc, BoundingBox, Capabilities, DocumentFormat, EditOperation, Point, Region, RegionId,
+    self as doc, BoundingBox, Capabilities, Document, DocumentFormat, Point, Region, RegionId,
     RegionKind,
 };
-// Re-export format types for convenience
-#[cfg(feature = "docx")]
-#[cfg_attr(docsrs, doc(cfg(feature = "docx")))]
-pub use nvisy_docx::{DocxDocument, DocxFormat};
-#[cfg(feature = "pdf")]
-#[cfg_attr(docsrs, doc(cfg(feature = "pdf")))]
-pub use nvisy_pdf::{PdfDocument, PdfFormat};
-#[cfg(feature = "text")]
-#[cfg_attr(docsrs, doc(cfg(feature = "text")))]
-pub use nvisy_text::{TextDocument, TextFormat};
-pub use session::{EditHistory, EditSession, HistoryEntry, SessionConfig, SessionId};
+pub use registry::{AnyFormat, BoxDocument, FormatRegistry};
+pub use session::{AccessEntry, AccessHistory, ReadSession, SessionConfig, SessionId};
diff --git a/crates/nvisy-engine/src/registry/mod.rs b/crates/nvisy-engine/src/registry/mod.rs
new file mode 100644
index 0000000..514a50d
--- /dev/null
+++ b/crates/nvisy-engine/src/registry/mod.rs
@@ -0,0 +1,375 @@
+//! Format registry for dynamic document loading.
+//!
+//! The registry provides type-erased format handling, allowing documents
+//! to be loaded by extension or MIME type without knowing the concrete
+//! format at compile time.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use bytes::Bytes;
+use nvisy_document::{Capabilities, Document, Error, Result};
+
+/// A type-erased document that can be used for common operations.
+pub type BoxDocument = Box<dyn Document + Send + Sync>;
+
+/// A type-erased format handler.
+///
+/// This trait provides a common interface for all format handlers,
+/// enabling dynamic dispatch and runtime format selection.
+pub trait AnyFormat: Send + Sync {
+    /// Returns the format name.
+    fn name(&self) -> &'static str;
+
+    /// Returns supported MIME types.
+    fn mime_types(&self) -> &'static [&'static str];
+
+    /// Returns supported file extensions.
+    fn extensions(&self) -> &'static [&'static str];
+
+    /// Returns the format capabilities.
+    fn capabilities(&self) -> &Capabilities;
+
+    /// Loads a document from bytes, returning a type-erased document.
+    fn load_boxed(
+        &self,
+        data: Bytes,
+    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<BoxDocument>> + Send + '_>>;
+}
+
+/// Wrapper that implements AnyFormat for any DocumentFormat.
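+///
+/// The wrapper forwards each trait method to the wrapped format and boxes
+/// the loaded document, so callers only ever deal with `BoxDocument`.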
+struct FormatWrapper<F> {
+    inner: F,
+}
+
+impl<F> AnyFormat for FormatWrapper<F>
+where
+    F: nvisy_document::DocumentFormat + Send + Sync + 'static,
+    F::Document: Send + Sync + 'static,
+{
+    fn name(&self) -> &'static str {
+        nvisy_document::DocumentFormat::name(&self.inner)
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        nvisy_document::DocumentFormat::mime_types(&self.inner)
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        nvisy_document::DocumentFormat::extensions(&self.inner)
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        nvisy_document::DocumentFormat::capabilities(&self.inner)
+    }
+
+    fn load_boxed(
+        &self,
+        data: Bytes,
+    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<BoxDocument>> + Send + '_>> {
+        Box::pin(async move {
+            let doc = nvisy_document::DocumentFormat::load(&self.inner, data).await?;
+            Ok(Box::new(doc) as BoxDocument)
+        })
+    }
+}
+
+/// Registry entry containing a format handler.
+struct RegistryEntry {
+    format: Arc<dyn AnyFormat>,
+}
+
+/// A registry of document formats.
+///
+/// The registry maintains mappings from file extensions and MIME types
+/// to format handlers, enabling dynamic document loading.
+///
+/// # Example
+///
+/// ```ignore
+/// use nvisy_engine::FormatRegistry;
+///
+/// let registry = FormatRegistry::with_defaults();
+///
+/// // Load by file path
+/// let doc = registry.load_file("document.pdf").await?;
+///
+/// // Load by extension
+/// let doc = registry.load_by_extension("json", data).await?;
+/// ```
+#[derive(Default)]
+pub struct FormatRegistry {
+    /// All registered formats.
+    formats: Vec<RegistryEntry>,
+
+    /// Extension to format index mapping.
+    by_extension: HashMap<&'static str, usize>,
+
+    /// MIME type to format index mapping.
+    by_mime: HashMap<&'static str, usize>,
+}
+
+impl FormatRegistry {
+    /// Creates an empty registry.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            formats: Vec::new(),
+            by_extension: HashMap::new(),
+            by_mime: HashMap::new(),
+        }
+    }
+
+    /// Creates a registry with all default formats registered.
+    #[must_use]
+    pub fn with_defaults() -> Self {
+        let mut registry = Self::new();
+        registry.register_defaults();
+        registry
+    }
+
+    /// Registers all default formats based on enabled features.
+    pub fn register_defaults(&mut self) {
+        #[cfg(feature = "pdf")]
+        self.register(nvisy_pdf::PdfFormat::new());
+
+        #[cfg(feature = "docx")]
+        self.register(nvisy_docx::DocxFormat::new());
+
+        #[cfg(feature = "text")]
+        {
+            self.register(nvisy_text::PlainTextFormat::new());
+            self.register(nvisy_text::MarkdownFormat::new());
+            self.register(nvisy_text::JsonFormat::new());
+            self.register(nvisy_text::CsvFormat::new());
+            self.register(nvisy_text::XmlFormat::new());
+            self.register(nvisy_text::YamlFormat::new());
+            self.register(nvisy_text::TomlFormat::new());
+            self.register(nvisy_text::IniFormat::new());
+        }
+    }
+
+    /// Registers a format handler.
+    ///
+    /// Extensions and MIME types from the format are automatically indexed.
+    /// If an extension or MIME type is already registered, the new format
+    /// takes precedence.
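+    ///
+    /// # Example
+    ///
+    /// A minimal sketch; `MyFormat` is a stand-in for any type implementing
+    /// `DocumentFormat`, and the `"my"` extension is illustrative.
+    ///
+    /// ```ignore
+    /// let mut registry = FormatRegistry::new();
+    /// registry.register(MyFormat::new());
+    /// assert!(registry.supports_extension("my"));
+    /// ```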
+    pub fn register<F>(&mut self, format: F)
+    where
+        F: nvisy_document::DocumentFormat + Send + Sync + 'static,
+        F::Document: Send + Sync + 'static,
+    {
+        let wrapper = FormatWrapper { inner: format };
+        let index = self.formats.len();
+        let format: Arc<dyn AnyFormat> = Arc::new(wrapper);
+
+        // Index by extension
+        for ext in format.extensions() {
+            self.by_extension.insert(ext, index);
+        }
+
+        // Index by MIME type
+        for mime in format.mime_types() {
+            self.by_mime.insert(mime, index);
+        }
+
+        self.formats.push(RegistryEntry { format });
+    }
+
+    /// Returns the format handler for a file extension.
+    #[must_use]
+    pub fn get_by_extension(&self, ext: &str) -> Option<&dyn AnyFormat> {
+        let ext = ext.trim_start_matches('.').to_lowercase();
+        self.by_extension
+            .get(ext.as_str())
+            .and_then(|&idx| self.formats.get(idx))
+            .map(|e| e.format.as_ref())
+    }
+
+    /// Returns the format handler for a MIME type.
+    #[must_use]
+    pub fn get_by_mime(&self, mime: &str) -> Option<&dyn AnyFormat> {
+        let mime = mime.to_lowercase();
+        self.by_mime
+            .get(mime.as_str())
+            .and_then(|&idx| self.formats.get(idx))
+            .map(|e| e.format.as_ref())
+    }
+
+    /// Checks if an extension is supported.
+    #[must_use]
+    pub fn supports_extension(&self, ext: &str) -> bool {
+        let ext = ext.trim_start_matches('.').to_lowercase();
+        self.by_extension.contains_key(ext.as_str())
+    }
+
+    /// Checks if a MIME type is supported.
+    #[must_use]
+    pub fn supports_mime(&self, mime: &str) -> bool {
+        let mime = mime.to_lowercase();
+        self.by_mime.contains_key(mime.as_str())
+    }
+
+    /// Returns all supported file extensions.
+    #[must_use]
+    pub fn supported_extensions(&self) -> Vec<&'static str> {
+        self.by_extension.keys().copied().collect()
+    }
+
+    /// Returns all supported MIME types.
+    #[must_use]
+    pub fn supported_mime_types(&self) -> Vec<&'static str> {
+        self.by_mime.keys().copied().collect()
+    }
+
+    /// Returns all registered formats.
+    #[must_use]
+    pub fn formats(&self) -> Vec<&dyn AnyFormat> {
+        self.formats.iter().map(|e| e.format.as_ref()).collect()
+    }
+
+    /// Loads a document by file extension.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The extension is not supported
+    /// - The document fails to load
+    pub async fn load_by_extension(&self, ext: &str, data: Bytes) -> Result<BoxDocument> {
+        let ext_lower = ext.trim_start_matches('.').to_lowercase();
+
+        let format = self
+            .by_extension
+            .get(ext_lower.as_str())
+            .and_then(|&idx| self.formats.get(idx))
+            .ok_or_else(|| Error::unsupported_format(format!("Unsupported extension: {}", ext)))?;
+
+        format.format.load_boxed(data).await
+    }
+
+    /// Loads a document by MIME type.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The MIME type is not supported
+    /// - The document fails to load
+    pub async fn load_by_mime(&self, mime: &str, data: Bytes) -> Result<BoxDocument> {
+        let mime_lower = mime.to_lowercase();
+
+        let format = self
+            .by_mime
+            .get(mime_lower.as_str())
+            .and_then(|&idx| self.formats.get(idx))
+            .ok_or_else(|| Error::unsupported_format(format!("Unsupported MIME type: {}", mime)))?;
+
+        format.format.load_boxed(data).await
+    }
+
+    /// Loads a document from a file path.
+    ///
+    /// The format is determined by the file extension.
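+    ///
+    /// # Example
+    ///
+    /// A minimal usage sketch; the file name here is illustrative.
+    ///
+    /// ```ignore
+    /// let registry = FormatRegistry::with_defaults();
+    /// let doc = registry.load_file("report.csv").await?;
+    /// assert!(!doc.regions().is_empty());
+    /// ```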
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The file cannot be read
+    /// - The file has no extension
+    /// - The extension is not supported
+    /// - The document fails to load
+    pub async fn load_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<BoxDocument> {
+        let path = path.as_ref();
+
+        let ext = path
+            .extension()
+            .and_then(|e| e.to_str())
+            .ok_or_else(|| Error::unsupported_format("File has no extension"))?;
+
+        let data = std::fs::read(path)
+            .map_err(|e| Error::io(format!("Failed to read file '{}': {}", path.display(), e)))?;
+
+        self.load_by_extension(ext, Bytes::from(data)).await
+    }
+}
+
+impl std::fmt::Debug for FormatRegistry {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("FormatRegistry")
+            .field("formats", &self.formats.len())
+            .field("extensions", &self.by_extension.keys().collect::<Vec<_>>())
+            .field("mime_types", &self.by_mime.keys().collect::<Vec<_>>())
+            .finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_registry_creation() {
+        let registry = FormatRegistry::new();
+        assert!(registry.formats().is_empty());
+    }
+
+    #[test]
+    fn test_registry_with_defaults() {
+        let registry = FormatRegistry::with_defaults();
+        assert!(!registry.formats().is_empty());
+
+        #[cfg(feature = "text")]
+        {
+            assert!(registry.supports_extension("txt"));
+            assert!(registry.supports_extension("json"));
+            assert!(registry.supports_extension("md"));
+        }
+    }
+
+    #[test]
+    fn test_get_by_extension() {
+        let registry = FormatRegistry::with_defaults();
+
+        #[cfg(feature = "text")]
+        {
+            let format = registry.get_by_extension("json").unwrap();
+            assert_eq!(format.name(), "json");
+
+            let format = registry.get_by_extension(".JSON").unwrap();
+            assert_eq!(format.name(), "json");
+        }
+
+        assert!(registry.get_by_extension("xyz").is_none());
+    }
+
+    #[cfg(feature = "text")]
+    #[tokio::test]
+    async fn test_load_by_extension() {
+        let registry = FormatRegistry::with_defaults();
+
+        let doc = registry
+            .load_by_extension("json", Bytes::from(r#"{"key": "value"}"#))
+            .await
+            .unwrap();
+
+        assert!(!doc.regions().is_empty());
+    }
+
+    #[cfg(feature = "text")]
+    #[tokio::test]
+    async fn test_load_by_mime() {
+        let registry = FormatRegistry::with_defaults();
+
+        let doc = registry
+            .load_by_mime("application/json", Bytes::from(r#"{"key": "value"}"#))
+            .await
+            .unwrap();
+
+        assert!(!doc.regions().is_empty());
+    }
+
+    #[test]
+    fn test_unsupported_extension() {
+        let registry = FormatRegistry::with_defaults();
+        assert!(!registry.supports_extension("xyz"));
+    }
+}
diff --git a/crates/nvisy-engine/src/session/history.rs b/crates/nvisy-engine/src/session/history.rs
index 63bd15d..5c80bb4 100644
--- a/crates/nvisy-engine/src/session/history.rs
+++ b/crates/nvisy-engine/src/session/history.rs
@@ -1,225 +1,100 @@
-//! Edit history for undo/redo support.
+//! Session history tracking (for audit/logging purposes).
 
 use jiff::Timestamp;
-use nvisy_document::EditOperation;
 
-/// A single entry in the edit history.
+/// A single entry tracking document access.
 #[derive(Debug, Clone)]
-pub struct HistoryEntry {
-    /// The operation that was applied.
-    pub operation: EditOperation,
-
-    /// The reverse operation for undoing.
-    pub reverse: EditOperation,
-
-    /// When the operation was applied.
+pub struct AccessEntry {
+    /// When the access occurred.
     pub timestamp: Timestamp,
 
-    /// Optional description of the operation.
-    pub description: Option<String>,
+    /// Description of the access.
+    pub description: String,
 }
 
-impl HistoryEntry {
-    /// Creates a new history entry.
-    #[must_use]
-    pub fn new(operation: EditOperation, reverse: EditOperation) -> Self {
-        Self {
-            operation,
-            reverse,
-            timestamp: Timestamp::now(),
-            description: None,
-        }
-    }
-
-    /// Creates a new history entry with a description.
+impl AccessEntry {
+    /// Creates a new access entry.
     #[must_use]
-    pub fn with_description(
-        operation: EditOperation,
-        reverse: EditOperation,
-        description: impl Into<String>,
-    ) -> Self {
+    pub fn new(description: impl Into<String>) -> Self {
         Self {
-            operation,
-            reverse,
             timestamp: Timestamp::now(),
-            description: Some(description.into()),
+            description: description.into(),
         }
     }
 }
 
-/// Manages edit history with undo/redo support.
+/// Tracks document access history for audit purposes.
 #[derive(Debug, Default)]
-pub struct EditHistory {
-    /// Stack of operations that can be undone.
-    undo_stack: Vec<HistoryEntry>,
-
-    /// Stack of operations that can be redone.
-    redo_stack: Vec<HistoryEntry>,
+pub struct AccessHistory {
+    /// List of access entries.
+    entries: Vec<AccessEntry>,
 }
 
-impl EditHistory {
+impl AccessHistory {
     /// Creates a new empty history.
     #[must_use]
     pub fn new() -> Self {
         Self::default()
     }
 
-    /// Records a new operation in the history.
-    ///
-    /// This clears the redo stack since we're diverging from the previous future.
-    pub fn record(&mut self, entry: HistoryEntry) {
-        self.redo_stack.clear();
-        self.undo_stack.push(entry);
-    }
-
-    /// Returns true if there are operations that can be undone.
-    #[must_use]
-    pub fn can_undo(&self) -> bool {
-        !self.undo_stack.is_empty()
-    }
-
-    /// Returns true if there are operations that can be redone.
-    #[must_use]
-    pub fn can_redo(&self) -> bool {
-        !self.redo_stack.is_empty()
-    }
-
-    /// Returns the number of operations that can be undone.
-    #[must_use]
-    pub fn undo_count(&self) -> usize {
-        self.undo_stack.len()
+    /// Records a new access entry.
+    pub fn record(&mut self, description: impl Into<String>) {
+        self.entries.push(AccessEntry::new(description));
     }
 
-    /// Returns the number of operations that can be redone.
+    /// Returns the number of entries.
     #[must_use]
-    pub fn redo_count(&self) -> usize {
-        self.redo_stack.len()
+    pub fn len(&self) -> usize {
+        self.entries.len()
     }
 
-    /// Pops the most recent operation for undoing.
-    ///
-    /// Returns the entry that should be reversed.
-    pub fn pop_undo(&mut self) -> Option<HistoryEntry> {
-        self.undo_stack.pop().inspect(|entry| {
-            self.redo_stack.push(entry.clone());
-        })
-    }
-
-    /// Pops the most recently undone operation for redoing.
-    ///
-    /// Returns the entry that should be reapplied.
-    pub fn pop_redo(&mut self) -> Option<HistoryEntry> {
-        self.redo_stack.pop().inspect(|entry| {
-            self.undo_stack.push(entry.clone());
-        })
-    }
-
-    /// Peeks at the most recent undoable operation without removing it.
+    /// Returns true if there are no entries.
     #[must_use]
-    pub fn peek_undo(&self) -> Option<&HistoryEntry> {
-        self.undo_stack.last()
+    pub fn is_empty(&self) -> bool {
+        self.entries.is_empty()
     }
 
-    /// Peeks at the most recent redoable operation without removing it.
+    /// Returns all entries.
     #[must_use]
-    pub fn peek_redo(&self) -> Option<&HistoryEntry> {
-        self.redo_stack.last()
-    }
-
-    /// Returns all entries in the undo stack (oldest first).
-    #[must_use]
-    pub fn undo_entries(&self) -> &[HistoryEntry] {
-        &self.undo_stack
-    }
-
-    /// Returns all entries in the redo stack (oldest first).
-    #[must_use]
-    pub fn redo_entries(&self) -> &[HistoryEntry] {
-        &self.redo_stack
+    pub fn entries(&self) -> &[AccessEntry] {
+        &self.entries
     }
 
     /// Clears all history.
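+    ///
+    /// # Example
+    ///
+    /// A small sketch of the record-then-clear lifecycle.
+    ///
+    /// ```ignore
+    /// let mut history = AccessHistory::new();
+    /// history.record("Loaded document");
+    /// history.clear();
+    /// assert!(history.is_empty());
+    /// ```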
pub fn clear(&mut self) { - self.undo_stack.clear(); - self.redo_stack.clear(); - } - - /// Clears the redo stack only. - pub fn clear_redo(&mut self) { - self.redo_stack.clear(); + self.entries.clear(); } } #[cfg(test)] mod tests { - use nvisy_document::{InsertContent, RegionId}; - use super::*; - fn make_entry() -> HistoryEntry { - let region = RegionId::new(); - HistoryEntry::new( - EditOperation::delete(region), - EditOperation::insert_after(region, InsertContent::text("original")), - ) - } - #[test] fn test_empty_history() { - let history = EditHistory::new(); - assert!(!history.can_undo()); - assert!(!history.can_redo()); + let history = AccessHistory::new(); + assert!(history.is_empty()); + assert_eq!(history.len(), 0); } #[test] - fn test_record_and_undo() { - let mut history = EditHistory::new(); + fn test_record_access() { + let mut history = AccessHistory::new(); - history.record(make_entry()); - assert!(history.can_undo()); - assert!(!history.can_redo()); + history.record("Loaded document"); + history.record("Extracted text"); - let entry = history.pop_undo(); - assert!(entry.is_some()); - assert!(!history.can_undo()); - assert!(history.can_redo()); + assert_eq!(history.len(), 2); + assert!(!history.is_empty()); + assert_eq!(history.entries()[0].description, "Loaded document"); + assert_eq!(history.entries()[1].description, "Extracted text"); } #[test] - fn test_redo() { - let mut history = EditHistory::new(); - - history.record(make_entry()); - history.pop_undo(); - - assert!(history.can_redo()); - - let entry = history.pop_redo(); - assert!(entry.is_some()); - assert!(history.can_undo()); - assert!(!history.can_redo()); - } - - #[test] - fn test_new_record_clears_redo() { - let mut history = EditHistory::new(); - - history.record(make_entry()); - history.pop_undo(); - assert!(history.can_redo()); - - history.record(make_entry()); - assert!(!history.can_redo()); - } - - #[test] - fn test_unlimited_entries() { - let mut history = EditHistory::new(); - - for _ in 0..1000 { - history.record(make_entry()); - } - - assert_eq!(history.undo_count(), 1000); + fn test_clear() { + let mut history = AccessHistory::new(); + history.record("test"); + history.clear(); + assert!(history.is_empty()); } } diff --git a/crates/nvisy-engine/src/session/mod.rs b/crates/nvisy-engine/src/session/mod.rs index ccdb983..2599aec 100644 --- a/crates/nvisy-engine/src/session/mod.rs +++ b/crates/nvisy-engine/src/session/mod.rs @@ -1,9 +1,8 @@ -//! Document editing sessions. +//! Document reading sessions. //! -//! An `EditSession` wraps a document and provides: -//! - Stable region IDs across edits -//! - Undo/redo support -//! - Operation validation +//! A `ReadSession` wraps a document and provides: +//! - Stable region IDs for referencing +//! - Access history tracking //! - Streaming/pagination for large documents mod history; @@ -12,15 +11,12 @@ use std::collections::HashMap; use std::num::NonZeroU32; use bytes::Bytes; -pub use history::{EditHistory, HistoryEntry}; +pub use history::{AccessEntry, AccessHistory}; use jiff::Timestamp; -use nvisy_document::{ - Capabilities, EditOperation, EditResult, EditableDocument, Error, PageOptions, Region, - RegionId, RegionStatus, Result, -}; +use nvisy_document::{Capabilities, Document, PageOptions, Region, RegionId, Result}; use uuid::Uuid; -/// Unique identifier for an edit session. +/// Unique identifier for a read session. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct SessionId(Uuid); @@ -50,45 +46,41 @@ impl std::fmt::Display for SessionId { } } -/// Configuration for an edit session. +/// Configuration for a read session. #[derive(Debug, Clone)] pub struct SessionConfig { - /// Whether to auto-extract regions on load. - pub auto_extract_regions: bool, - /// Page batch size for streaming. pub page_batch_size: u32, - /// Whether to validate operations before applying. - pub validate_operations: bool, + /// Whether to track access history. + pub track_history: bool, } impl Default for SessionConfig { fn default() -> Self { Self { - auto_extract_regions: true, page_batch_size: 10, - validate_operations: true, + track_history: false, } } } -/// An edit session for a document. +/// A read session for a document. /// -/// Sessions provide stable region IDs, undo/redo, and streaming support. +/// Sessions provide stable region IDs and streaming support. /// The session is generic over the document type `D`. -pub struct EditSession { +pub struct ReadSession { /// Unique session identifier. id: SessionId, - /// The underlying document (must support editing). + /// The underlying document. document: D, /// Format capabilities. capabilities: Capabilities, - /// Edit history for undo/redo. - history: EditHistory, + /// Access history for auditing. + history: AccessHistory, /// Session configuration. config: SessionConfig, @@ -99,18 +91,18 @@ pub struct EditSession { /// Region cache for quick lookup. region_cache: HashMap, - /// Pages that have been loaded (for lazy loading). - loaded_pages: Vec, + /// Pages that have been accessed (for streaming). + accessed_pages: Vec, /// Total number of pages in the document. total_pages: Option, } -impl EditSession { - /// Creates a new edit session from a loaded document. +impl ReadSession { + /// Creates a new read session from a loaded document. #[must_use] pub fn new(document: D, capabilities: Capabilities, config: SessionConfig) -> Self { - let history = EditHistory::new(); + let history = AccessHistory::new(); let total_pages = document.info().page_count; let mut region_cache = HashMap::new(); @@ -118,7 +110,7 @@ impl EditSession { region_cache.insert(region.id, region.clone()); } - let loaded_pages = if total_pages.is_some() { + let accessed_pages = if total_pages.is_some() { document .regions() .iter() @@ -138,7 +130,7 @@ impl EditSession { config, created_at: Timestamp::now(), region_cache, - loaded_pages, + accessed_pages, total_pages, } } @@ -155,11 +147,6 @@ impl EditSession { &self.document } - /// Returns a mutable reference to the underlying document. - pub fn document_mut(&mut self) -> &mut D { - &mut self.document - } - /// Returns the format capabilities. #[must_use] pub fn capabilities(&self) -> &Capabilities { @@ -172,22 +159,17 @@ impl EditSession { self.created_at } - /// Returns the edit history. + /// Returns the access history. #[must_use] - pub fn history(&self) -> &EditHistory { + pub fn history(&self) -> &AccessHistory { &self.history } - /// Returns whether there are undoable operations. - #[must_use] - pub fn can_undo(&self) -> bool { - self.history.can_undo() - } - - /// Returns whether there are redoable operations. - #[must_use] - pub fn can_redo(&self) -> bool { - self.history.can_redo() + /// Records an access event. + pub fn record_access(&mut self, description: impl Into) { + if self.config.track_history { + self.history.record(description); + } } /// Returns all regions (from cache). 
@@ -217,162 +199,31 @@ impl EditSession { self.total_pages } - /// Returns which pages have been loaded. + /// Returns which pages have been accessed. #[must_use] - pub fn loaded_pages(&self) -> &[u32] { - &self.loaded_pages + pub fn accessed_pages(&self) -> &[u32] { + &self.accessed_pages } - /// Checks if a page has been loaded. + /// Checks if a page has been accessed. #[must_use] - pub fn is_page_loaded(&self, page: u32) -> bool { - self.loaded_pages.contains(&page) + pub fn is_page_accessed(&self, page: u32) -> bool { + self.accessed_pages.contains(&page) } - /// Validates an operation before applying. - fn validate_operation(&self, operation: &EditOperation) -> Result<()> { - let support = self.capabilities.supports(operation); - if !support.is_supported() { - return Err(Error::operation_not_supported(format!("{operation:?}"))); - } - - for region_id in operation.referenced_regions() { - if !self.region_cache.contains_key(®ion_id) { - return Err(Error::region_not_found(region_id)); - } - } - - for region_id in operation.referenced_regions() { - if let Some(region) = self.region_cache.get(®ion_id) { - if region.effective_status() == RegionStatus::Deleted { - return Err(Error::invalid_operation(format!( - "region {region_id} is deleted" - ))); - } - } - } - - Ok(()) - } - - /// Applies an edit operation. - pub async fn apply(&mut self, operation: EditOperation) -> Result { - if self.config.validate_operations { - self.validate_operation(&operation)?; - } - - let result = self.document.apply(&operation).await?; - - if result.success { - for region in &result.created_regions { - self.region_cache.insert(region.id, region.clone()); - } - - for region in &result.modified_regions { - self.region_cache.insert(region.id, region.clone()); - } - - for id in &result.deleted_region_ids { - if let Some(region) = self.region_cache.get_mut(id) { - region.status = Some(RegionStatus::Deleted); - } - } - - if let Some(reverse) = result.reverse_operation.clone() { - self.history.record(HistoryEntry::new(operation, reverse)); - } - } - - Ok(result) - } - - /// Undoes the most recent operation. - pub async fn undo(&mut self) -> Result> { - let Some(entry) = self.history.pop_undo() else { - return Ok(None); - }; - - let result = self.document.apply(&entry.reverse).await?; - - if result.success { - for region in &result.created_regions { - self.region_cache.insert(region.id, region.clone()); - } - - for region in &result.modified_regions { - self.region_cache.insert(region.id, region.clone()); - } - - for id in &result.deleted_region_ids { - if let Some(region) = self.region_cache.get_mut(id) { - region.status = Some(RegionStatus::Deleted); - } - } - } - - Ok(Some(result)) - } - - /// Redoes the most recently undone operation. - pub async fn redo(&mut self) -> Result> { - let Some(entry) = self.history.pop_redo() else { - return Ok(None); - }; - - let result = self.document.apply(&entry.operation).await?; - - if result.success { - for region in &result.created_regions { - self.region_cache.insert(region.id, region.clone()); - } - - for region in &result.modified_regions { - self.region_cache.insert(region.id, region.clone()); - } - - for id in &result.deleted_region_ids { - if let Some(region) = self.region_cache.get_mut(id) { - region.status = Some(RegionStatus::Deleted); - } - } - } - - Ok(Some(result)) - } - - /// Loads regions for additional pages (streaming support). 
- pub async fn load_pages(&mut self, start_page: u32, count: u32) -> Result<()> { - let options = PageOptions { + /// Gets page options for a range of pages. + #[must_use] + pub fn page_options(&self, start_page: u32, count: u32) -> PageOptions { + PageOptions { start_page, page_count: Some(count), extract_regions: true, - }; - - let regions = self.document.extract_page_regions(&options).await?; - - for region in regions { - if let Some(page) = region.page { - if !self.loaded_pages.contains(&page.get()) { - self.loaded_pages.push(page.get()); - } - } - self.region_cache.insert(region.id, region); } - - self.loaded_pages.sort_unstable(); - - Ok(()) } /// Serializes the document to bytes. - pub async fn serialize(&self) -> Result { - self.document.serialize().await - } - - /// Returns whether the document has unsaved changes. - #[must_use] - pub fn is_modified(&self) -> bool { - self.document.is_modified() + pub async fn to_bytes(&self) -> Result { + self.document.to_bytes().await } /// Consumes the session and returns the underlying document. @@ -398,8 +249,7 @@ mod tests { #[test] fn test_session_config_default() { let config = SessionConfig::default(); - assert!(config.auto_extract_regions); assert_eq!(config.page_batch_size, 10); - assert!(config.validate_operations); + assert!(!config.track_history); } } diff --git a/crates/nvisy-image/Cargo.toml b/crates/nvisy-image/Cargo.toml new file mode 100644 index 0000000..d84ecb1 --- /dev/null +++ b/crates/nvisy-image/Cargo.toml @@ -0,0 +1,30 @@ +# https://doc.rust-lang.org/cargo/reference/manifest.html + +[package] +name = "nvisy-image" +description = "Image format support for nvisy" +readme = "./README.md" + +version = { workspace = true } +rust-version = { workspace = true } +edition = { workspace = true } +license = { workspace = true } +publish = { workspace = true } + +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +documentation = { workspace = true } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + +[dependencies] +nvisy-document = { workspace = true } + +async-trait = { workspace = true } +bytes = { workspace = true } +thiserror = { workspace = true } + +[dev-dependencies] diff --git a/crates/nvisy-image/README.md b/crates/nvisy-image/README.md new file mode 100644 index 0000000..b5e1574 --- /dev/null +++ b/crates/nvisy-image/README.md @@ -0,0 +1,13 @@ +# nvisy-image + +Image format support for nvisy. + +This crate provides a `DocumentFormat` implementation for image files (PNG, JPEG, GIF, WebP, etc.). + +## Status + +This crate is currently a stub. Image parsing and manipulation are not yet implemented. + +## License + +MIT diff --git a/crates/nvisy-image/src/document.rs b/crates/nvisy-image/src/document.rs new file mode 100644 index 0000000..092b45c --- /dev/null +++ b/crates/nvisy-image/src/document.rs @@ -0,0 +1,56 @@ +//! Image document implementation. + +use async_trait::async_trait; +use bytes::Bytes; +use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; + +/// A loaded image document. +#[derive(Debug)] +pub struct ImageDocument { + info: DocumentInfo, + regions: Vec, + #[allow(dead_code)] + data: Bytes, +} + +impl ImageDocument { + /// Creates a new image document (internal use). 
+ #[must_use] + #[allow(dead_code)] // Will be used when load() is implemented + pub(crate) fn new(info: DocumentInfo, data: Bytes) -> Self { + Self { + info, + regions: Vec::new(), + data, + } + } +} + +#[async_trait] +impl Document for ImageDocument { + fn info(&self) -> &DocumentInfo { + &self.info + } + + fn regions(&self) -> &[Region] { + &self.regions + } + + fn regions_for_page(&self, page: u32) -> Vec<&Region> { + self.regions + .iter() + .filter(|r| r.page.map(|p| p.get()) == Some(page)) + .collect() + } + + fn find_region(&self, id: RegionId) -> Option<&Region> { + self.regions.iter().find(|r| r.id == id) + } + + async fn to_bytes(&self) -> Result { + // TODO: Implement image serialization + Err(Error::unsupported_format( + "Image serialization not yet implemented", + )) + } +} diff --git a/crates/nvisy-image/src/format.rs b/crates/nvisy-image/src/format.rs new file mode 100644 index 0000000..e479706 --- /dev/null +++ b/crates/nvisy-image/src/format.rs @@ -0,0 +1,82 @@ +//! Image format handler implementation. + +use bytes::Bytes; +use nvisy_document::{Capabilities, DocumentFormat, Error, Result}; + +use crate::ImageDocument; + +/// Image document format handler. +#[derive(Debug, Clone, Default)] +pub struct ImageFormat { + capabilities: Capabilities, +} + +impl ImageFormat { + /// Creates a new image format handler. + #[must_use] + pub fn new() -> Self { + Self { + capabilities: Capabilities::image(), + } + } +} + +impl DocumentFormat for ImageFormat { + type Document = ImageDocument; + + fn name(&self) -> &'static str { + "image" + } + + fn mime_types(&self) -> &'static [&'static str] { + &[ + "image/png", + "image/jpeg", + "image/gif", + "image/webp", + "image/bmp", + "image/tiff", + ] + } + + fn extensions(&self) -> &'static [&'static str] { + &["png", "jpg", "jpeg", "gif", "webp", "bmp", "tiff", "tif"] + } + + fn capabilities(&self) -> &Capabilities { + &self.capabilities + } + + async fn load(&self, _data: Bytes) -> Result { + // TODO: Implement image loading + Err(Error::unsupported_format( + "Image loading not yet implemented", + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_format_metadata() { + let format = ImageFormat::new(); + assert_eq!(format.name(), "image"); + assert!(format.mime_types().contains(&"image/png")); + assert!(format.mime_types().contains(&"image/jpeg")); + assert!(format.extensions().contains(&"png")); + assert!(format.extensions().contains(&"jpg")); + } + + #[test] + fn test_capabilities() { + let format = ImageFormat::new(); + let caps = format.capabilities(); + + assert!(!caps.text.can_extract); + assert!(caps.text.may_need_ocr); + assert!(!caps.structure.has_pages); + assert!(caps.metadata.can_extract); // EXIF support + } +} diff --git a/crates/nvisy-image/src/lib.rs b/crates/nvisy-image/src/lib.rs new file mode 100644 index 0000000..4b9608e --- /dev/null +++ b/crates/nvisy-image/src/lib.rs @@ -0,0 +1,9 @@ +#![forbid(unsafe_code)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc = include_str!("../README.md")] + +mod document; +mod format; + +pub use document::ImageDocument; +pub use format::ImageFormat; diff --git a/crates/nvisy-pdf/Cargo.toml b/crates/nvisy-pdf/Cargo.toml index 77348cd..c3ac1a0 100644 --- a/crates/nvisy-pdf/Cargo.toml +++ b/crates/nvisy-pdf/Cargo.toml @@ -2,20 +2,20 @@ [package] name = "nvisy-pdf" +description = "PDF document format support for nvisy" +readme = "./README.md" + version = { workspace = true } rust-version = { workspace = true } edition = { workspace = true } license = { workspace = 
true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } documentation = { workspace = true } -description = "PDF document format support for nvisy" - [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] diff --git a/crates/nvisy-pdf/src/document.rs b/crates/nvisy-pdf/src/document.rs index dc0638b..71ad404 100644 --- a/crates/nvisy-pdf/src/document.rs +++ b/crates/nvisy-pdf/src/document.rs @@ -2,28 +2,26 @@ use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{ - Document, DocumentInfo, EditOperation, EditResult, EditableDocument, Error, PageOptions, - Region, RegionId, Result, -}; +use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; /// A loaded PDF document. #[derive(Debug)] pub struct PdfDocument { info: DocumentInfo, regions: Vec, - modified: bool, + #[allow(dead_code)] + data: Bytes, } impl PdfDocument { /// Creates a new PDF document (internal use). #[must_use] #[allow(dead_code)] // Will be used when load() is implemented - pub(crate) fn new(info: DocumentInfo) -> Self { + pub(crate) fn new(info: DocumentInfo, data: Bytes) -> Self { Self { info, regions: Vec::new(), - modified: false, + data, } } } @@ -49,29 +47,10 @@ impl Document for PdfDocument { self.regions.iter().find(|r| r.id == id) } - async fn serialize(&self) -> Result { + async fn to_bytes(&self) -> Result { // TODO: Implement PDF serialization Err(Error::unsupported_format( "PDF serialization not yet implemented", )) } } - -#[async_trait] -impl EditableDocument for PdfDocument { - async fn apply(&mut self, _operation: &EditOperation) -> Result { - // TODO: Implement PDF editing - Err(Error::unsupported_format("PDF editing not yet implemented")) - } - - fn is_modified(&self) -> bool { - self.modified - } - - async fn extract_page_regions(&mut self, _options: &PageOptions) -> Result> { - // TODO: Implement page region extraction - Err(Error::unsupported_format( - "PDF page extraction not yet implemented", - )) - } -} diff --git a/crates/nvisy-pdf/src/format.rs b/crates/nvisy-pdf/src/format.rs index f48345a..f36167f 100644 --- a/crates/nvisy-pdf/src/format.rs +++ b/crates/nvisy-pdf/src/format.rs @@ -16,7 +16,7 @@ impl PdfFormat { #[must_use] pub fn new() -> Self { Self { - capabilities: Capabilities::read_only(), + capabilities: Capabilities::rich_document(), } } } @@ -44,13 +44,6 @@ impl DocumentFormat for PdfFormat { // TODO: Implement PDF loading Err(Error::unsupported_format("PDF loading not yet implemented")) } - - async fn create_empty(&self) -> Result { - // TODO: Implement empty PDF creation - Err(Error::unsupported_format( - "PDF creation not yet implemented", - )) - } } #[cfg(test)] @@ -64,4 +57,15 @@ mod tests { assert!(format.mime_types().contains(&"application/pdf")); assert!(format.extensions().contains(&"pdf")); } + + #[test] + fn test_capabilities() { + let format = PdfFormat::new(); + let caps = format.capabilities(); + + assert!(caps.text.can_extract); + assert!(caps.text.has_rich_text); + assert!(caps.structure.can_detect_tables); + assert!(caps.structure.has_pages); + } } diff --git a/crates/nvisy-text/Cargo.toml b/crates/nvisy-text/Cargo.toml index d653a15..80ab4ff 100644 --- a/crates/nvisy-text/Cargo.toml +++ b/crates/nvisy-text/Cargo.toml @@ -2,20 +2,20 @@ [package] name = "nvisy-text" +description = "Plain text document format support for nvisy" +readme = "./README.md" + version = { workspace = true } rust-version = { workspace = 
true } edition = { workspace = true } license = { workspace = true } publish = { workspace = true } -readme = "./README.md" authors = { workspace = true } repository = { workspace = true } homepage = { workspace = true } documentation = { workspace = true } -description = "Plain text document format support for nvisy" - [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] @@ -25,6 +25,11 @@ nvisy-document = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } +csv = { workspace = true } +markdown = { workspace = true } +serde_json = { workspace = true } thiserror = { workspace = true } [dev-dependencies] +tokio = { workspace = true, features = ["rt", "macros"] } +tokio-test = { workspace = true } diff --git a/crates/nvisy-text/README.md b/crates/nvisy-text/README.md index f7b701a..10590d2 100644 --- a/crates/nvisy-text/README.md +++ b/crates/nvisy-text/README.md @@ -1,12 +1,104 @@ # nvisy-text -Plain text document format support for nvisy. +Text-based document format support for nvisy. -This crate provides a `DocumentFormat` implementation for plain text files (.txt, .md, .rst, etc.). +This crate provides support for loading and extracting text from +various text-based file formats: -## Status +- **Plain text** (`.txt`, `.text`) +- **Markdown** (`.md`, `.markdown`, `.mdx`) +- **JSON** (`.json`) +- **CSV/TSV** (`.csv`, `.tsv`) +- **XML** (`.xml`, `.xsd`, `.xsl`, `.xslt`, `.svg`, `.xhtml`, `.plist`) +- **YAML** (`.yaml`, `.yml`) +- **TOML** (`.toml`) +- **INI** (`.ini`, `.cfg`, `.conf`, `.config`) -This crate is currently a stub. Text document handling is not yet fully implemented. +## Usage + +```rust +use nvisy_text::{PlainTextFormat, PlainTextDocument}; +use nvisy_document::{DocumentFormat, Document, TextExtractor}; +use bytes::Bytes; + +# tokio_test::block_on(async { +let format = PlainTextFormat::new(); +let data = Bytes::from("Hello, world!\n\nThis is a paragraph."); + +let doc = format.load(data).await.unwrap(); +assert_eq!(doc.regions().len(), 2); + +let text = doc.extract_text().await.unwrap(); +assert_eq!(text.word_count(), 6); +# }); +``` + +## Formats + +### Plain Text + +Basic plain text with paragraph detection. + +```rust +use nvisy_text::PlainTextFormat; +``` + +### Markdown + +Full Markdown parsing using pulldown-cmark with support for headings, lists, code blocks, blockquotes, and more. + +```rust +use nvisy_text::MarkdownFormat; +``` + +### JSON + +JSON parsing with structure detection using serde_json. + +```rust +use nvisy_text::JsonFormat; +``` + +### CSV/TSV + +CSV and TSV parsing using the csv crate. Implements `TableExtractor` for structured table access. + +```rust +use nvisy_text::CsvFormat; +use nvisy_document::TableExtractor; +``` + +### XML + +XML parsing with hierarchical structure detection. + +```rust +use nvisy_text::XmlFormat; +``` + +### YAML + +YAML parsing with list and key-value detection. + +```rust +use nvisy_text::YamlFormat; +``` + +### TOML + +TOML parsing with section and array table detection. + +```rust +use nvisy_text::TomlFormat; +``` + +### INI + +INI/config file parsing with section grouping. + +```rust +use nvisy_text::IniFormat; +``` ## License diff --git a/crates/nvisy-text/src/document.rs b/crates/nvisy-text/src/document.rs deleted file mode 100644 index baf041b..0000000 --- a/crates/nvisy-text/src/document.rs +++ /dev/null @@ -1,79 +0,0 @@ -//! Plain text document implementation. 
- -use async_trait::async_trait; -use bytes::Bytes; -use nvisy_document::{ - Document, DocumentInfo, EditOperation, EditResult, EditableDocument, Error, PageOptions, - Region, RegionId, Result, -}; - -/// A loaded plain text document. -#[derive(Debug)] -pub struct TextDocument { - info: DocumentInfo, - regions: Vec, - modified: bool, -} - -impl TextDocument { - /// Creates a new text document (internal use). - #[must_use] - #[allow(dead_code)] // Will be used when load() is implemented - pub(crate) fn new(info: DocumentInfo) -> Self { - Self { - info, - regions: Vec::new(), - modified: false, - } - } -} - -#[async_trait] -impl Document for TextDocument { - fn info(&self) -> &DocumentInfo { - &self.info - } - - fn regions(&self) -> &[Region] { - &self.regions - } - - fn regions_for_page(&self, page: u32) -> Vec<&Region> { - self.regions - .iter() - .filter(|r| r.page.map(|p| p.get()) == Some(page)) - .collect() - } - - fn find_region(&self, id: RegionId) -> Option<&Region> { - self.regions.iter().find(|r| r.id == id) - } - - async fn serialize(&self) -> Result { - // TODO: Implement text serialization - Err(Error::unsupported_format( - "Text serialization not yet implemented", - )) - } -} - -#[async_trait] -impl EditableDocument for TextDocument { - async fn apply(&mut self, _operation: &EditOperation) -> Result { - // TODO: Implement text editing - Err(Error::unsupported_format( - "Text editing not yet implemented", - )) - } - - fn is_modified(&self) -> bool { - self.modified - } - - async fn extract_page_regions(&mut self, _options: &PageOptions) -> Result> { - // TODO: Implement page region extraction - Err(Error::unsupported_format( - "Text page extraction not yet implemented", - )) - } -} diff --git a/crates/nvisy-text/src/documents/csv.rs b/crates/nvisy-text/src/documents/csv.rs new file mode 100644 index 0000000..d766a71 --- /dev/null +++ b/crates/nvisy-text/src/documents/csv.rs @@ -0,0 +1,355 @@ +//! CSV/TSV document type. + +use std::num::NonZeroU32; + +use async_trait::async_trait; +use bytes::Bytes; +use csv::{ReaderBuilder, Terminator}; +use nvisy_document::{ + BoundingBox, Document, DocumentInfo, ExtractedText, NormalizedCell, NormalizedRow, + NormalizedTable, Region, RegionId, RegionKind, RegionSource, Result, TableExtractor, + TextExtractor, +}; + +/// A loaded CSV document. +#[derive(Debug, Clone)] +pub struct CsvDocument { + info: DocumentInfo, + content: String, + delimiter: u8, + headers: Vec, + rows: Vec>, + regions: Vec, + table_region_id: RegionId, +} + +impl CsvDocument { + /// Creates a new CSV document from content. + #[must_use] + pub fn new(content: String, delimiter: u8) -> Self { + let (headers, rows) = Self::parse_csv(&content, delimiter); + let (regions, table_region_id) = Self::build_regions(&headers, &rows); + let size = content.len() as u64; + let info = DocumentInfo::new("text/csv", size).with_page_count(1); + + Self { + info, + content, + delimiter, + headers, + rows, + regions, + table_region_id, + } + } + + /// Creates a CSV document (comma-separated). + #[must_use] + pub fn csv(content: String) -> Self { + Self::new(content, b',') + } + + /// Creates a TSV document (tab-separated). + #[must_use] + pub fn tsv(content: String) -> Self { + Self::new(content, b'\t') + } + + /// Returns the raw content. + #[must_use] + pub fn content(&self) -> &str { + &self.content + } + + /// Returns the delimiter byte. + #[must_use] + pub fn delimiter(&self) -> u8 { + self.delimiter + } + + /// Returns the headers. 
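+    ///
+    /// # Example
+    ///
+    /// A small sketch using an inline CSV string.
+    ///
+    /// ```ignore
+    /// let doc = CsvDocument::csv("name,age\nAlice,30".to_string());
+    /// assert_eq!(doc.headers(), &["name", "age"]);
+    /// ```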
+ #[must_use] + pub fn headers(&self) -> &[String] { + &self.headers + } + + /// Returns the data rows (excluding headers). + #[must_use] + pub fn rows(&self) -> &[Vec] { + &self.rows + } + + /// Returns the number of columns. + #[must_use] + pub fn column_count(&self) -> usize { + self.headers.len() + } + + /// Returns the number of data rows (excluding headers). + #[must_use] + pub fn row_count(&self) -> usize { + self.rows.len() + } + + /// Gets a cell value by row and column index. + #[must_use] + pub fn get(&self, row: usize, col: usize) -> Option<&str> { + self.rows + .get(row) + .and_then(|r| r.get(col)) + .map(|s| s.as_str()) + } + + /// Gets a cell value by row index and column name. + #[must_use] + pub fn get_by_name(&self, row: usize, col_name: &str) -> Option<&str> { + let col_idx = self.headers.iter().position(|h| h == col_name)?; + self.get(row, col_idx) + } + + fn parse_csv(content: &str, delimiter: u8) -> (Vec, Vec>) { + let mut reader = ReaderBuilder::new() + .delimiter(delimiter) + .has_headers(true) + .flexible(true) + .trim(csv::Trim::All) + .terminator(Terminator::Any(b'\n')) + .from_reader(content.as_bytes()); + + let headers: Vec = reader + .headers() + .map(|h| h.iter().map(String::from).collect()) + .unwrap_or_default(); + + let rows: Vec> = reader + .records() + .filter_map(|r| r.ok()) + .map(|record| record.iter().map(String::from).collect()) + .collect(); + + (headers, rows) + } + + fn build_regions(headers: &[String], rows: &[Vec]) -> (Vec, RegionId) { + let mut regions = Vec::new(); + let total_rows = rows.len() + 1; + let row_height = 1.0 / total_rows.max(1) as f64; + + // Create table container region + let table_region = Region::on_page( + NonZeroU32::new(1).unwrap(), + BoundingBox::new(0.0, 0.0, 1.0, 1.0), + ) + .with_kind(RegionKind::Table) + .with_source(RegionSource::Parser); + let table_id = table_region.id; + regions.push(table_region); + + // Header row + let header_text = headers.join(" | "); + regions.push( + Region::on_page( + NonZeroU32::new(1).unwrap(), + BoundingBox::new(0.0, 0.0, 1.0, row_height), + ) + .with_text(header_text) + .with_kind(RegionKind::TableRow) + .with_source(RegionSource::Parser) + .with_parent(table_id), + ); + + // Data rows + for (i, row) in rows.iter().enumerate() { + let y = (i + 1) as f64 * row_height; + let row_text = row.join(" | "); + regions.push( + Region::on_page( + NonZeroU32::new(1).unwrap(), + BoundingBox::new(0.0, y, 1.0, row_height), + ) + .with_text(row_text) + .with_kind(RegionKind::TableRow) + .with_source(RegionSource::Parser) + .with_parent(table_id), + ); + } + + (regions, table_id) + } + + /// Builds a normalized table from this CSV document. 
+ fn build_table(&self) -> NormalizedTable { + let mut table = NormalizedTable::new(self.table_region_id) + .with_column_count(self.headers.len()) + .with_header_rows(1); + + // Header row + let mut header_row = NormalizedRow::header(); + for h in &self.headers { + header_row.add_text(h); + } + table.add_row(header_row); + + // Data rows + for row_data in &self.rows { + let mut row = NormalizedRow::new(); + for value in row_data { + row.add_cell(NormalizedCell::text(value)); + } + table.add_row(row); + } + + table + } +} + +#[async_trait] +impl Document for CsvDocument { + fn info(&self) -> &DocumentInfo { + &self.info + } + + fn regions(&self) -> &[Region] { + &self.regions + } + + fn regions_for_page(&self, page: u32) -> Vec<&Region> { + if page == 1 { + self.regions.iter().collect() + } else { + Vec::new() + } + } + + fn find_region(&self, id: RegionId) -> Option<&Region> { + self.regions.iter().find(|r| r.id == id) + } + + async fn to_bytes(&self) -> Result { + Ok(Bytes::from(self.content.clone())) + } +} + +#[async_trait] +impl TextExtractor for CsvDocument { + async fn extract_text(&self) -> Result { + let mut text = String::new(); + + text.push_str(&self.headers.join(" | ")); + text.push('\n'); + text.push_str(&"-".repeat(self.headers.iter().map(|h| h.len() + 3).sum::())); + text.push('\n'); + + for row in &self.rows { + text.push_str(&row.join(" | ")); + text.push('\n'); + } + + let mut extracted = ExtractedText::from_raw(&text).with_page(1, &text); + + for region in &self.regions { + if let Some(t) = ®ion.text { + extracted = extracted.with_region(region.id, t); + } + } + + Ok(extracted) + } + + async fn extract_text_for_page(&self, page: u32) -> Result> { + if page == 1 { + let text = self.extract_text().await?; + Ok(Some(text.raw)) + } else { + Ok(None) + } + } + + fn needs_ocr(&self) -> bool { + false + } +} + +#[async_trait] +impl TableExtractor for CsvDocument { + async fn extract_tables(&self) -> Result> { + Ok(vec![self.build_table()]) + } + + async fn extract_table(&self, region_id: RegionId) -> Result> { + if region_id == self.table_region_id { + Ok(Some(self.build_table())) + } else { + Ok(None) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_simple_csv() { + let csv = "name,age,city\nAlice,30,NYC\nBob,25,LA"; + let doc = CsvDocument::csv(csv.to_string()); + + assert_eq!(doc.headers(), &["name", "age", "city"]); + assert_eq!(doc.row_count(), 2); + assert_eq!(doc.get(0, 0), Some("Alice")); + assert_eq!(doc.get(1, 1), Some("25")); + } + + #[test] + fn test_parse_quoted_fields() { + let csv = r#"name,description +"Smith, John","A ""great"" person" +Bob,Simple value"#; + let doc = CsvDocument::csv(csv.to_string()); + + assert_eq!(doc.get(0, 0), Some("Smith, John")); + assert_eq!(doc.get(0, 1), Some(r#"A "great" person"#)); + } + + #[test] + fn test_get_by_name() { + let csv = "name,age,city\nAlice,30,NYC"; + let doc = CsvDocument::csv(csv.to_string()); + + assert_eq!(doc.get_by_name(0, "name"), Some("Alice")); + assert_eq!(doc.get_by_name(0, "age"), Some("30")); + assert_eq!(doc.get_by_name(0, "unknown"), None); + } + + #[tokio::test] + async fn test_table_extraction() { + let csv = "name,age\nAlice,30\nBob,25"; + let doc = CsvDocument::csv(csv.to_string()); + let tables = doc.extract_tables().await.unwrap(); + + assert_eq!(tables.len(), 1); + assert_eq!(tables[0].column_count, 2); + assert_eq!(tables[0].row_count(), 3); // 1 header + 2 data + } + + #[test] + fn test_tsv_parsing() { + let tsv = 
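+
+    // A small usage sketch (relies only on the `extract_text` behavior
+    // above): header and data cells are joined with " | ", with a dashed
+    // rule after the header line.
+    #[tokio::test]
+    async fn test_text_extraction_layout() {
+        let csv = "name,age\nAlice,30";
+        let doc = CsvDocument::csv(csv.to_string());
+        let text = doc.extract_text().await.unwrap();
+
+        assert!(text.raw.starts_with("name | age\n"));
+        assert!(text.raw.contains("Alice | 30"));
+    }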
"name\tage\tcity\nAlice\t30\tNYC\nBob\t25\tLA"; + let doc = CsvDocument::tsv(tsv.to_string()); + + assert_eq!(doc.headers(), &["name", "age", "city"]); + assert_eq!(doc.row_count(), 2); + assert_eq!(doc.get(0, 0), Some("Alice")); + } + + #[test] + fn test_flexible_columns() { + // csv crate's flexible mode handles rows with different column counts + let csv = "a,b,c\n1,2\n1,2,3,4"; + let doc = CsvDocument::csv(csv.to_string()); + + assert_eq!(doc.headers(), &["a", "b", "c"]); + assert_eq!(doc.rows()[0].len(), 2); + assert_eq!(doc.rows()[1].len(), 4); + } +} diff --git a/crates/nvisy-text/src/documents/ini.rs b/crates/nvisy-text/src/documents/ini.rs new file mode 100644 index 0000000..91c1b3d --- /dev/null +++ b/crates/nvisy-text/src/documents/ini.rs @@ -0,0 +1,229 @@ +//! INI document type. + +use std::num::NonZeroU32; + +use async_trait::async_trait; +use bytes::Bytes; +use nvisy_document::{ + BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, + Result, TextExtractor, +}; + +/// A loaded INI document. +#[derive(Debug, Clone)] +pub struct IniDocument { + info: DocumentInfo, + content: String, + regions: Vec, +} + +impl IniDocument { + /// Creates a new INI document from content. + #[must_use] + pub fn new(content: String) -> Self { + let regions = Self::parse_regions(&content); + let size = content.len() as u64; + let info = DocumentInfo::new("text/plain", size).with_page_count(1); + + Self { + info, + content, + regions, + } + } + + /// Returns the raw content. + #[must_use] + pub fn content(&self) -> &str { + &self.content + } + + fn parse_regions(content: &str) -> Vec { + let mut regions = Vec::new(); + let total_len = content.len().max(1) as f64; + let mut pos = 0_usize; + let mut current_section: Option = None; + + for line in content.lines() { + let line_start = pos; + let line_end = pos + line.len(); + let trimmed = line.trim(); + + if trimmed.is_empty() { + pos = line_end + 1; + continue; + } + + let y_start = line_start as f64 / total_len; + let height = ((line_end - line_start) as f64 / total_len).max(0.02); + + // Section headers: [section] + if trimmed.starts_with('[') && trimmed.ends_with(']') { + let section = Region::on_page( + NonZeroU32::new(1).unwrap(), + BoundingBox::new(0.0, y_start, 1.0, height), + ) + .with_text(trimmed) + .with_kind(RegionKind::Heading) + .with_source(RegionSource::Parser); + current_section = Some(section.id); + regions.push(section); + } else if trimmed.starts_with('#') || trimmed.starts_with(';') { + // Comments (both # and ; style) + regions.push( + Region::on_page( + NonZeroU32::new(1).unwrap(), + BoundingBox::new(0.02, y_start, 0.98, height), + ) + .with_text(trimmed) + .with_kind(RegionKind::Annotation) + .with_source(RegionSource::Parser), + ); + } else { + // Key-value pairs + let mut region = Region::on_page( + NonZeroU32::new(1).unwrap(), + BoundingBox::new(0.02, y_start, 0.98, height), + ) + .with_text(trimmed) + .with_kind(RegionKind::Code) + .with_source(RegionSource::Parser); + + if let Some(parent) = current_section { + region = region.with_parent(parent); + } + regions.push(region); + } + + pos = line_end + 1; + } + + regions + } +} + +#[async_trait] +impl Document for IniDocument { + fn info(&self) -> &DocumentInfo { + &self.info + } + + fn regions(&self) -> &[Region] { + &self.regions + } + + fn regions_for_page(&self, page: u32) -> Vec<&Region> { + if page == 1 { + self.regions.iter().collect() + } else { + Vec::new() + } + } + + fn find_region(&self, id: RegionId) -> Option<&Region> { 
+ self.regions.iter().find(|r| r.id == id) + } + + async fn to_bytes(&self) -> Result { + Ok(Bytes::from(self.content.clone())) + } +} + +#[async_trait] +impl TextExtractor for IniDocument { + async fn extract_text(&self) -> Result { + let mut extracted = ExtractedText::from_raw(&self.content).with_page(1, &self.content); + + for region in &self.regions { + if let Some(text) = ®ion.text { + extracted = extracted.with_region(region.id, text); + } + } + + Ok(extracted) + } + + async fn extract_text_for_page(&self, page: u32) -> Result> { + if page == 1 { + Ok(Some(self.content.clone())) + } else { + Ok(None) + } + } + + fn needs_ocr(&self) -> bool { + false + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_simple_ini() { + let ini = "[section]\nkey=value\nfoo=bar"; + let doc = IniDocument::new(ini.to_string()); + + let sections: Vec<_> = doc + .regions() + .iter() + .filter(|r| r.kind == RegionKind::Heading) + .collect(); + assert_eq!(sections.len(), 1); + assert_eq!(sections[0].text.as_deref(), Some("[section]")); + } + + #[test] + fn test_multiple_sections() { + let ini = "[section1]\nkey1=value1\n\n[section2]\nkey2=value2"; + let doc = IniDocument::new(ini.to_string()); + + let sections: Vec<_> = doc + .regions() + .iter() + .filter(|r| r.kind == RegionKind::Heading) + .collect(); + assert_eq!(sections.len(), 2); + } + + #[test] + fn test_parse_comments() { + let ini = "; Comment style 1\n# Comment style 2\nkey=value"; + let doc = IniDocument::new(ini.to_string()); + + let comments: Vec<_> = doc + .regions() + .iter() + .filter(|r| r.kind == RegionKind::Annotation) + .collect(); + assert_eq!(comments.len(), 2); + } + + #[test] + fn test_parent_child_relationship() { + let ini = "[section]\nkey=value"; + let doc = IniDocument::new(ini.to_string()); + + let section = doc + .regions() + .iter() + .find(|r| r.kind == RegionKind::Heading) + .unwrap(); + let key_value = doc + .regions() + .iter() + .find(|r| r.kind == RegionKind::Code) + .unwrap(); + + assert_eq!(key_value.parent, Some(section.id)); + } + + #[tokio::test] + async fn test_text_extraction() { + let ini = "[section]\nkey=value"; + let doc = IniDocument::new(ini.to_string()); + let text = doc.extract_text().await.unwrap(); + assert!(text.raw.contains("key=value")); + } +} diff --git a/crates/nvisy-text/src/documents/json.rs b/crates/nvisy-text/src/documents/json.rs new file mode 100644 index 0000000..e7c6603 --- /dev/null +++ b/crates/nvisy-text/src/documents/json.rs @@ -0,0 +1,261 @@ +//! JSON document type. + +use std::num::NonZeroU32; + +use async_trait::async_trait; +use bytes::Bytes; +use nvisy_document::{ + BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, + Result, TextExtractor, +}; +use serde_json::Value; + +/// A loaded JSON document. +#[derive(Debug, Clone)] +pub struct JsonDocument { + info: DocumentInfo, + content: String, + parsed: Value, + regions: Vec, +} + +impl JsonDocument { + /// Creates a new JSON document from content. + pub fn new(content: String) -> Result { + let parsed: Value = serde_json::from_str(&content) + .map_err(|e| nvisy_document::Error::parse(format!("Invalid JSON: {e}")))?; + + let regions = Self::extract_regions(&parsed); + let size = content.len() as u64; + let info = DocumentInfo::new("application/json", size).with_page_count(1); + + Ok(Self { + info, + content, + parsed, + regions, + }) + } + + /// Returns the raw JSON content. 
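+
+    // Illustrative sketch of the default grouping (follows from
+    // `parse_regions` above): keys that appear before any `[section]`
+    // header are left without a parent region.
+    #[test]
+    fn test_key_before_any_section() {
+        let ini = "key=value\n[section]\nother=1";
+        let doc = IniDocument::new(ini.to_string());
+
+        let first = doc
+            .regions()
+            .iter()
+            .find(|r| r.text.as_deref() == Some("key=value"))
+            .unwrap();
+        assert_eq!(first.parent, None);
+    }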
+
+    #[tokio::test]
+    async fn test_text_extraction() {
+        let ini = "[section]\nkey=value";
+        let doc = IniDocument::new(ini.to_string());
+        let text = doc.extract_text().await.unwrap();
+        assert!(text.raw.contains("key=value"));
+    }
+}
diff --git a/crates/nvisy-text/src/documents/json.rs b/crates/nvisy-text/src/documents/json.rs
new file mode 100644
index 0000000..e7c6603
--- /dev/null
+++ b/crates/nvisy-text/src/documents/json.rs
@@ -0,0 +1,261 @@
+//! JSON document type.
+
+use std::num::NonZeroU32;
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use nvisy_document::{
+    BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource,
+    Result, TextExtractor,
+};
+use serde_json::Value;
+
+/// A loaded JSON document.
+#[derive(Debug, Clone)]
+pub struct JsonDocument {
+    info: DocumentInfo,
+    content: String,
+    parsed: Value,
+    regions: Vec<Region>,
+}
+
+impl JsonDocument {
+    /// Creates a new JSON document from content.
+    pub fn new(content: String) -> Result<Self> {
+        let parsed: Value = serde_json::from_str(&content)
+            .map_err(|e| nvisy_document::Error::parse(format!("Invalid JSON: {e}")))?;
+
+        let regions = Self::extract_regions(&parsed);
+        let size = content.len() as u64;
+        let info = DocumentInfo::new("application/json", size).with_page_count(1);
+
+        Ok(Self {
+            info,
+            content,
+            parsed,
+            regions,
+        })
+    }
+
+    /// Returns the raw JSON content.
+    #[must_use]
+    pub fn content(&self) -> &str {
+        &self.content
+    }
+
+    /// Returns the parsed JSON value.
+    #[must_use]
+    pub fn value(&self) -> &Value {
+        &self.parsed
+    }
+
+    /// Returns the JSON pretty-printed.
+    #[must_use]
+    pub fn pretty(&self) -> String {
+        serde_json::to_string_pretty(&self.parsed).unwrap_or_else(|_| self.content.clone())
+    }
+
+    fn extract_regions(value: &Value) -> Vec<Region> {
+        let mut regions = Vec::new();
+        Self::extract_regions_recursive(value, "", &mut regions, 0);
+        regions
+    }
+
+    fn extract_regions_recursive(
+        value: &Value,
+        path: &str,
+        regions: &mut Vec<Region>,
+        depth: usize,
+    ) {
+        let y_pos = regions.len() as f64 * 0.05;
+        let indent = depth as f64 * 0.02;
+
+        match value {
+            Value::Object(map) => {
+                let text = if path.is_empty() {
+                    "{...}".to_string()
+                } else {
+                    format!("{path}: {{...}}")
+                };
+
+                regions.push(
+                    Region::on_page(
+                        NonZeroU32::new(1).unwrap(),
+                        BoundingBox::new(indent, y_pos, 1.0 - indent, 0.03),
+                    )
+                    .with_text(text)
+                    .with_kind(RegionKind::Code)
+                    .with_source(RegionSource::Parser),
+                );
+
+                for (key, val) in map {
+                    let new_path = if path.is_empty() {
+                        key.clone()
+                    } else {
+                        format!("{path}.{key}")
+                    };
+                    Self::extract_regions_recursive(val, &new_path, regions, depth + 1);
+                }
+            }
+            Value::Array(arr) => {
+                let text = if path.is_empty() {
+                    format!("[{} items]", arr.len())
+                } else {
+                    format!("{path}: [{} items]", arr.len())
+                };
+
+                regions.push(
+                    Region::on_page(
+                        NonZeroU32::new(1).unwrap(),
+                        BoundingBox::new(indent, y_pos, 1.0 - indent, 0.03),
+                    )
+                    .with_text(text)
+                    .with_kind(RegionKind::Code)
+                    .with_source(RegionSource::Parser),
+                );
+
+                for (i, val) in arr.iter().enumerate() {
+                    let new_path = format!("{path}[{i}]");
+                    Self::extract_regions_recursive(val, &new_path, regions, depth + 1);
+                }
+            }
+            Value::String(s) => {
+                let text = format!("{path}: \"{s}\"");
+                regions.push(
+                    Region::on_page(
+                        NonZeroU32::new(1).unwrap(),
+                        BoundingBox::new(indent, y_pos, 1.0 - indent, 0.03),
+                    )
+                    .with_text(text)
+                    .with_kind(RegionKind::Code)
+                    .with_source(RegionSource::Parser),
+                );
+            }
+            Value::Number(n) => {
+                let text = format!("{path}: {n}");
+                regions.push(
+                    Region::on_page(
+                        NonZeroU32::new(1).unwrap(),
+                        BoundingBox::new(indent, y_pos, 1.0 - indent, 0.03),
+                    )
+                    .with_text(text)
+                    .with_kind(RegionKind::Code)
+                    .with_source(RegionSource::Parser),
+                );
+            }
+            Value::Bool(b) => {
+                let text = format!("{path}: {b}");
+                regions.push(
+                    Region::on_page(
+                        NonZeroU32::new(1).unwrap(),
+                        BoundingBox::new(indent, y_pos, 1.0 - indent, 0.03),
+                    )
+                    .with_text(text)
+                    .with_kind(RegionKind::Code)
+                    .with_source(RegionSource::Parser),
+                );
+            }
+            Value::Null => {
+                let text = format!("{path}: null");
+                regions.push(
+                    Region::on_page(
+                        NonZeroU32::new(1).unwrap(),
+                        BoundingBox::new(indent, y_pos, 1.0 - indent, 0.03),
+                    )
+                    .with_text(text)
+                    .with_kind(RegionKind::Code)
+                    .with_source(RegionSource::Parser),
+                );
+            }
+        }
+    }
+}
+
+#[async_trait]
+impl Document for JsonDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        if page == 1 {
+            self.regions.iter().collect()
+        } else {
+            Vec::new()
+        }
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn to_bytes(&self) -> Result<Bytes> {
+        Ok(Bytes::from(self.content.clone()))
+    }
+}
+
+#[async_trait]
+impl TextExtractor for JsonDocument {
+    async fn extract_text(&self) -> Result<ExtractedText> {
+        let pretty = self.pretty();
+        let mut extracted = ExtractedText::from_raw(&pretty).with_page(1, &pretty);
+
+        for region in &self.regions {
+            if let Some(text) = &region.text {
+                extracted = extracted.with_region(region.id, text);
+            }
+        }
+
+        Ok(extracted)
+    }
+
+    async fn extract_text_for_page(&self, page: u32) -> Result<Option<String>> {
+        if page == 1 {
+            Ok(Some(self.pretty()))
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn needs_ocr(&self) -> bool {
+        false
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_simple_json() {
+        let json = r#"{"name": "test", "value": 42}"#;
+        let doc = JsonDocument::new(json.to_string()).unwrap();
+        assert!(!doc.regions().is_empty());
+    }
+
+    #[test]
+    fn test_parse_nested_json() {
+        let json = r#"{"user": {"name": "Alice", "age": 30}, "active": true}"#;
+        let doc = JsonDocument::new(json.to_string()).unwrap();
+        assert!(doc.regions().len() > 3);
+    }
+
+    #[test]
+    fn test_invalid_json() {
+        let invalid = "not valid json {";
+        let result = JsonDocument::new(invalid.to_string());
+        assert!(result.is_err());
+    }
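+
+    // Sketch of the region text layout produced by
+    // `extract_regions_recursive` above: nested values are flattened into
+    // dotted paths.
+    #[test]
+    fn test_region_paths() {
+        let json = r#"{"user": {"name": "Alice"}}"#;
+        let doc = JsonDocument::new(json.to_string()).unwrap();
+
+        assert!(
+            doc.regions()
+                .iter()
+                .any(|r| r.text.as_deref() == Some(r#"user.name: "Alice""#))
+        );
+    }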
+
+    #[tokio::test]
+    async fn test_text_extraction() {
+        let json = r#"{"message": "Hello, World!"}"#;
+        let doc = JsonDocument::new(json.to_string()).unwrap();
+        let extracted = doc.extract_text().await.unwrap();
+        assert!(extracted.raw.contains("Hello, World!"));
+    }
+}
diff --git a/crates/nvisy-text/src/documents/markdown.rs b/crates/nvisy-text/src/documents/markdown.rs
new file mode 100644
index 0000000..c30720f
--- /dev/null
+++ b/crates/nvisy-text/src/documents/markdown.rs
@@ -0,0 +1,343 @@
+//! Markdown document type.
+
+use std::num::NonZeroU32;
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use markdown::{ParseOptions, mdast::Node, to_mdast};
+use nvisy_document::{
+    BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource,
+    Result, TextExtractor,
+};
+
+/// A loaded Markdown document.
+#[derive(Debug, Clone)]
+pub struct MarkdownDocument {
+    info: DocumentInfo,
+    content: String,
+    regions: Vec<Region>,
+    plain_text: String,
+}
+
+impl MarkdownDocument {
+    /// Creates a new Markdown document from content.
+    #[must_use]
+    pub fn new(content: String) -> Self {
+        let (regions, plain_text) = Self::parse_content(&content);
+        let size = content.len() as u64;
+        let info = DocumentInfo::new("text/markdown", size).with_page_count(1);
+
+        Self {
+            info,
+            content,
+            regions,
+            plain_text,
+        }
+    }
+
+    /// Returns the raw Markdown content.
+    #[must_use]
+    pub fn content(&self) -> &str {
+        &self.content
+    }
+
+    /// Returns the extracted plain text.
+    #[must_use]
+    pub fn plain_text(&self) -> &str {
+        &self.plain_text
+    }
+
+    fn parse_content(content: &str) -> (Vec<Region>, String) {
+        let mut regions = Vec::new();
+        let mut plain_text = String::new();
+        let mut region_count = 0;
+
+        let options = ParseOptions::gfm();
+        if let Ok(ast) = to_mdast(content, &options) {
+            Self::process_node(&ast, &mut regions, &mut plain_text, &mut region_count);
+        }
+
+        (regions, plain_text.trim().to_string())
+    }
+
+    fn process_node(
+        node: &Node,
+        regions: &mut Vec<Region>,
+        plain_text: &mut String,
+        region_count: &mut usize,
+    ) {
+        match node {
+            Node::Root(root) => {
+                for child in &root.children {
+                    Self::process_node(child, regions, plain_text, region_count);
+                }
+            }
+            Node::Heading(heading) => {
+                let text = Self::extract_text_from_children(&heading.children);
+                if !text.is_empty() {
+                    Self::add_region(regions, RegionKind::Heading, &text, region_count);
+                    plain_text.push_str(&text);
+                    plain_text.push('\n');
+                }
+            }
+            Node::Paragraph(para) => {
+                let text = Self::extract_text_from_children(&para.children);
+                if !text.is_empty() {
+                    Self::add_region(regions, RegionKind::Text, &text, region_count);
+                    plain_text.push_str(&text);
+                    plain_text.push('\n');
+                }
+            }
+            Node::Code(code) => {
+                if !code.value.is_empty() {
+                    Self::add_region(regions, RegionKind::Code, &code.value, region_count);
+                    plain_text.push_str(&code.value);
+                    plain_text.push('\n');
+                }
+            }
+            Node::Blockquote(bq) => {
+                let text = Self::extract_text_from_children(&bq.children);
+                if !text.is_empty() {
+                    Self::add_region(regions, RegionKind::Quote, &text, region_count);
+                    plain_text.push_str(&text);
+                    plain_text.push('\n');
+                }
+            }
+            Node::List(list) => {
+                for item in &list.children {
+                    Self::process_node(item, regions, plain_text, region_count);
+                }
+            }
+            Node::ListItem(item) => {
+                let text = Self::extract_text_from_children(&item.children);
+                if !text.is_empty() {
+                    Self::add_region(regions, RegionKind::ListItem, &text, region_count);
+                    plain_text.push_str(&text);
+                    plain_text.push('\n');
+                }
+            }
+            Node::Table(table) => {
+                let text = Self::extract_text_from_children(&table.children);
+                if !text.is_empty() {
+                    Self::add_region(regions, RegionKind::Table, &text, region_count);
+                    plain_text.push_str(&text);
+                    plain_text.push('\n');
+                }
+            }
+            Node::Link(link) => {
+                let text = Self::extract_text_from_children(&link.children);
+                if !text.is_empty() {
+                    Self::add_region(regions, RegionKind::Link, &text, region_count);
+                    plain_text.push_str(&text);
+                }
+            }
+            Node::Image(img) => {
+                if !img.alt.is_empty() {
+                    Self::add_region(regions, RegionKind::Image, &img.alt, region_count);
+                    plain_text.push_str(&img.alt);
+                }
+            }
+            Node::ThematicBreak(_) => {
+                plain_text.push_str("\n---\n");
+            }
+            _ => {
+                // Process any children for other node types
+                if let Some(children) = node.children() {
+                    for child in children {
+                        Self::process_node(child, regions, plain_text, region_count);
+                    }
+                }
+            }
+        }
+    }
+
+    fn extract_text_from_children(children: &[Node]) -> String {
+        let mut text = String::new();
+        for child in children {
+            Self::extract_text_recursive(child, &mut text);
+        }
+        text.trim().to_string()
+    }
+
+    fn extract_text_recursive(node: &Node, text: &mut String) {
+        match node {
+            Node::Text(t) => text.push_str(&t.value),
+            Node::InlineCode(c) => text.push_str(&c.value),
+            Node::Code(c) => text.push_str(&c.value),
+            Node::Break(_) => text.push(' '),
+            _ => {
+                if let Some(children) = node.children() {
+                    for child in children {
+                        Self::extract_text_recursive(child, text);
+                    }
+                }
+            }
+        }
+    }
+
+    fn add_region(
+        regions: &mut Vec<Region>,
+        kind: RegionKind,
+        text: &str,
+        region_count: &mut usize,
+    ) {
+        let y_start = (*region_count as f64) * 0.05;
+        let height = 0.04;
+
+        regions.push(
+            Region::on_page(
+                NonZeroU32::new(1).unwrap(),
+                BoundingBox::new(0.0, y_start.min(0.95), 1.0, height),
+            )
+            .with_text(text.to_string())
+            .with_kind(kind)
+            .with_source(RegionSource::Parser),
+        );
+
+        *region_count += 1;
+    }
+}
+
+#[async_trait]
+impl Document for MarkdownDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        if page == 1 {
+            self.regions.iter().collect()
+        } else {
+            Vec::new()
+        }
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn to_bytes(&self) -> Result<Bytes> {
+        Ok(Bytes::from(self.content.clone()))
+    }
+}
+
+#[async_trait]
+impl TextExtractor for MarkdownDocument {
+    async fn extract_text(&self) -> Result<ExtractedText> {
+        let mut extracted =
+            ExtractedText::from_raw(&self.plain_text).with_page(1, &self.plain_text);
+
+        for region in &self.regions {
+            if let Some(text) = &region.text {
+                extracted = extracted.with_region(region.id, text);
+            }
+        }
+
+        Ok(extracted)
+    }
+
+    async fn extract_text_for_page(&self, page: u32) -> Result<Option<String>> {
+        if page == 1 {
+            Ok(Some(self.plain_text.clone()))
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn needs_ocr(&self) -> bool {
+        false
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_headings() {
+        let md = "# Title\n\n## Subtitle\n\nParagraph text.";
+        let doc = MarkdownDocument::new(md.to_string());
+
+        let headings: Vec<_> = doc
+            .regions()
+            .iter()
+            .filter(|r| r.kind == RegionKind::Heading)
+            .collect();
+
+        assert_eq!(headings.len(), 2);
+        assert_eq!(headings[0].text.as_deref(), Some("Title"));
+        assert_eq!(headings[1].text.as_deref(), Some("Subtitle"));
+    }
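+
+    // Sketch of thematic-break handling (follows from `process_node`
+    // above): breaks are preserved as "---" in the plain text.
+    #[test]
+    fn test_thematic_break() {
+        let md = "Before\n\n---\n\nAfter";
+        let doc = MarkdownDocument::new(md.to_string());
+
+        assert!(doc.plain_text().contains("---"));
+    }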
List"; + let doc = MarkdownDocument::new(md.to_string()); + + let list_items: Vec<_> = doc + .regions() + .iter() + .filter(|r| r.kind == RegionKind::ListItem) + .collect(); + + assert_eq!(list_items.len(), 4); + } + + #[test] + fn test_code_block() { + let md = "# Title\n\n```rust\nfn main() {}\n```\n\nText."; + let doc = MarkdownDocument::new(md.to_string()); + + let code: Vec<_> = doc + .regions() + .iter() + .filter(|r| r.kind == RegionKind::Code) + .collect(); + + assert_eq!(code.len(), 1); + assert!(code[0].text.as_deref().unwrap().contains("fn main()")); + } + + #[test] + fn test_blockquote() { + let md = "> This is a quote\n\nNormal text."; + let doc = MarkdownDocument::new(md.to_string()); + + let quotes: Vec<_> = doc + .regions() + .iter() + .filter(|r| r.kind == RegionKind::Quote) + .collect(); + + assert_eq!(quotes.len(), 1); + assert_eq!(quotes[0].text.as_deref(), Some("This is a quote")); + } + + #[tokio::test] + async fn test_text_extraction() { + let md = "# Hello\n\nThis is **bold** text."; + let doc = MarkdownDocument::new(md.to_string()); + let extracted = doc.extract_text().await.unwrap(); + + assert!(extracted.raw.contains("Hello")); + assert!(extracted.raw.contains("bold")); + // Formatting should be stripped + assert!(!extracted.raw.contains("**")); + } + + #[test] + fn test_plain_text() { + let md = "# Title\n\nParagraph with **bold** and *italic*."; + let doc = MarkdownDocument::new(md.to_string()); + + assert!(doc.plain_text().contains("Title")); + assert!(doc.plain_text().contains("bold")); + assert!(doc.plain_text().contains("italic")); + assert!(!doc.plain_text().contains("**")); + assert!(!doc.plain_text().contains("*italic*")); + } +} diff --git a/crates/nvisy-text/src/documents/mod.rs b/crates/nvisy-text/src/documents/mod.rs new file mode 100644 index 0000000..aef064e --- /dev/null +++ b/crates/nvisy-text/src/documents/mod.rs @@ -0,0 +1,19 @@ +//! Text-based document types. + +mod csv; +mod ini; +mod json; +mod markdown; +mod plain; +mod toml; +mod xml; +mod yaml; + +pub use self::csv::CsvDocument; +pub use self::ini::IniDocument; +pub use self::json::JsonDocument; +pub use self::markdown::MarkdownDocument; +pub use self::plain::PlainTextDocument; +pub use self::toml::TomlDocument; +pub use self::xml::XmlDocument; +pub use self::yaml::YamlDocument; diff --git a/crates/nvisy-text/src/documents/plain.rs b/crates/nvisy-text/src/documents/plain.rs new file mode 100644 index 0000000..e11caa1 --- /dev/null +++ b/crates/nvisy-text/src/documents/plain.rs @@ -0,0 +1,207 @@ +//! Plain text document type. + +use std::num::NonZeroU32; + +use async_trait::async_trait; +use bytes::Bytes; +use nvisy_document::{ + BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, + Result, TextExtractor, +}; + +/// A loaded plain text document. +#[derive(Debug, Clone)] +pub struct PlainTextDocument { + info: DocumentInfo, + content: String, + regions: Vec, +} + +impl PlainTextDocument { + /// Creates a new plain text document from content. + #[must_use] + pub fn new(content: String) -> Self { + let regions = Self::parse_regions(&content); + let size = content.len() as u64; + let info = DocumentInfo::new("text/plain", size).with_page_count(1); + + Self { + info, + content, + regions, + } + } + + /// Creates an empty plain text document. + #[must_use] + pub fn empty() -> Self { + Self::new(String::new()) + } + + /// Returns the raw text content. 
+ #[must_use] + pub fn content(&self) -> &str { + &self.content + } + + /// Parses text content into regions (paragraphs). + fn parse_regions(content: &str) -> Vec { + let mut regions = Vec::new(); + let total_len = content.len().max(1) as f64; + + let mut current_pos = 0_usize; + let mut paragraph_start = 0_usize; + let mut in_paragraph = false; + + for (i, c) in content.char_indices() { + if c == '\n' { + let next_char = content[i + 1..].chars().next(); + if next_char == Some('\n') || next_char.is_none() { + if in_paragraph && paragraph_start < i { + let text = content[paragraph_start..i].trim(); + if !text.is_empty() { + let region = + Self::create_paragraph_region(text, paragraph_start, i, total_len); + regions.push(region); + } + } + in_paragraph = false; + } else if !in_paragraph { + paragraph_start = i + 1; + in_paragraph = true; + } + } else if !in_paragraph { + paragraph_start = i; + in_paragraph = true; + } + current_pos = i + c.len_utf8(); + } + + if in_paragraph && paragraph_start < current_pos { + let text = content[paragraph_start..].trim(); + if !text.is_empty() { + let region = + Self::create_paragraph_region(text, paragraph_start, current_pos, total_len); + regions.push(region); + } + } + + if regions.is_empty() && !content.trim().is_empty() { + let region = Self::create_paragraph_region(content.trim(), 0, content.len(), total_len); + regions.push(region); + } + + regions + } + + fn create_paragraph_region( + text: &str, + start_byte: usize, + end_byte: usize, + total_len: f64, + ) -> Region { + let y_start = start_byte as f64 / total_len; + let y_end = end_byte as f64 / total_len; + let height = (y_end - y_start).max(0.01); + + Region::on_page( + NonZeroU32::new(1).unwrap(), + BoundingBox::new(0.0, y_start, 1.0, height), + ) + .with_text(text) + .with_kind(RegionKind::Text) + .with_source(RegionSource::Parser) + } +} + +#[async_trait] +impl Document for PlainTextDocument { + fn info(&self) -> &DocumentInfo { + &self.info + } + + fn regions(&self) -> &[Region] { + &self.regions + } + + fn regions_for_page(&self, page: u32) -> Vec<&Region> { + if page == 1 { + self.regions.iter().collect() + } else { + Vec::new() + } + } + + fn find_region(&self, id: RegionId) -> Option<&Region> { + self.regions.iter().find(|r| r.id == id) + } + + async fn to_bytes(&self) -> Result { + Ok(Bytes::from(self.content.clone())) + } +} + +#[async_trait] +impl TextExtractor for PlainTextDocument { + async fn extract_text(&self) -> Result { + let mut extracted = ExtractedText::from_raw(&self.content).with_page(1, &self.content); + + for region in &self.regions { + if let Some(text) = ®ion.text { + extracted = extracted.with_region(region.id, text); + } + } + + Ok(extracted) + } + + async fn extract_text_for_page(&self, page: u32) -> Result> { + if page == 1 { + Ok(Some(self.content.clone())) + } else { + Ok(None) + } + } + + fn needs_ocr(&self) -> bool { + false + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_document() { + let doc = PlainTextDocument::new("Hello, world!".to_string()); + assert_eq!(doc.content(), "Hello, world!"); + assert_eq!(doc.regions().len(), 1); + } + + #[test] + fn test_empty_document() { + let doc = PlainTextDocument::empty(); + assert!(doc.content().is_empty()); + assert!(doc.regions().is_empty()); + } + + #[test] + fn test_paragraph_parsing() { + let content = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."; + let doc = PlainTextDocument::new(content.to_string()); + + assert_eq!(doc.regions().len(), 3); + 
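+
+    // Sketch of the paragraph rule implemented in `parse_regions` above:
+    // a single newline stays inside one paragraph; only blank lines split.
+    #[test]
+    fn test_single_newline_keeps_paragraph() {
+        let content = "First line\nstill the same paragraph.";
+        let doc = PlainTextDocument::new(content.to_string());
+
+        assert_eq!(doc.regions().len(), 1);
+    }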
+
+    #[test]
+    fn test_paragraph_parsing() {
+        let content = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph.";
+        let doc = PlainTextDocument::new(content.to_string());
+
+        assert_eq!(doc.regions().len(), 3);
+        assert_eq!(doc.regions()[0].text.as_deref(), Some("First paragraph."));
+        assert_eq!(doc.regions()[1].text.as_deref(), Some("Second paragraph."));
+        assert_eq!(doc.regions()[2].text.as_deref(), Some("Third paragraph."));
+    }
+
+    #[tokio::test]
+    async fn test_to_bytes() {
+        let content = "Hello, world!";
+        let doc = PlainTextDocument::new(content.to_string());
+        let bytes = doc.to_bytes().await.unwrap();
+        assert_eq!(bytes.as_ref(), content.as_bytes());
+    }
+}
diff --git a/crates/nvisy-text/src/documents/toml.rs b/crates/nvisy-text/src/documents/toml.rs
new file mode 100644
index 0000000..f5ae371
--- /dev/null
+++ b/crates/nvisy-text/src/documents/toml.rs
@@ -0,0 +1,210 @@
+//! TOML document type.
+
+use std::num::NonZeroU32;
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use nvisy_document::{
+    BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource,
+    Result, TextExtractor,
+};
+
+/// A loaded TOML document.
+#[derive(Debug, Clone)]
+pub struct TomlDocument {
+    info: DocumentInfo,
+    content: String,
+    regions: Vec<Region>,
+}
+
+impl TomlDocument {
+    /// Creates a new TOML document from content.
+    #[must_use]
+    pub fn new(content: String) -> Self {
+        let regions = Self::parse_regions(&content);
+        let size = content.len() as u64;
+        let info = DocumentInfo::new("application/toml", size).with_page_count(1);
+
+        Self {
+            info,
+            content,
+            regions,
+        }
+    }
+
+    /// Returns the raw content.
+    #[must_use]
+    pub fn content(&self) -> &str {
+        &self.content
+    }
+
+    fn parse_regions(content: &str) -> Vec<Region> {
+        let mut regions = Vec::new();
+        let total_len = content.len().max(1) as f64;
+        let mut pos = 0_usize;
+        let mut current_section: Option<RegionId> = None;
+
+        for line in content.lines() {
+            let line_start = pos;
+            let line_end = pos + line.len();
+            let trimmed = line.trim();
+
+            if trimmed.is_empty() {
+                pos = line_end + 1;
+                continue;
+            }
+
+            let y_start = line_start as f64 / total_len;
+            let height = ((line_end - line_start) as f64 / total_len).max(0.02);
+
+            // Table headers: [section] or [[array]]
+            if (trimmed.starts_with('[') && trimmed.ends_with(']'))
+                || (trimmed.starts_with("[[") && trimmed.ends_with("]]"))
+            {
+                let section = Region::on_page(
+                    NonZeroU32::new(1).unwrap(),
+                    BoundingBox::new(0.0, y_start, 1.0, height),
+                )
+                .with_text(trimmed)
+                .with_kind(RegionKind::Heading)
+                .with_source(RegionSource::Parser);
+                current_section = Some(section.id);
+                regions.push(section);
+            } else if trimmed.starts_with('#') {
+                regions.push(
+                    Region::on_page(
+                        NonZeroU32::new(1).unwrap(),
+                        BoundingBox::new(0.02, y_start, 0.98, height),
+                    )
+                    .with_text(trimmed)
+                    .with_kind(RegionKind::Annotation)
+                    .with_source(RegionSource::Parser),
+                );
+            } else {
+                let mut region = Region::on_page(
+                    NonZeroU32::new(1).unwrap(),
+                    BoundingBox::new(0.02, y_start, 0.98, height),
+                )
+                .with_text(trimmed)
+                .with_kind(RegionKind::Code)
+                .with_source(RegionSource::Parser);
+
+                if let Some(parent) = current_section {
+                    region = region.with_parent(parent);
+                }
+                regions.push(region);
+            }
+
+            pos = line_end + 1;
+        }
+
+        regions
+    }
+}
+
+#[async_trait]
+impl Document for TomlDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        if page == 1 {
+            self.regions.iter().collect()
+        } else {
+            Vec::new()
+        }
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn to_bytes(&self) -> Result<Bytes> {
+        Ok(Bytes::from(self.content.clone()))
+    }
+}
+
+#[async_trait]
+impl TextExtractor for TomlDocument {
+    async fn extract_text(&self) -> Result<ExtractedText> {
+        let mut extracted = ExtractedText::from_raw(&self.content).with_page(1, &self.content);
+
+        for region in &self.regions {
+            if let Some(text) = &region.text {
+                extracted = extracted.with_region(region.id, text);
+            }
+        }
+
+        Ok(extracted)
+    }
+
+    async fn extract_text_for_page(&self, page: u32) -> Result<Option<String>> {
+        if page == 1 {
+            Ok(Some(self.content.clone()))
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn needs_ocr(&self) -> bool {
+        false
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_simple_toml() {
+        let toml = "[package]\nname = \"test\"\nversion = \"1.0\"";
+        let doc = TomlDocument::new(toml.to_string());
+
+        let sections: Vec<_> = doc
+            .regions()
+            .iter()
+            .filter(|r| r.kind == RegionKind::Heading)
+            .collect();
+        assert_eq!(sections.len(), 1);
+        assert_eq!(sections[0].text.as_deref(), Some("[package]"));
+    }
+
+    #[test]
+    fn test_parse_array_tables() {
+        let toml = "[[dependencies]]\nname = \"foo\"\n\n[[dependencies]]\nname = \"bar\"";
+        let doc = TomlDocument::new(toml.to_string());
+
+        let sections: Vec<_> = doc
+            .regions()
+            .iter()
+            .filter(|r| r.kind == RegionKind::Heading)
+            .collect();
+        assert_eq!(sections.len(), 2);
+    }
+
+    #[test]
+    fn test_parse_comments() {
+        let toml = "# Comment\nkey = \"value\"";
+        let doc = TomlDocument::new(toml.to_string());
+
+        let comments: Vec<_> = doc
+            .regions()
+            .iter()
+            .filter(|r| r.kind == RegionKind::Annotation)
+            .collect();
+        assert_eq!(comments.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn test_text_extraction() {
+        let toml = "[section]\nkey = \"value\"";
+        let doc = TomlDocument::new(toml.to_string());
+        let text = doc.extract_text().await.unwrap();
+        assert!(text.raw.contains("key = \"value\""));
+    }
+}
diff --git a/crates/nvisy-text/src/documents/xml.rs b/crates/nvisy-text/src/documents/xml.rs
new file mode 100644
index 0000000..87b2448
--- /dev/null
+++ b/crates/nvisy-text/src/documents/xml.rs
@@ -0,0 +1,174 @@
+//! XML document type.
+
+use std::num::NonZeroU32;
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use nvisy_document::{
+    BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource,
+    Result, TextExtractor,
+};
+
+/// A loaded XML document.
+#[derive(Debug, Clone)]
+pub struct XmlDocument {
+    info: DocumentInfo,
+    content: String,
+    regions: Vec<Region>,
+}
+
+impl XmlDocument {
+    /// Creates a new XML document from content.
+    #[must_use]
+    pub fn new(content: String) -> Self {
+        let regions = Self::parse_regions(&content);
+        let size = content.len() as u64;
+        let info = DocumentInfo::new("application/xml", size).with_page_count(1);
+
+        Self {
+            info,
+            content,
+            regions,
+        }
+    }
+
+    /// Returns the raw content.
+    #[must_use]
+    pub fn content(&self) -> &str {
+        &self.content
+    }
+
+    fn parse_regions(content: &str) -> Vec<Region> {
+        let mut regions = Vec::new();
+        let total_len = content.len().max(1) as f64;
+        let mut depth = 0_usize;
+        let mut pos = 0_usize;
+
+        for line in content.lines() {
+            let line_start = pos;
+            let line_end = pos + line.len();
+            let trimmed = line.trim();
+
+            if trimmed.is_empty() {
+                pos = line_end + 1;
+                continue;
+            }
+
+            // Closing tags step back out one nesting level.
+            if trimmed.starts_with("</") {
+                depth = depth.saturating_sub(1);
+            }
+
+            let indent = (depth as f64 * 0.02).min(0.2);
+            let y_start = line_start as f64 / total_len;
+            let height = ((line_end - line_start) as f64 / total_len).max(0.02);
+
+            regions.push(
+                Region::on_page(
+                    NonZeroU32::new(1).unwrap(),
+                    BoundingBox::new(indent, y_start, 1.0 - indent, height),
+                )
+                .with_text(trimmed)
+                .with_kind(RegionKind::Code)
+                .with_source(RegionSource::Parser),
+            );
+
+            // Opening tags (not declarations, comments, self-closing, or
+            // one-line elements) nest subsequent lines one level deeper.
+            if trimmed.starts_with('<')
+                && !trimmed.starts_with("</")
+                && !trimmed.starts_with("<?")
+                && !trimmed.starts_with("<!")
+                && !trimmed.ends_with("/>")
+                && !trimmed.contains("</")
+            {
+                depth += 1;
+            }
+
+            pos = line_end + 1;
+        }
+
+        regions
+    }
+}
+
+#[async_trait]
+impl Document for XmlDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        if page == 1 {
+            self.regions.iter().collect()
+        } else {
+            Vec::new()
+        }
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn to_bytes(&self) -> Result<Bytes> {
+        Ok(Bytes::from(self.content.clone()))
+    }
+}
+
+#[async_trait]
+impl TextExtractor for XmlDocument {
+    async fn extract_text(&self) -> Result<ExtractedText> {
+        let mut extracted = ExtractedText::from_raw(&self.content).with_page(1, &self.content);
+
+        for region in &self.regions {
+            if let Some(text) = &region.text {
+                extracted = extracted.with_region(region.id, text);
+            }
+        }
+
+        Ok(extracted)
+    }
+
+    async fn extract_text_for_page(&self, page: u32) -> Result<Option<String>> {
+        if page == 1 {
+            Ok(Some(self.content.clone()))
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn needs_ocr(&self) -> bool {
+        false
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_simple_xml() {
+        let xml = "<root>\n  <item>text</item>\n</root>";
+        let doc = XmlDocument::new(xml.to_string());
+
+        assert_eq!(doc.regions().len(), 3);
+    }
+
+    #[test]
+    fn test_nested_depth() {
+        let xml = "<a>\n<b>\n<c>\n</c>\n</b>\n</a>";
+        let doc = XmlDocument::new(xml.to_string());
+
+        // Each element should create a region with increasing indent
+        assert!(!doc.regions().is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_text_extraction() {
+        let xml = "<root>content</root>";
+        let doc = XmlDocument::new(xml.to_string());
+        let text = doc.extract_text().await.unwrap();
+        assert!(text.raw.contains("content"));
+    }
+}
diff --git a/crates/nvisy-text/src/documents/yaml.rs b/crates/nvisy-text/src/documents/yaml.rs
new file mode 100644
index 0000000..7557513
--- /dev/null
+++ b/crates/nvisy-text/src/documents/yaml.rs
@@ -0,0 +1,189 @@
+//! YAML document type.
+
+use std::num::NonZeroU32;
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use nvisy_document::{
+    BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource,
+    Result, TextExtractor,
+};
+
+/// A loaded YAML document.
+#[derive(Debug, Clone)]
+pub struct YamlDocument {
+    info: DocumentInfo,
+    content: String,
+    regions: Vec<Region>,
+}
+
+impl YamlDocument {
+    /// Creates a new YAML document from content.
+    #[must_use]
+    pub fn new(content: String) -> Self {
+        let regions = Self::parse_regions(&content);
+        let size = content.len() as u64;
+        let info = DocumentInfo::new("application/x-yaml", size).with_page_count(1);
+
+        Self {
+            info,
+            content,
+            regions,
+        }
+    }
+
+    /// Returns the raw content.
+    #[must_use]
+    pub fn content(&self) -> &str {
+        &self.content
+    }
+
+    fn parse_regions(content: &str) -> Vec<Region> {
+        let mut regions = Vec::new();
+        let total_len = content.len().max(1) as f64;
+        let mut pos = 0_usize;
+
+        for line in content.lines() {
+            let line_start = pos;
+            let line_end = pos + line.len();
+            let trimmed = line.trim();
+
+            if trimmed.is_empty() {
+                pos = line_end + 1;
+                continue;
+            }
+
+            let indent_chars = line.len() - line.trim_start().len();
+            let indent = (indent_chars as f64 * 0.01).min(0.2);
+
+            let y_start = line_start as f64 / total_len;
+            let height = ((line_end - line_start) as f64 / total_len).max(0.02);
+
+            let kind = if trimmed.starts_with('#') {
+                RegionKind::Annotation
+            } else if trimmed.starts_with('-') {
+                RegionKind::ListItem
+            } else if trimmed.contains(':') {
+                RegionKind::Code
+            } else {
+                RegionKind::Text
+            };
+
+            regions.push(
+                Region::on_page(
+                    NonZeroU32::new(1).unwrap(),
+                    BoundingBox::new(indent, y_start, 1.0 - indent, height),
+                )
+                .with_text(trimmed)
+                .with_kind(kind)
+                .with_source(RegionSource::Parser),
+            );
+
+            pos = line_end + 1;
+        }
+
+        regions
+    }
+}
+
+#[async_trait]
+impl Document for YamlDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        if page == 1 {
+            self.regions.iter().collect()
+        } else {
+            Vec::new()
+        }
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn to_bytes(&self) -> Result<Bytes> {
+        Ok(Bytes::from(self.content.clone()))
+    }
+}
+
+#[async_trait]
+impl TextExtractor for YamlDocument {
+    async fn extract_text(&self) -> Result<ExtractedText> {
+        let mut extracted = ExtractedText::from_raw(&self.content).with_page(1, &self.content);
+
+        for region in &self.regions {
+            if let Some(text) = &region.text {
+                extracted = extracted.with_region(region.id, text);
+            }
+        }
+
+        Ok(extracted)
+    }
+
+    async fn extract_text_for_page(&self, page: u32) -> Result<Option<String>> {
+        if page == 1 {
+            Ok(Some(self.content.clone()))
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn needs_ocr(&self) -> bool {
+        false
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_simple_yaml() {
+        let yaml = "name: test\nvalue: 42";
+        let doc = YamlDocument::new(yaml.to_string());
+
+        assert_eq!(doc.regions().len(), 2);
+        assert!(doc.regions().iter().all(|r| r.kind == RegionKind::Code));
+    }
+
+    #[test]
+    fn test_parse_list() {
+        let yaml = "items:\n  - first\n  - second";
+        let doc = YamlDocument::new(yaml.to_string());
+
+        let list_items: Vec<_> = doc
+            .regions()
+            .iter()
+            .filter(|r| r.kind == RegionKind::ListItem)
+            .collect();
+        assert_eq!(list_items.len(), 2);
+    }
+
+    #[test]
+    fn test_parse_comments() {
+        let yaml = "# This is a comment\nkey: value";
+        let doc = YamlDocument::new(yaml.to_string());
+
+        let comments: Vec<_> = doc
+            .regions()
+            .iter()
+            .filter(|r| r.kind == RegionKind::Annotation)
+            .collect();
+        assert_eq!(comments.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn test_text_extraction() {
+        let yaml = "key: value";
+        let doc = YamlDocument::new(yaml.to_string());
+        let text = doc.extract_text().await.unwrap();
+        assert!(text.raw.contains("key: value"));
+    }
+}
diff --git a/crates/nvisy-text/src/format.rs b/crates/nvisy-text/src/format.rs
deleted file mode 100644
index 5458c74..0000000
--- a/crates/nvisy-text/src/format.rs
+++ /dev/null
@@ -1,70 +0,0 @@
-//! Plain text format handler implementation.
-
-use bytes::Bytes;
-use nvisy_document::{Capabilities, DocumentFormat, Error, Result};
-
-use crate::TextDocument;
-
-/// Plain text document format handler.
-#[derive(Debug, Clone, Default)]
-pub struct TextFormat {
-    capabilities: Capabilities,
-}
-
-impl TextFormat {
-    /// Creates a new plain text format handler.
-    #[must_use]
-    pub fn new() -> Self {
-        Self {
-            capabilities: Capabilities::read_only(),
-        }
-    }
-}
-
-impl DocumentFormat for TextFormat {
-    type Document = TextDocument;
-
-    fn name(&self) -> &'static str {
-        "text"
-    }
-
-    fn mime_types(&self) -> &'static [&'static str] {
-        &["text/plain", "text/markdown", "text/x-rst"]
-    }
-
-    fn extensions(&self) -> &'static [&'static str] {
-        &["txt", "md", "markdown", "rst", "text"]
-    }
-
-    fn capabilities(&self) -> &Capabilities {
-        &self.capabilities
-    }
-
-    async fn load(&self, _data: Bytes) -> Result<Self::Document> {
-        // TODO: Implement text loading
-        Err(Error::unsupported_format(
-            "Text loading not yet implemented",
-        ))
-    }
-
-    async fn create_empty(&self) -> Result<Self::Document> {
-        // TODO: Implement empty text document creation
-        Err(Error::unsupported_format(
-            "Text creation not yet implemented",
-        ))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_format_metadata() {
-        let format = TextFormat::new();
-        assert_eq!(format.name(), "text");
-        assert!(format.mime_types().contains(&"text/plain"));
-        assert!(format.extensions().contains(&"txt"));
-        assert!(format.extensions().contains(&"md"));
-    }
-}
diff --git a/crates/nvisy-text/src/formats/csv.rs b/crates/nvisy-text/src/formats/csv.rs
new file mode 100644
index 0000000..e5533a7
--- /dev/null
+++ b/crates/nvisy-text/src/formats/csv.rs
@@ -0,0 +1,114 @@
+//! CSV format handler.
+
+use bytes::Bytes;
+use nvisy_document::{
+    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+};
+
+use crate::documents::CsvDocument;
+
+/// CSV document format handler.
+#[derive(Debug, Clone)]
+pub struct CsvFormat {
+    capabilities: Capabilities,
+}
+
+impl CsvFormat {
+    /// Creates a new CSV format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities {
+                text: TextCapabilities {
+                    can_extract: true,
+                    has_rich_text: false,
+                    may_need_ocr: false,
+                },
+                structure: StructureCapabilities {
+                    can_detect_structure: true,
+                    can_detect_tables: true,
+                    has_pages: false,
+                },
+                ..Default::default()
+            },
+        }
+    }
+}
+
+impl Default for CsvFormat {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DocumentFormat for CsvFormat {
+    type Document = CsvDocument;
+
+    fn name(&self) -> &'static str {
+        "csv"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["text/csv", "text/tab-separated-values"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["csv", "tsv"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, data: Bytes) -> Result<Self::Document> {
+        let content = String::from_utf8_lossy(&data).into_owned();
+        // Auto-detect delimiter
+        let first_line = content.lines().next().unwrap_or("");
+        let delimiter = if first_line.contains('\t') {
+            b'\t'
+        } else {
+            b','
+        };
+        Ok(CsvDocument::new(content, delimiter))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use nvisy_document::Document;
+
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = CsvFormat::new();
+        assert_eq!(format.name(), "csv");
+        assert!(format.mime_types().contains(&"text/csv"));
+        assert!(format.extensions().contains(&"csv"));
+        assert!(format.extensions().contains(&"tsv"));
+    }
+
+    #[test]
+    fn test_capabilities() {
+        let format = CsvFormat::new();
+        let caps = format.capabilities();
+        assert!(caps.structure.can_detect_tables);
+    }
+
+    #[tokio::test]
+    async fn test_load_csv() {
+        let format = CsvFormat::new();
+        let data = Bytes::from("a,b,c\n1,2,3");
+        let doc = format.load(data).await.unwrap();
+        assert_eq!(doc.delimiter(), b',');
+        assert!(!doc.regions().is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_load_tsv() {
+        let format = CsvFormat::new();
+        let data = Bytes::from("a\tb\tc\n1\t2\t3");
+        let doc = format.load(data).await.unwrap();
+        assert_eq!(doc.delimiter(), b'\t');
+    }
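+
+    // Sketch of the auto-detection tie-break (follows from `load` above):
+    // a tab in the first line wins even when commas are also present.
+    #[tokio::test]
+    async fn test_detect_prefers_tab() {
+        let format = CsvFormat::new();
+        let data = Bytes::from("a\tb,c\n1\t2,3");
+        let doc = format.load(data).await.unwrap();
+        assert_eq!(doc.delimiter(), b'\t');
+    }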
+}
diff --git a/crates/nvisy-text/src/formats/ini.rs b/crates/nvisy-text/src/formats/ini.rs
new file mode 100644
index 0000000..bfe22ce
--- /dev/null
+++ b/crates/nvisy-text/src/formats/ini.rs
@@ -0,0 +1,98 @@
+//! INI format handler.
+
+use bytes::Bytes;
+use nvisy_document::{
+    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+};
+
+use crate::documents::IniDocument;
+
+/// INI document format handler.
+#[derive(Debug, Clone)]
+pub struct IniFormat {
+    capabilities: Capabilities,
+}
+
+impl IniFormat {
+    /// Creates a new INI format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities {
+                text: TextCapabilities {
+                    can_extract: true,
+                    has_rich_text: false,
+                    may_need_ocr: false,
+                },
+                structure: StructureCapabilities {
+                    can_detect_structure: true,
+                    can_detect_tables: false,
+                    has_pages: false,
+                },
+                ..Default::default()
+            },
+        }
+    }
+}
+
+impl Default for IniFormat {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DocumentFormat for IniFormat {
+    type Document = IniDocument;
+
+    fn name(&self) -> &'static str {
+        "ini"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["text/plain"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["ini", "cfg", "conf", "config"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, data: Bytes) -> Result<Self::Document> {
+        let content = String::from_utf8_lossy(&data).into_owned();
+        Ok(IniDocument::new(content))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use nvisy_document::Document;
+
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = IniFormat::new();
+        assert_eq!(format.name(), "ini");
+        assert!(format.extensions().contains(&"ini"));
+        assert!(format.extensions().contains(&"cfg"));
+        assert!(format.extensions().contains(&"conf"));
+    }
+
+    #[test]
+    fn test_capabilities() {
+        let format = IniFormat::new();
+        let caps = format.capabilities();
+        assert!(caps.structure.can_detect_structure);
+    }
+
+    #[tokio::test]
+    async fn test_load_ini() {
+        let format = IniFormat::new();
+        let data = Bytes::from("[section]\nkey=value\nfoo=bar");
+        let doc = format.load(data).await.unwrap();
+        assert!(!doc.regions().is_empty());
+    }
+}
diff --git a/crates/nvisy-text/src/formats/json.rs b/crates/nvisy-text/src/formats/json.rs
new file mode 100644
index 0000000..163135e
--- /dev/null
+++ b/crates/nvisy-text/src/formats/json.rs
@@ -0,0 +1,98 @@
+//! JSON format handler.
+
+use bytes::Bytes;
+use nvisy_document::{
+    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+};
+
+use crate::documents::JsonDocument;
+
+/// JSON document format handler.
+#[derive(Debug, Clone)]
+pub struct JsonFormat {
+    capabilities: Capabilities,
+}
+
+impl JsonFormat {
+    /// Creates a new JSON format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities {
+                text: TextCapabilities {
+                    can_extract: true,
+                    has_rich_text: false,
+                    may_need_ocr: false,
+                },
+                structure: StructureCapabilities {
+                    can_detect_structure: true,
+                    can_detect_tables: false,
+                    has_pages: false,
+                },
+                ..Default::default()
+            },
+        }
+    }
+}
+
+impl Default for JsonFormat {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DocumentFormat for JsonFormat {
+    type Document = JsonDocument;
+
+    fn name(&self) -> &'static str {
+        "json"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["application/json", "text/json"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["json"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, data: Bytes) -> Result<Self::Document> {
+        let content = String::from_utf8_lossy(&data).into_owned();
+        JsonDocument::new(content)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use nvisy_document::Document;
+
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = JsonFormat::new();
+        assert_eq!(format.name(), "json");
+        assert!(format.mime_types().contains(&"application/json"));
+        assert!(format.extensions().contains(&"json"));
+    }
+
+    #[tokio::test]
+    async fn test_load_document() {
+        let format = JsonFormat::new();
+        let data = Bytes::from(r#"{"hello": "world"}"#);
+        let doc = format.load(data).await.unwrap();
+        assert!(!doc.regions().is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_load_invalid_json() {
+        let format = JsonFormat::new();
+        let data = Bytes::from("not valid json {");
+        let result = format.load(data).await;
+        assert!(result.is_err());
+    }
+}
diff --git a/crates/nvisy-text/src/formats/markdown.rs b/crates/nvisy-text/src/formats/markdown.rs
new file mode 100644
index 0000000..d040865
--- /dev/null
+++ b/crates/nvisy-text/src/formats/markdown.rs
@@ -0,0 +1,99 @@
+//! Markdown format handler.
+
+use bytes::Bytes;
+use nvisy_document::{
+    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+};
+
+use crate::documents::MarkdownDocument;
+
+/// Markdown document format handler.
+#[derive(Debug, Clone)]
+pub struct MarkdownFormat {
+    capabilities: Capabilities,
+}
+
+impl MarkdownFormat {
+    /// Creates a new Markdown format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities {
+                text: TextCapabilities {
+                    can_extract: true,
+                    has_rich_text: true,
+                    may_need_ocr: false,
+                },
+                structure: StructureCapabilities {
+                    can_detect_structure: true,
+                    can_detect_tables: false,
+                    has_pages: false,
+                },
+                ..Default::default()
+            },
+        }
+    }
+}
+
+impl Default for MarkdownFormat {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DocumentFormat for MarkdownFormat {
+    type Document = MarkdownDocument;
+
+    fn name(&self) -> &'static str {
+        "markdown"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["text/markdown", "text/x-markdown"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["md", "markdown", "mdown", "mkd"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, data: Bytes) -> Result<Self::Document> {
+        let content = String::from_utf8_lossy(&data).into_owned();
+        Ok(MarkdownDocument::new(content))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use nvisy_document::Document;
+
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = MarkdownFormat::new();
+        assert_eq!(format.name(), "markdown");
+        assert!(format.mime_types().contains(&"text/markdown"));
+        assert!(format.extensions().contains(&"md"));
+        assert!(format.extensions().contains(&"markdown"));
+    }
+
+    #[test]
+    fn test_capabilities() {
+        let format = MarkdownFormat::new();
+        let caps = format.capabilities();
+        assert!(caps.text.has_rich_text);
+        assert!(caps.structure.can_detect_structure);
+    }
+
+    #[tokio::test]
+    async fn test_load_document() {
+        let format = MarkdownFormat::new();
+        let data = Bytes::from("# Test\n\nContent here.");
+        let doc = format.load(data).await.unwrap();
+        assert!(!doc.regions().is_empty());
+    }
+}
diff --git a/crates/nvisy-text/src/formats/mod.rs b/crates/nvisy-text/src/formats/mod.rs
new file mode 100644
index 0000000..63010aa
--- /dev/null
+++ b/crates/nvisy-text/src/formats/mod.rs
@@ -0,0 +1,19 @@
+//! Text-based document format handlers.
+
+mod csv;
+mod ini;
+mod json;
+mod markdown;
+mod plain;
+mod toml;
+mod xml;
+mod yaml;
+
+pub use self::csv::CsvFormat;
+pub use self::ini::IniFormat;
+pub use self::json::JsonFormat;
+pub use self::markdown::MarkdownFormat;
+pub use self::plain::PlainTextFormat;
+pub use self::toml::TomlFormat;
+pub use self::xml::XmlFormat;
+pub use self::yaml::YamlFormat;
diff --git a/crates/nvisy-text/src/formats/plain.rs b/crates/nvisy-text/src/formats/plain.rs
new file mode 100644
index 0000000..5a03060
--- /dev/null
+++ b/crates/nvisy-text/src/formats/plain.rs
@@ -0,0 +1,74 @@
+//! Plain text format handler.
+
+use bytes::Bytes;
+use nvisy_document::{Capabilities, DocumentFormat, Result};
+
+use crate::documents::PlainTextDocument;
+
+/// Plain text document format handler.
+#[derive(Debug, Clone)]
+pub struct PlainTextFormat {
+    capabilities: Capabilities,
+}
+
+impl PlainTextFormat {
+    /// Creates a new plain text format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities::text(),
+        }
+    }
+}
+
+impl Default for PlainTextFormat {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DocumentFormat for PlainTextFormat {
+    type Document = PlainTextDocument;
+
+    fn name(&self) -> &'static str {
+        "plain-text"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["text/plain"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["txt", "text"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, data: Bytes) -> Result<Self::Document> {
+        let content = String::from_utf8_lossy(&data).into_owned();
+        Ok(PlainTextDocument::new(content))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = PlainTextFormat::new();
+        assert_eq!(format.name(), "plain-text");
+        assert!(format.mime_types().contains(&"text/plain"));
+        assert!(format.extensions().contains(&"txt"));
+    }
+
+    #[tokio::test]
+    async fn test_load_document() {
+        let format = PlainTextFormat::new();
+        let data = Bytes::from("Hello, world!");
+        let doc = format.load(data).await.unwrap();
+        assert_eq!(doc.content(), "Hello, world!");
+    }
+}
diff --git a/crates/nvisy-text/src/formats/toml.rs b/crates/nvisy-text/src/formats/toml.rs
new file mode 100644
index 0000000..b0411a5
--- /dev/null
+++ b/crates/nvisy-text/src/formats/toml.rs
@@ -0,0 +1,97 @@
+//! TOML format handler.
+
+use bytes::Bytes;
+use nvisy_document::{
+    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+};
+
+use crate::documents::TomlDocument;
+
+/// TOML document format handler.
+#[derive(Debug, Clone)]
+pub struct TomlFormat {
+    capabilities: Capabilities,
+}
+
+impl TomlFormat {
+    /// Creates a new TOML format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities {
+                text: TextCapabilities {
+                    can_extract: true,
+                    has_rich_text: false,
+                    may_need_ocr: false,
+                },
+                structure: StructureCapabilities {
+                    can_detect_structure: true,
+                    can_detect_tables: false,
+                    has_pages: false,
+                },
+                ..Default::default()
+            },
+        }
+    }
+}
+
+impl Default for TomlFormat {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl DocumentFormat for TomlFormat {
+    type Document = TomlDocument;
+
+    fn name(&self) -> &'static str {
+        "toml"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["application/toml"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["toml"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, data: Bytes) -> Result<Self::Document> {
+        let content = String::from_utf8_lossy(&data).into_owned();
+        Ok(TomlDocument::new(content))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use nvisy_document::Document;
+
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = TomlFormat::new();
+        assert_eq!(format.name(), "toml");
+        assert!(format.mime_types().contains(&"application/toml"));
+        assert!(format.extensions().contains(&"toml"));
+    }
+
+    #[test]
+    fn test_capabilities() {
+        let format = TomlFormat::new();
+        let caps = format.capabilities();
+        assert!(caps.structure.can_detect_structure);
+    }
+
+    #[tokio::test]
+    async fn test_load_toml() {
+        let format = TomlFormat::new();
+        let data = Bytes::from("[package]\nname = \"test\"\nversion = \"1.0\"");
+        let doc = format.load(data).await.unwrap();
+        assert!(!doc.regions().is_empty());
+    }
+}
diff --git a/crates/nvisy-text/src/formats/xml.rs b/crates/nvisy-text/src/formats/xml.rs
new file mode 100644
index 0000000..2113191
--- /dev/null
+++ b/crates/nvisy-text/src/formats/xml.rs
@@ -0,0 +1,99 @@
+//! XML format handler.
+
+use bytes::Bytes;
+use nvisy_document::{
+    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+};
+
+use crate::documents::XmlDocument;
+
+/// XML document format handler.
+#[derive(Debug, Clone)]
+pub struct XmlFormat {
+    capabilities: Capabilities,
+}
+
+impl XmlFormat {
+    /// Creates a new XML format handler.
+ #[must_use] + pub fn new() -> Self { + Self { + capabilities: Capabilities { + text: TextCapabilities { + can_extract: true, + has_rich_text: false, + may_need_ocr: false, + }, + structure: StructureCapabilities { + can_detect_structure: true, + can_detect_tables: false, + has_pages: false, + }, + ..Default::default() + }, + } + } +} + +impl Default for XmlFormat { + fn default() -> Self { + Self::new() + } +} + +impl DocumentFormat for XmlFormat { + type Document = XmlDocument; + + fn name(&self) -> &'static str { + "xml" + } + + fn mime_types(&self) -> &'static [&'static str] { + &["application/xml", "text/xml"] + } + + fn extensions(&self) -> &'static [&'static str] { + &["xml", "xsd", "xsl", "xslt", "svg", "xhtml", "plist"] + } + + fn capabilities(&self) -> &Capabilities { + &self.capabilities + } + + async fn load(&self, data: Bytes) -> Result<Self::Document> { + let content = String::from_utf8_lossy(&data).into_owned(); + Ok(XmlDocument::new(content)) + } +} + +#[cfg(test)] +mod tests { + use nvisy_document::Document; + + use super::*; + + #[test] + fn test_format_metadata() { + let format = XmlFormat::new(); + assert_eq!(format.name(), "xml"); + assert!(format.mime_types().contains(&"application/xml")); + assert!(format.extensions().contains(&"xml")); + assert!(format.extensions().contains(&"svg")); + } + + #[test] + fn test_capabilities() { + let format = XmlFormat::new(); + let caps = format.capabilities(); + assert!(caps.structure.can_detect_structure); + assert!(!caps.structure.can_detect_tables); + } + + #[tokio::test] + async fn test_load_xml() { + let format = XmlFormat::new(); + let data = Bytes::from("<root>content</root>"); + let doc = format.load(data).await.unwrap(); + assert!(!doc.regions().is_empty()); + } +} diff --git a/crates/nvisy-text/src/formats/yaml.rs b/crates/nvisy-text/src/formats/yaml.rs new file mode 100644 index 0000000..63d47a5 --- /dev/null +++ b/crates/nvisy-text/src/formats/yaml.rs @@ -0,0 +1,98 @@ +//! YAML format handler. + +use bytes::Bytes; +use nvisy_document::{ + Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities, +}; + +use crate::documents::YamlDocument; + +/// YAML document format handler. +#[derive(Debug, Clone)] +pub struct YamlFormat { + capabilities: Capabilities, +} + +impl YamlFormat { + /// Creates a new YAML format handler.
+ #[must_use] + pub fn new() -> Self { + Self { + capabilities: Capabilities { + text: TextCapabilities { + can_extract: true, + has_rich_text: false, + may_need_ocr: false, + }, + structure: StructureCapabilities { + can_detect_structure: true, + can_detect_tables: false, + has_pages: false, + }, + ..Default::default() + }, + } + } +} + +impl Default for YamlFormat { + fn default() -> Self { + Self::new() + } +} + +impl DocumentFormat for YamlFormat { + type Document = YamlDocument; + + fn name(&self) -> &'static str { + "yaml" + } + + fn mime_types(&self) -> &'static [&'static str] { + &["application/x-yaml", "text/yaml", "application/yaml"] + } + + fn extensions(&self) -> &'static [&'static str] { + &["yaml", "yml"] + } + + fn capabilities(&self) -> &Capabilities { + &self.capabilities + } + + async fn load(&self, data: Bytes) -> Result { + let content = String::from_utf8_lossy(&data).into_owned(); + Ok(YamlDocument::new(content)) + } +} + +#[cfg(test)] +mod tests { + use nvisy_document::Document; + + use super::*; + + #[test] + fn test_format_metadata() { + let format = YamlFormat::new(); + assert_eq!(format.name(), "yaml"); + assert!(format.mime_types().contains(&"application/x-yaml")); + assert!(format.extensions().contains(&"yaml")); + assert!(format.extensions().contains(&"yml")); + } + + #[test] + fn test_capabilities() { + let format = YamlFormat::new(); + let caps = format.capabilities(); + assert!(caps.structure.can_detect_structure); + } + + #[tokio::test] + async fn test_load_yaml() { + let format = YamlFormat::new(); + let data = Bytes::from("key: value\nlist:\n - item1\n - item2"); + let doc = format.load(data).await.unwrap(); + assert!(!doc.regions().is_empty()); + } +} diff --git a/crates/nvisy-text/src/lib.rs b/crates/nvisy-text/src/lib.rs index b8b6981..a54d6d0 100644 --- a/crates/nvisy-text/src/lib.rs +++ b/crates/nvisy-text/src/lib.rs @@ -2,8 +2,26 @@ #![cfg_attr(docsrs, feature(doc_cfg))] #![doc = include_str!("../README.md")] -mod document; -mod format; +pub mod documents; +pub mod formats; -pub use document::TextDocument; -pub use format::TextFormat; +// Re-export document types +pub use documents::{ + CsvDocument, IniDocument, JsonDocument, MarkdownDocument, PlainTextDocument, TomlDocument, + XmlDocument, YamlDocument, +}; + +// Re-export format handlers +pub use formats::{ + CsvFormat, IniFormat, JsonFormat, MarkdownFormat, PlainTextFormat, TomlFormat, XmlFormat, + YamlFormat, +}; + +// Legacy aliases for backwards compatibility +pub use PlainTextDocument as TextDocument; +pub use PlainTextFormat as TextFormat; + +// Re-export commonly used types from nvisy-document +pub use nvisy_document::{ + Document, DocumentFormat, ExtractedText, NormalizedTable, Region, TableExtractor, TextExtractor, +}; From aadcc5bd5e473893c1f28c657c74d97cde004f94 Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Fri, 16 Jan 2026 21:28:16 +0100 Subject: [PATCH 3/5] refactor: use nvisy-core error handling, split image formats - Remove nvisy-document error module, re-export from nvisy-core - Add load_file method to DocumentFormat trait - Use data.as_string()? 
instead of String::from_utf8_lossy in nvisy-text - Split ImageFormat into JpegFormat and PngFormat - Register image formats in nvisy-engine --- Cargo.lock | 1 + Cargo.toml | 25 +- crates/nvisy-core/src/io/content.rs | 32 +- crates/nvisy-core/src/io/content_data.rs | 334 ++++++++-------- crates/nvisy-core/src/io/data_reference.rs | 6 +- crates/nvisy-document/Cargo.toml | 2 +- crates/nvisy-document/src/conversion/mod.rs | 2 +- crates/nvisy-document/src/error.rs | 358 ------------------ crates/nvisy-document/src/format/mod.rs | 29 +- crates/nvisy-document/src/lib.rs | 30 +- crates/nvisy-document/src/metadata/mod.rs | 2 +- crates/nvisy-document/src/table/mod.rs | 2 +- crates/nvisy-document/src/text/mod.rs | 2 +- crates/nvisy-document/src/thumbnail/mod.rs | 2 +- crates/nvisy-docx/src/document.rs | 4 +- crates/nvisy-docx/src/format.rs | 9 +- crates/nvisy-engine/Cargo.toml | 4 +- crates/nvisy-engine/src/engine/mod.rs | 13 +- crates/nvisy-engine/src/registry/mod.rs | 36 +- .../src/{document.rs => documents/jpeg.rs} | 20 +- crates/nvisy-image/src/documents/mod.rs | 7 + crates/nvisy-image/src/documents/png.rs | 54 +++ crates/nvisy-image/src/format.rs | 82 ---- crates/nvisy-image/src/formats/jpeg.rs | 71 ++++ crates/nvisy-image/src/formats/mod.rs | 7 + crates/nvisy-image/src/formats/png.rs | 70 ++++ crates/nvisy-image/src/lib.rs | 8 +- crates/nvisy-pdf/src/document.rs | 4 +- crates/nvisy-pdf/src/format.rs | 7 +- crates/nvisy-text/README.md | 5 +- crates/nvisy-text/src/documents/json.rs | 2 +- crates/nvisy-text/src/formats/csv.rs | 11 +- crates/nvisy-text/src/formats/ini.rs | 9 +- crates/nvisy-text/src/formats/json.rs | 11 +- crates/nvisy-text/src/formats/markdown.rs | 9 +- crates/nvisy-text/src/formats/plain.rs | 9 +- crates/nvisy-text/src/formats/toml.rs | 9 +- crates/nvisy-text/src/formats/xml.rs | 9 +- crates/nvisy-text/src/formats/yaml.rs | 9 +- 39 files changed, 524 insertions(+), 782 deletions(-) delete mode 100644 crates/nvisy-document/src/error.rs rename crates/nvisy-image/src/{document.rs => documents/jpeg.rs} (69%) create mode 100644 crates/nvisy-image/src/documents/mod.rs create mode 100644 crates/nvisy-image/src/documents/png.rs delete mode 100644 crates/nvisy-image/src/format.rs create mode 100644 crates/nvisy-image/src/formats/jpeg.rs create mode 100644 crates/nvisy-image/src/formats/mod.rs create mode 100644 crates/nvisy-image/src/formats/png.rs diff --git a/Cargo.lock b/Cargo.lock index ccd2d32..5accc23 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -713,6 +713,7 @@ dependencies = [ "nvisy-archive", "nvisy-document", "nvisy-docx", + "nvisy-image", "nvisy-pdf", "nvisy-text", "serde", diff --git a/Cargo.toml b/Cargo.toml index 60a8c09..9b787b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,14 +43,7 @@ nvisy-text = { path = "./crates/nvisy-text", version = "0.1.0", features = [] } # Async runtime and I/O tokio = { version = "1.49", default-features = false, features = [] } -tokio-stream = { version = "0.1", default-features = false, features = [] } -tokio-util = { version = "0.7", default-features = false, features = [] } -futures = { version = "0.3", default-features = false, features = [] } async-trait = { version = "0.1", default-features = false, features = [] } - -# File system utilities -walkdir = { version = "2.5", default-features = false, features = [] } -memmap2 = { version = "0.9", default-features = false, features = [] } tempfile = { version = "3.24", default-features = false, features = [] } # Multithreading @@ -64,44 +57,28 @@ csv = { version = "1.4", default-features = 
false, features = [] } # Data types and utilities uuid = { version = "1.19", features = [] } jiff = { version = "0.2", default-features = false, features = [] } -size = { version = "0.5", default-features = false, features = [] } bytes = { version = "1.11", default-features = false, features = [] } -rust_decimal = { version = "1.36", default-features = false, features = [] } semver = { version = "1.0", default-features = false, features = [] } -isolang = { version = "2.4", default-features = false, features = ["english_names"] } -# Text processing and pattern matching +# Text processing markdown = { version = "1.0.0", default-features = false, features = [] } -regex = { version = "1.11", default-features = false, features = [] } -regex-lite = { version = "0.1", default-features = false, features = ["std"] } -fancy-regex = { version = "0.16", default-features = false, features = [] } -aho-corasick = { version = "1.1", default-features = false, features = [] } -unicode-segmentation = { version = "1.10", default-features = false, features = [] } hipstr = { version = "0.8", default-features = false, features = [] } # Cryptography and hashing sha2 = { version = "0.10", default-features = false, features = [] } -blake3 = { version = "1.8", default-features = false, features = [] } base64 = { version = "0.22", default-features = false, features = [] } hex = { version = "0.4", features = [] } zeroize = { version = "1.7", default-features = false, features = [] } -rand = { version = "0.9", default-features = false, features = [] } # Error handling thiserror = { version = "2.0", features = [] } -anyhow = { version = "1.0", features = ["backtrace"] } # Tracing and observability tracing = { version = "0.1", features = [] } -tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] } # Macros and derive utilities derive_more = { version = "2.0", default-features = false, features = [] } strum = { version = "0.27", default-features = false, features = [] } -const_format = { version = "0.2", default-features = false, features = [] } # Testing utilities tokio-test = { version = "0.4", default-features = false, features = [] } -proptest = { version = "1.4", default-features = false, features = [] } -criterion = { version = "0.7", default-features = false, features = [] } -rstest = { version = "0.26", default-features = false, features = [] } diff --git a/crates/nvisy-core/src/io/content.rs b/crates/nvisy-core/src/io/content.rs index b3870f4..93de761 100644 --- a/crates/nvisy-core/src/io/content.rs +++ b/crates/nvisy-core/src/io/content.rs @@ -7,6 +7,7 @@ use derive_more::{AsRef, Deref}; use serde::{Deserialize, Serialize}; use super::ContentData; +use crate::error::Result; use crate::fs::ContentMetadata; use crate::path::ContentSource; @@ -28,7 +29,7 @@ use crate::path::ContentSource; /// let content = Content::new(data); /// /// assert_eq!(content.size(), 13); -/// assert!(content.is_text()); +/// assert!(content.is_likely_text()); /// /// // Create content with metadata /// let source = ContentSource::new(); @@ -88,33 +89,18 @@ impl Content { self.data.content_source } - /// Get the size of the content in bytes - pub fn size(&self) -> usize { - self.data.size() - } - - /// Check if the content is empty - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - /// Check if the content is stored as text - pub fn is_text(&self) -> bool { - self.data.is_text() - } - - /// Check if the content is stored as binary - pub fn is_binary(&self) -> bool { - self.data.is_binary() - } - /// Get 
the content as bytes pub fn as_bytes(&self) -> &[u8] { self.data.as_bytes() } + /// Returns `true` if the content appears to be text. + pub fn is_likely_text(&self) -> bool { + self.data.is_likely_text() + } + /// Try to get the content as a string slice - pub fn as_str(&self) -> crate::error::Result<&str> { + pub fn as_str(&self) -> Result<&str> { self.data.as_str() } @@ -159,7 +145,7 @@ mod tests { let content = Content::new(data.clone()); assert_eq!(content.size(), 13); - assert!(content.is_text()); + assert!(content.is_likely_text()); assert!(content.metadata().is_none()); } diff --git a/crates/nvisy-core/src/io/content_data.rs b/crates/nvisy-core/src/io/content_data.rs index 1f08bc3..8f41af3 100644 --- a/crates/nvisy-core/src/io/content_data.rs +++ b/crates/nvisy-core/src/io/content_data.rs @@ -4,6 +4,7 @@ //! along with its metadata and source information. use std::fmt; +use std::ops::Deref; use std::sync::OnceLock; use bytes::Bytes; @@ -14,150 +15,158 @@ use sha2::{Digest, Sha256}; use crate::error::{Error, ErrorResource, ErrorType, Result}; use crate::path::ContentSource; -/// The underlying data storage type for content +/// A wrapper around `Bytes` for content storage. /// -/// This enum allows content to be stored as either binary data (`Bytes`) -/// or text data (`HipStr`). Both types are cheap to clone as they use -/// reference counting internally. -#[derive(Debug, Clone, PartialEq, Eq)] +/// This struct wraps `bytes::Bytes` and provides additional methods +/// for text conversion. It's cheap to clone as `Bytes` uses reference +/// counting internally. +#[derive(Debug, Clone, PartialEq, Eq, Default)] #[derive(Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum ContentBytes { - /// Binary data stored as `bytes::Bytes` - Binary(Bytes), - /// Text data stored as `hipstr::HipStr<'static>` (owned) - Text(HipStr<'static>), -} +#[serde(transparent)] +pub struct ContentBytes(Bytes); impl ContentBytes { - /// Get the size of the content in bytes + /// Creates a new `ContentBytes` from raw bytes. + #[must_use] + pub fn new(bytes: Bytes) -> Self { + Self(bytes) + } + + /// Returns the size of the content in bytes. + #[must_use] pub fn len(&self) -> usize { - match self { - Self::Binary(bytes) => bytes.len(), - Self::Text(text) => text.len(), - } + self.0.len() } - /// Check if the content is empty + /// Returns `true` if the content is empty. + #[must_use] pub fn is_empty(&self) -> bool { - match self { - Self::Binary(bytes) => bytes.is_empty(), - Self::Text(text) => text.is_empty(), - } + self.0.is_empty() } - /// Get the content as a byte slice + /// Returns the content as a byte slice. + #[must_use] pub fn as_bytes(&self) -> &[u8] { - match self { - Self::Binary(bytes) => bytes, - Self::Text(text) => text.as_bytes(), - } + &self.0 } - /// Check if this is text content - pub fn is_text(&self) -> bool { - matches!(self, Self::Text(_)) + /// Tries to return the content as a string slice. + /// + /// Returns `None` if the content is not valid UTF-8. + #[must_use] + pub fn as_str(&self) -> Option<&str> { + std::str::from_utf8(&self.0).ok() } - /// Check if this is binary content - pub fn is_binary(&self) -> bool { - matches!(self, Self::Binary(_)) + /// Converts to a `HipStr` if the content is valid UTF-8. + /// + /// # Errors + /// + /// Returns an error if the content is not valid UTF-8. 
+ pub fn as_hipstr(&self) -> Result<HipStr<'static>> { + let s = std::str::from_utf8(&self.0).map_err(|e| { + Error::new(format!("Invalid UTF-8: {e}")) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) + })?; + Ok(HipStr::from(s)) + } + + /// Returns the underlying `Bytes`. + #[must_use] + pub fn to_bytes(&self) -> Bytes { + self.0.clone() + } - /// Try to get the content as a string slice - pub fn as_str(&self) -> Option<&str> { - match self { - Self::Binary(bytes) => std::str::from_utf8(bytes).ok(), - Self::Text(text) => Some(text.as_str()), - } + /// Consumes and returns the underlying `Bytes`. + #[must_use] + pub fn into_bytes(self) -> Bytes { + self.0 } - /// Convert to Bytes (clones if text) - pub fn to_bytes(&self) -> Bytes { - match self { - Self::Binary(bytes) => bytes.clone(), - Self::Text(text) => Bytes::copy_from_slice(text.as_bytes()), - } + /// Returns `true` if the content appears to be text. + /// + /// Uses a simple heuristic: checks if all bytes are ASCII printable + /// or whitespace characters. + #[must_use] + pub fn is_likely_text(&self) -> bool { + self.0 + .iter() + .all(|&b| b.is_ascii_graphic() || b.is_ascii_whitespace()) } +} - /// Convert to HipStr if valid UTF-8 - pub fn to_hipstr(&self) -> Result<HipStr<'static>> { - match self { - Self::Binary(bytes) => { - let s = std::str::from_utf8(bytes).map_err(|e| { - Error::new(format!("Invalid UTF-8: {e}")) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Core) - })?; - Ok(HipStr::from(s)) - } - Self::Text(text) => Ok(text.clone()), - } +impl Deref for ContentBytes { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.0 } } -impl Default for ContentBytes { - fn default() -> Self { - Self::Binary(Bytes::new()) +impl AsRef<[u8]> for ContentBytes { + fn as_ref(&self) -> &[u8] { + &self.0 } } impl From<&str> for ContentBytes { fn from(s: &str) -> Self { - Self::Text(HipStr::from(s)) + Self(Bytes::copy_from_slice(s.as_bytes())) } } impl From<String> for ContentBytes { fn from(s: String) -> Self { - Self::Text(HipStr::from(s)) + Self(Bytes::from(s)) } } impl From<HipStr<'static>> for ContentBytes { fn from(s: HipStr<'static>) -> Self { - Self::Text(s) + Self(Bytes::copy_from_slice(s.as_bytes())) } } impl From<&[u8]> for ContentBytes { fn from(bytes: &[u8]) -> Self { - Self::Binary(Bytes::copy_from_slice(bytes)) + Self(Bytes::copy_from_slice(bytes)) } } impl From<Vec<u8>> for ContentBytes { fn from(vec: Vec<u8>) -> Self { - Self::Binary(Bytes::from(vec)) + Self(Bytes::from(vec)) } } impl From<Bytes> for ContentBytes { fn from(bytes: Bytes) -> Self { - Self::Binary(bytes) + Self(bytes) } } -/// Content data with metadata and computed hashes +/// Content data with metadata and computed hashes. /// -/// This struct wraps [`ContentBytes`] (either `Bytes` or `HipStr`) and stores content data -/// along with metadata about its source and optional computed SHA256 hash. +/// This struct wraps [`ContentBytes`] and stores content data along with +/// metadata about its source and optional computed SHA256 hash. /// It's designed to be cheap to clone using reference-counted types. -/// The SHA256 hash is lazily computed using `OnceLock` for lock-free access after initialization. +/// The SHA256 hash is lazily computed using `OnceLock` for lock-free +/// access after initialization. #[derive(Debug)] #[derive(Serialize, Deserialize)] pub struct ContentData { - /// Unique identifier for the content source + /// Unique identifier for the content source. pub content_source: ContentSource, - /// The actual content data (binary or text) + /// The actual content data.
data: ContentBytes, - /// Lazily computed SHA256 hash of the content + /// Lazily computed SHA256 hash of the content. #[serde(skip)] sha256_cache: OnceLock<Bytes>, } impl ContentData { - /// Create new content data from bytes + /// Creates new content data from bytes. /// /// # Example /// @@ -174,12 +183,12 @@ impl ContentData { pub fn new(content_source: ContentSource, data: Bytes) -> Self { Self { content_source, - data: ContentBytes::Binary(data), + data: ContentBytes::new(data), sha256_cache: OnceLock::new(), } } - /// Create new content data from text + /// Creates new content data from text. /// /// # Example /// @@ -189,18 +198,17 @@ impl ContentData { /// let source = ContentSource::new(); /// let content = ContentData::from_text(source, "Hello, world!"); /// - /// assert!(content.is_text()); /// assert_eq!(content.as_str().unwrap(), "Hello, world!"); /// ``` - pub fn from_text(content_source: ContentSource, text: impl Into<HipStr<'static>>) -> Self { + pub fn from_text(content_source: ContentSource, text: impl Into<String>) -> Self { Self { content_source, - data: ContentBytes::Text(text.into()), + data: ContentBytes::from(text.into()), sha256_cache: OnceLock::new(), } } - /// Create content data with explicit content bytes type + /// Creates content data with explicit `ContentBytes`. pub fn with_content_bytes(content_source: ContentSource, data: ContentBytes) -> Self { Self { content_source, @@ -209,13 +217,15 @@ impl ContentData { } } - /// Get the size of the content in bytes + /// Returns the size of the content in bytes. + #[must_use] pub fn size(&self) -> usize { self.data.len() } - /// Get pretty formatted size string + /// Returns a pretty formatted size string. #[allow(clippy::cast_precision_loss)] + #[must_use] pub fn get_pretty_size(&self) -> String { let bytes = self.size(); match bytes { @@ -226,100 +236,91 @@ impl ContentData { } } - /// Get the content data as bytes slice + /// Returns the content data as a byte slice. + #[must_use] pub fn as_bytes(&self) -> &[u8] { self.data.as_bytes() } - /// Get the underlying content bytes + /// Returns a reference to the underlying `ContentBytes`. + #[must_use] pub fn content_bytes(&self) -> &ContentBytes { &self.data } - /// Convert the content data to Bytes + /// Converts the content data to `Bytes`. + #[must_use] pub fn to_bytes(&self) -> Bytes { self.data.to_bytes() } - /// Consume and convert into Bytes + /// Consumes and converts into `Bytes`. + #[must_use] pub fn into_bytes(self) -> Bytes { - match self.data { - ContentBytes::Binary(bytes) => bytes, - ContentBytes::Text(text) => Bytes::copy_from_slice(text.as_bytes()), - } - } - - /// Check if the content is stored as text - pub fn is_text(&self) -> bool { - self.data.is_text() - } - - /// Check if the content is stored as binary - pub fn is_binary(&self) -> bool { - self.data.is_binary() + self.data.into_bytes() } - /// Check if the content is likely text (basic heuristic for binary data) + /// Returns `true` if the content appears to be text. + /// + /// Uses a simple heuristic: checks if all bytes are ASCII printable + /// or whitespace characters. + #[must_use] pub fn is_likely_text(&self) -> bool { - match &self.data { - ContentBytes::Text(_) => true, - ContentBytes::Binary(bytes) => bytes - .iter() - .all(|&b| b.is_ascii_graphic() || b.is_ascii_whitespace()), - } + self.data.is_likely_text() } - /// Try to convert the content data to a UTF-8 string + /// Tries to convert the content data to a UTF-8 string.
/// /// # Errors /// /// Returns an error if the content data contains invalid UTF-8 sequences. pub fn as_string(&self) -> Result<String> { - match &self.data { - ContentBytes::Text(text) => Ok(text.to_string()), - ContentBytes::Binary(bytes) => String::from_utf8(bytes.to_vec()).map_err(|e| { - Error::new(format!("Invalid UTF-8: {e}")) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Core) - }), - } + self.data.as_hipstr().map(|s| s.to_string()) } - /// Try to convert the content data to a UTF-8 string slice + /// Tries to convert the content data to a UTF-8 string slice. /// /// # Errors /// /// Returns an error if the content data contains invalid UTF-8 sequences. pub fn as_str(&self) -> Result<&str> { - match &self.data { - ContentBytes::Text(text) => Ok(text.as_str()), - ContentBytes::Binary(bytes) => std::str::from_utf8(bytes).map_err(|e| { - Error::new(format!("Invalid UTF-8: {e}")) - .with_type(ErrorType::Runtime) - .with_resource(ErrorResource::Core) - }), - } + std::str::from_utf8(self.data.as_bytes()).map_err(|e| { + Error::new(format!("Invalid UTF-8: {e}")) + .with_type(ErrorType::Runtime) + .with_resource(ErrorResource::Core) + }) } - /// Compute SHA256 hash of the content + /// Converts to a `HipStr` if the content is valid UTF-8. + /// + /// # Errors + /// + /// Returns an error if the content is not valid UTF-8. + pub fn as_hipstr(&self) -> Result<HipStr<'static>> { + self.data.as_hipstr() + } + + /// Computes SHA256 hash of the content. fn compute_sha256_internal(&self) -> Bytes { let mut hasher = Sha256::new(); hasher.update(self.data.as_bytes()); Bytes::from(hasher.finalize().to_vec()) } - /// Get the SHA256 hash, computing it if not already done + /// Returns the SHA256 hash, computing it if not already done. + #[must_use] pub fn sha256(&self) -> &Bytes { self.sha256_cache .get_or_init(|| self.compute_sha256_internal()) } - /// Get the SHA256 hash as hex string + /// Returns the SHA256 hash as a hex string. + #[must_use] pub fn sha256_hex(&self) -> String { hex::encode(self.sha256()) } - /// Verify the content against a provided SHA256 hash + /// Verifies the content against a provided SHA256 hash. /// /// # Errors /// @@ -341,11 +342,12 @@ impl ContentData { } } - /// Get a slice of the content data + /// Returns a slice of the content data. /// /// # Errors /// - /// Returns an error if the end index is beyond the content length or if start is greater than end. + /// Returns an error if the end index is beyond the content length + /// or if start is greater than end. pub fn slice(&self, start: usize, end: usize) -> Result<Bytes> { let bytes = self.data.as_bytes(); if end > bytes.len() { @@ -367,13 +369,13 @@ impl ContentData { Ok(Bytes::copy_from_slice(&bytes[start..end])) } - /// Check if the content is empty + /// Returns `true` if the content is empty.
+ #[must_use] pub fn is_empty(&self) -> bool { self.data.is_empty() } } -// Manual implementation of Clone since OnceLock doesn't propagate the computed value impl Clone for ContentData { fn clone(&self) -> Self { let new_lock = OnceLock::new(); @@ -390,7 +392,6 @@ impl Clone for ContentData { } } -// Manual implementation of PartialEq impl PartialEq for ContentData { fn eq(&self, other: &Self) -> bool { self.content_source == other.content_source && self.data == other.data @@ -399,7 +400,6 @@ impl PartialEq for ContentData { impl Eq for ContentData {} -// Implement From conversions for common types impl From<&str> for ContentData { fn from(s: &str) -> Self { let source = ContentSource::new(); @@ -438,7 +438,7 @@ impl From<Vec<u8>> for ContentData { impl From<HipStr<'static>> for ContentData { fn from(text: HipStr<'static>) -> Self { let source = ContentSource::new(); - Self::from_text(source, text) + Self::from_text(source, text.to_string()) } } @@ -464,7 +464,6 @@ mod tests { assert_eq!(content.content_source, source); assert_eq!(content.size(), 13); - // Check that hash is not computed yet assert!(content.sha256_cache.get().is_none()); } @@ -473,26 +472,34 @@ mod tests { let source = ContentSource::new(); let content = ContentData::from_text(source, "Hello, world!"); - assert!(content.is_text()); - assert!(!content.is_binary()); assert_eq!(content.as_str().unwrap(), "Hello, world!"); } #[test] - fn test_content_bytes_text() { - let text = ContentBytes::from("Hello"); - assert!(text.is_text()); - assert!(!text.is_binary()); - assert_eq!(text.as_str(), Some("Hello")); - assert_eq!(text.len(), 5); + fn test_content_bytes_wrapper() { + let bytes = ContentBytes::from("Hello"); + assert_eq!(bytes.as_str(), Some("Hello")); + assert_eq!(bytes.len(), 5); + assert!(!bytes.is_empty()); + } + + #[test] + fn test_content_bytes_as_hipstr() { + let bytes = ContentBytes::from("Hello, HipStr!"); + let hipstr = bytes.as_hipstr().unwrap(); + assert_eq!(hipstr.as_str(), "Hello, HipStr!"); + + // Test with invalid UTF-8 + let invalid = ContentBytes::from(vec![0xFF, 0xFE]); + assert!(invalid.as_hipstr().is_err()); } #[test] fn test_content_bytes_binary() { let binary = ContentBytes::from(vec![0xFF, 0xFE]); - assert!(binary.is_binary()); - assert!(!binary.is_text()); assert_eq!(binary.len(), 2); + assert!(binary.as_str().is_none()); + assert!(!binary.is_likely_text()); } #[test] @@ -510,9 +517,8 @@ mod tests { let hash = content.sha256(); assert!(content.sha256_cache.get().is_some()); - assert_eq!(hash.len(), 32); // SHA256 is 32 bytes + assert_eq!(hash.len(), 32); - // Test getting cached hash let hash2 = content.sha256(); assert_eq!(hash, hash2); } @@ -522,10 +528,8 @@ mod tests { let content = ContentData::from("Hello, world!"); let hash = content.sha256().clone(); - // Should verify successfully against itself assert!(content.verify_sha256(&hash).is_ok()); - // Should fail against different hash let wrong_hash = vec![0u8; 32]; assert!(content.verify_sha256(&wrong_hash).is_err()); } @@ -541,6 +545,16 @@ mod tests { assert!(binary_content.as_str().is_err()); } + #[test] + fn test_as_hipstr() { + let content = ContentData::from("Hello, HipStr!"); + let hipstr = content.as_hipstr().unwrap(); + assert_eq!(hipstr.as_str(), "Hello, HipStr!"); + + let binary_content = ContentData::from(vec![0xFF, 0xFE]); + assert!(binary_content.as_hipstr().is_err()); + } + #[test] fn test_is_likely_text() { let text_content = ContentData::from("Hello, world!"); @@ -560,7 +574,6 @@ mod tests { let slice = content.slice(7, 12).unwrap(); assert_eq!(slice,
Bytes::from("world")); - // Test bounds checking assert!(content.slice(0, 100).is_err()); assert!(content.slice(10, 5).is_err()); } @@ -578,10 +591,6 @@ mod tests { assert_eq!(from_bytes.as_str().unwrap(), "test"); assert_eq!(from_vec.as_str().unwrap(), "test"); assert_eq!(from_bytes_type.as_str().unwrap(), "test"); - - // Text types should be stored as text - assert!(from_str.is_text()); - assert!(from_string.is_text()); } #[test] @@ -596,12 +605,10 @@ mod tests { #[test] fn test_cloning_preserves_hash() { let original = ContentData::from("Hello, world!"); - // Compute hash first let _ = original.sha256(); let cloned = original.clone(); - // Both should have the hash computed assert!(original.sha256_cache.get().is_some()); assert!(cloned.sha256_cache.get().is_some()); assert_eq!(original.sha256(), cloned.sha256()); @@ -612,7 +619,6 @@ mod tests { let original = ContentData::from("Hello, world!"); let cloned = original.clone(); - // They should be equal assert_eq!(original, cloned); } @@ -645,7 +651,13 @@ mod tests { fn test_from_hipstr() { let hipstr = HipStr::from("Hello from HipStr"); let content = ContentData::from(hipstr); - assert!(content.is_text()); assert_eq!(content.as_str().unwrap(), "Hello from HipStr"); } + + #[test] + fn test_content_bytes_deref() { + let bytes = ContentBytes::from("Hello"); + assert_eq!(&*bytes, b"Hello"); + assert_eq!(bytes.as_ref(), b"Hello"); + } } diff --git a/crates/nvisy-core/src/io/data_reference.rs b/crates/nvisy-core/src/io/data_reference.rs index f97eb1a..cf98854 100644 --- a/crates/nvisy-core/src/io/data_reference.rs +++ b/crates/nvisy-core/src/io/data_reference.rs @@ -83,8 +83,8 @@ impl DataReference { } /// Check if the content is text-based - pub fn is_text(&self) -> bool { - self.content.is_text() + pub fn is_likely_text(&self) -> bool { + self.content.is_likely_text() } /// Get the size of the content in bytes @@ -104,7 +104,7 @@ mod tests { let content = Content::new(ContentData::from("Hello, world!")); let data_ref = DataReference::new(content); - assert!(data_ref.is_text()); + assert!(data_ref.is_likely_text()); assert!(data_ref.mapping_id().is_none()); assert_eq!(data_ref.size(), 13); // Verify UUIDv7 is used diff --git a/crates/nvisy-document/Cargo.toml b/crates/nvisy-document/Cargo.toml index 4351dea..43a9dde 100644 --- a/crates/nvisy-document/Cargo.toml +++ b/crates/nvisy-document/Cargo.toml @@ -25,7 +25,7 @@ rustdoc-args = ["--cfg", "docsrs"] nvisy-core = { workspace = true } # Async runtime and I/O -tokio = { workspace = true, features = ["sync", "io-util"] } +tokio = { workspace = true, features = ["sync", "io-util", "fs"] } async-trait = { workspace = true } # Data types diff --git a/crates/nvisy-document/src/conversion/mod.rs b/crates/nvisy-document/src/conversion/mod.rs index 14d7efd..f63f1a5 100644 --- a/crates/nvisy-document/src/conversion/mod.rs +++ b/crates/nvisy-document/src/conversion/mod.rs @@ -10,7 +10,7 @@ use async_trait::async_trait; pub use options::{ConversionOptions, HtmlOptions, PageMargins, PageOrientation, PdfOptions}; pub use types::{ConversionPath, ConversionResult, ConversionStep, FormatPair, SkippedElement}; -use crate::error::Result; +use crate::Result; use crate::format::Document; /// Trait for document format conversion. diff --git a/crates/nvisy-document/src/error.rs b/crates/nvisy-document/src/error.rs deleted file mode 100644 index d5a2d77..0000000 --- a/crates/nvisy-document/src/error.rs +++ /dev/null @@ -1,358 +0,0 @@ -//! Error types for document processing. 
- -use std::fmt; - -use crate::format::region::RegionId; - -/// A boxed error type for wrapping source errors. -pub type BoxError = Box<dyn std::error::Error + Send + Sync>; - -/// Result type for document processing. -pub type Result<T> = std::result::Result<T, Error>; - -/// The error type for document processing. -#[derive(Debug)] -pub struct Error { - kind: ErrorKind, - source: Option<BoxError>, -} - -/// The kind of error that occurred during document processing. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ErrorKind { - /// The document format is not supported. - UnsupportedFormat { format: String }, - - /// The document could not be parsed. - Parse { message: String }, - - /// A referenced region was not found. - RegionNotFound { id: RegionId }, - - /// A referenced page was not found. - PageNotFound { page: u32 }, - - /// An I/O error occurred. - Io { message: String }, - - /// Serialization/deserialization error. - Serialization { message: String }, - - /// The processing was cancelled. - Cancelled, - - /// A timeout occurred. - Timeout { duration_ms: u64 }, - - /// Resource limit exceeded. - ResourceLimit { resource: String }, - - /// Conversion error. - Conversion { message: String }, - - /// Metadata extraction error. - Metadata { message: String }, - - /// Thumbnail generation error. - Thumbnail { message: String }, - - /// Protected or encrypted document. - Protected { message: String }, -} - -impl Error { - /// Creates a new error with the given kind. - pub fn new(kind: ErrorKind) -> Self { - Self { kind, source: None } - } - - /// Creates a new error with the given kind and source. - pub fn with_source( - kind: ErrorKind, - source: impl std::error::Error + Send + Sync + 'static, - ) -> Self { - Self { - kind, - source: Some(Box::new(source)), - } - } - - /// Returns the kind of error. - pub fn kind(&self) -> &ErrorKind { - &self.kind - } - - /// Consumes the error and returns the kind. - pub fn into_kind(self) -> ErrorKind { - self.kind - } - - /// Returns true if this error is retriable. - pub fn is_retriable(&self) -> bool { - matches!( - self.kind, - ErrorKind::Timeout { .. } | ErrorKind::Io { .. } | ErrorKind::ResourceLimit { .. } - ) - } - - /// Returns true if this error indicates invalid user input. - pub fn is_user_error(&self) -> bool { - matches!( - self.kind, - ErrorKind::RegionNotFound { .. } | ErrorKind::PageNotFound { .. } - ) - } - - // Convenience constructors - - /// Creates a parse error. - pub fn parse(message: impl Into<String>) -> Self { - Self::new(ErrorKind::Parse { - message: message.into(), - }) - } - - /// Creates a parse error with a source. - pub fn parse_with_source( - message: impl Into<String>, - source: impl std::error::Error + Send + Sync + 'static, - ) -> Self { - Self::with_source( - ErrorKind::Parse { - message: message.into(), - }, - source, - ) - } - - /// Creates an unsupported format error. - pub fn unsupported_format(format: impl Into<String>) -> Self { - Self::new(ErrorKind::UnsupportedFormat { - format: format.into(), - }) - } - - /// Creates a region not found error. - pub fn region_not_found(id: RegionId) -> Self { - Self::new(ErrorKind::RegionNotFound { id }) - } - - /// Creates a page not found error. - pub fn page_not_found(page: u32) -> Self { - Self::new(ErrorKind::PageNotFound { page }) - } - - /// Creates an I/O error. - pub fn io(message: impl Into<String>) -> Self { - Self::new(ErrorKind::Io { - message: message.into(), - }) - } - - /// Creates an I/O error with a source.
- pub fn io_with_source( - message: impl Into<String>, - source: impl std::error::Error + Send + Sync + 'static, - ) -> Self { - Self::with_source( - ErrorKind::Io { - message: message.into(), - }, - source, - ) - } - - /// Creates a serialization error. - pub fn serialization(message: impl Into<String>) -> Self { - Self::new(ErrorKind::Serialization { - message: message.into(), - }) - } - - /// Creates a timeout error. - pub fn timeout(duration_ms: u64) -> Self { - Self::new(ErrorKind::Timeout { duration_ms }) - } - - /// Creates a resource limit error. - pub fn resource_limit(resource: impl Into<String>) -> Self { - Self::new(ErrorKind::ResourceLimit { - resource: resource.into(), - }) - } - - /// Creates a cancelled error. - pub fn cancelled() -> Self { - Self::new(ErrorKind::Cancelled) - } - - /// Creates a conversion error. - pub fn conversion(message: impl Into<String>) -> Self { - Self::new(ErrorKind::Conversion { - message: message.into(), - }) - } - - /// Creates a conversion error with a source. - pub fn conversion_with_source( - message: impl Into<String>, - source: impl std::error::Error + Send + Sync + 'static, - ) -> Self { - Self::with_source( - ErrorKind::Conversion { - message: message.into(), - }, - source, - ) - } - - /// Creates a metadata error. - pub fn metadata(message: impl Into<String>) -> Self { - Self::new(ErrorKind::Metadata { - message: message.into(), - }) - } - - /// Creates a metadata error with a source. - pub fn metadata_with_source( - message: impl Into<String>, - source: impl std::error::Error + Send + Sync + 'static, - ) -> Self { - Self::with_source( - ErrorKind::Metadata { - message: message.into(), - }, - source, - ) - } - - /// Creates a thumbnail error. - pub fn thumbnail(message: impl Into<String>) -> Self { - Self::new(ErrorKind::Thumbnail { - message: message.into(), - }) - } - - /// Creates a thumbnail error with a source. - pub fn thumbnail_with_source( - message: impl Into<String>, - source: impl std::error::Error + Send + Sync + 'static, - ) -> Self { - Self::with_source( - ErrorKind::Thumbnail { - message: message.into(), - }, - source, - ) - } - - /// Creates a protected document error.
- pub fn protected(message: impl Into<String>) -> Self { - Self::new(ErrorKind::Protected { - message: message.into(), - }) - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.kind { - ErrorKind::UnsupportedFormat { format } => write!(f, "unsupported format: {format}"), - ErrorKind::Parse { message } => write!(f, "parse error: {message}"), - ErrorKind::RegionNotFound { id } => write!(f, "region not found: {id}"), - ErrorKind::PageNotFound { page } => write!(f, "page not found: {page}"), - ErrorKind::Io { message } => write!(f, "I/O error: {message}"), - ErrorKind::Serialization { message } => write!(f, "serialization error: {message}"), - ErrorKind::Cancelled => write!(f, "processing cancelled"), - ErrorKind::Timeout { duration_ms } => { - write!(f, "processing timed out after {duration_ms}ms") - } - ErrorKind::ResourceLimit { resource } => { - write!(f, "resource limit exceeded: {resource}") - } - ErrorKind::Conversion { message } => write!(f, "conversion error: {message}"), - ErrorKind::Metadata { message } => write!(f, "metadata error: {message}"), - ErrorKind::Thumbnail { message } => write!(f, "thumbnail error: {message}"), - ErrorKind::Protected { message } => write!(f, "protected document: {message}"), - } - } -} - -impl std::error::Error for Error { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - self.source - .as_ref() - .map(|e| e.as_ref() as &(dyn std::error::Error + 'static)) - } -} - -impl From<std::io::Error> for Error { - fn from(error: std::io::Error) -> Self { - Self::io_with_source(error.to_string(), error) - } -} - -impl From<ErrorKind> for Error { - fn from(kind: ErrorKind) -> Self { - Self::new(kind) - } -} - -#[cfg(test)] -mod tests { - use std::error::Error as StdError; - - use super::*; - - #[test] - fn test_error_display() { - let err = Error::region_not_found(RegionId::new()); - let msg = err.to_string(); - assert!(msg.contains("region not found")); - } - - #[test] - fn test_error_kind() { - let err = Error::timeout(1000); - assert!(matches!( - err.kind(), - ErrorKind::Timeout { duration_ms: 1000 } - )); - } - - #[test] - fn test_error_is_retriable() { - assert!(Error::timeout(1000).is_retriable()); - assert!(Error::io("failed").is_retriable()); - assert!(!Error::region_not_found(RegionId::new()).is_retriable()); - } - - #[test] - fn test_error_is_user_error() { - assert!(Error::region_not_found(RegionId::new()).is_user_error()); - assert!(Error::page_not_found(5).is_user_error()); - assert!(!Error::timeout(1000).is_user_error()); - } - - #[test] - fn test_from_io_error() { - let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found"); - let err: Error = io_err.into(); - assert!(matches!(err.kind(), ErrorKind::Io { ..
})); - assert!(StdError::source(&err).is_some()); - } - - #[test] - fn test_error_with_source() { - let source = std::io::Error::other("underlying error"); - let err = Error::parse_with_source("failed to parse", source); - assert!(StdError::source(&err).is_some()); - } - - #[test] - fn test_from_error_kind() { - let kind = ErrorKind::Cancelled; - let err: Error = kind.into(); - assert!(matches!(err.kind(), ErrorKind::Cancelled)); - } -} diff --git a/crates/nvisy-document/src/format/mod.rs b/crates/nvisy-document/src/format/mod.rs index 58b1fa1..aa6915a 100644 --- a/crates/nvisy-document/src/format/mod.rs +++ b/crates/nvisy-document/src/format/mod.rs @@ -12,6 +12,7 @@ mod page; pub mod region; use std::future::Future; +use std::path::Path; use async_trait::async_trait; use bytes::Bytes; @@ -19,10 +20,11 @@ pub use capabilities::{ Capabilities, MetadataCapabilities, StructureCapabilities, TextCapabilities, }; pub use info::DocumentInfo; +pub use nvisy_core::io::ContentData; pub use page::PageOptions; pub use region::{BoundingBox, Point, Region, RegionId, RegionKind, RegionSource, RegionStatus}; -use crate::error::Result; +use crate::{Error, Result}; /// Trait for document format handlers with an associated Document type. /// @@ -45,8 +47,29 @@ pub trait DocumentFormat: Send + Sync { /// Returns the capabilities of this format. fn capabilities(&self) -> &Capabilities; - /// Loads a document from bytes. - fn load(&self, data: Bytes) -> impl Future<Output = Result<Self::Document>> + Send; + /// Loads a document from content data. + /// + /// The `ContentData` provides the raw bytes along with source tracking + /// and content metadata (size, hash, text/binary detection). + fn load(&self, data: ContentData) -> impl Future<Output = Result<Self::Document>> + Send; + + /// Loads a document from a file path. + /// + /// This is a convenience method that reads the file and calls [`Self::load`]. + fn load_file( + &self, + path: impl AsRef<Path> + Send, + ) -> impl Future<Output = Result<Self::Document>> + Send { + async move { + let path = path.as_ref(); + let bytes = tokio::fs::read(path).await.map_err(|e| { + Error::from_source(format!("Failed to read file: {}", path.display()), e) + .with_resource(crate::ErrorResource::Document) + })?; + let data = ContentData::from(bytes); + self.load(data).await + } + } } /// A loaded document instance (read-only access).
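For context, a minimal sketch of how the reworked `load`/`load_file` surface is meant to be consumed. It assumes the `PlainTextFormat` handler from nvisy-text and an ambient async runtime; the `notes.txt` path is a placeholder, not part of this patch:

```rust
use nvisy_document::{ContentData, DocumentFormat, Result};
use nvisy_text::PlainTextFormat;

async fn load_examples() -> Result<()> {
    let format = PlainTextFormat::new();

    // In-memory input goes through ContentData, which carries source
    // tracking and a lazily computed SHA-256 alongside the raw bytes.
    let doc = format.load(ContentData::from("Hello, world!")).await?;
    assert_eq!(doc.content(), "Hello, world!");

    // The default load_file reads the path via tokio::fs and then
    // delegates to load() with the same ContentData wrapper.
    // "notes.txt" is a hypothetical path used only for illustration.
    let _doc = format.load_file("notes.txt").await?;
    Ok(())
}
```

The default body keeps per-format implementations small: a handler only has to implement `load`, and file access lives in one place.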
diff --git a/crates/nvisy-document/src/lib.rs b/crates/nvisy-document/src/lib.rs index ec53667..18560e8 100644 --- a/crates/nvisy-document/src/lib.rs +++ b/crates/nvisy-document/src/lib.rs @@ -3,7 +3,6 @@ #![doc = include_str!("../README.md")] // Core modules -pub mod error; pub mod format; // Extension trait modules @@ -13,37 +12,24 @@ pub mod table; pub mod text; pub mod thumbnail; -// Error re-exports -pub use error::{BoxError, Error, ErrorKind, Result}; +// Error re-exports from nvisy-core +pub use nvisy_core::error::{BoxError, Error, ErrorResource, ErrorType, Result}; -// Region re-exports (from format::region) +pub use conversion::{ + Conversion, ConversionOptions, ConversionPath, ConversionResult, ConversionStep, FormatPair, + HtmlOptions, PageMargins, PageOrientation, PdfOptions, SkippedElement, +}; pub use format::region::{ BoundingBox, Point, Region, RegionId, RegionKind, RegionSource, RegionStatus, }; - -// Format re-exports pub use format::{ - Capabilities, Document, DocumentFormat, DocumentInfo, MetadataCapabilities, PageOptions, - StructureCapabilities, TextCapabilities, -}; - -// Conversion re-exports -pub use conversion::{ - Conversion, ConversionOptions, ConversionPath, ConversionResult, ConversionStep, FormatPair, - HtmlOptions, PageMargins, PageOrientation, PdfOptions, SkippedElement, + Capabilities, ContentData, Document, DocumentFormat, DocumentInfo, MetadataCapabilities, + PageOptions, StructureCapabilities, TextCapabilities, }; - -// Metadata re-exports pub use metadata::{ CustomProperty, DocumentMetadata, Metadata, MetadataExtractOptions, MetadataField, PropertyValue, }; - -// Table re-exports pub use table::{CellDataType, NormalizedCell, NormalizedRow, NormalizedTable, TableExtractor}; - -// Text re-exports pub use text::{ExtractedText, TextExtractor}; - -// Thumbnail re-exports pub use thumbnail::{ImageFormat, Thumbnail, ThumbnailGenerator, ThumbnailOptions, ThumbnailSize}; diff --git a/crates/nvisy-document/src/metadata/mod.rs b/crates/nvisy-document/src/metadata/mod.rs index 80e7568..d2e0c5e 100644 --- a/crates/nvisy-document/src/metadata/mod.rs +++ b/crates/nvisy-document/src/metadata/mod.rs @@ -10,7 +10,7 @@ use async_trait::async_trait; pub use extract::MetadataExtractOptions; pub use types::{CustomProperty, DocumentMetadata, MetadataField, PropertyValue}; -use crate::error::Result; +use crate::Result; use crate::format::Document; /// Trait for document metadata extraction and manipulation. diff --git a/crates/nvisy-document/src/table/mod.rs b/crates/nvisy-document/src/table/mod.rs index 44b24f9..80f82aa 100644 --- a/crates/nvisy-document/src/table/mod.rs +++ b/crates/nvisy-document/src/table/mod.rs @@ -8,7 +8,7 @@ mod types; use async_trait::async_trait; pub use types::{CellDataType, NormalizedCell, NormalizedRow, NormalizedTable}; -use crate::error::Result; +use crate::Result; use crate::format::{Document, Region, RegionKind}; /// Trait for document table extraction and normalization. diff --git a/crates/nvisy-document/src/text/mod.rs b/crates/nvisy-document/src/text/mod.rs index cfcdc25..319380d 100644 --- a/crates/nvisy-document/src/text/mod.rs +++ b/crates/nvisy-document/src/text/mod.rs @@ -8,7 +8,7 @@ mod types; use async_trait::async_trait; pub use types::ExtractedText; -use crate::error::Result; +use crate::Result; use crate::format::Document; /// Trait for document text extraction. 
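With the bespoke error module gone, call sites migrate to nvisy-core's builder-style `Error`. A hedged before/after sketch using only constructors that appear elsewhere in this patch (`new`, `from_source`, `with_resource`); the helper function names are illustrative:

```rust
use nvisy_document::{Error, ErrorResource, Result};

// Before: Error::parse("unbalanced brackets") from the removed module.
// After: a plain message plus a resource tag.
fn parse_failed() -> Result<()> {
    Err(Error::new("parse error: unbalanced brackets")
        .with_resource(ErrorResource::Document))
}

// Before: Error::io_with_source(msg, e).
// After: from_source keeps the underlying error in the source chain.
fn read_failed(path: &std::path::Path, e: std::io::Error) -> Error {
    Error::from_source(format!("Failed to read file: {}", path.display()), e)
        .with_resource(ErrorResource::Document)
}
```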
diff --git a/crates/nvisy-document/src/thumbnail/mod.rs b/crates/nvisy-document/src/thumbnail/mod.rs index 7db8f4a..2099b96 100644 --- a/crates/nvisy-document/src/thumbnail/mod.rs +++ b/crates/nvisy-document/src/thumbnail/mod.rs @@ -10,7 +10,7 @@ use async_trait::async_trait; pub use options::ThumbnailOptions; pub use types::{ImageFormat, Thumbnail, ThumbnailSize}; -use crate::error::Result; +use crate::Result; use crate::format::Document; /// Trait for document thumbnail generation. diff --git a/crates/nvisy-docx/src/document.rs b/crates/nvisy-docx/src/document.rs index cee2059..c82a00c 100644 --- a/crates/nvisy-docx/src/document.rs +++ b/crates/nvisy-docx/src/document.rs @@ -49,8 +49,6 @@ impl Document for DocxDocument { async fn to_bytes(&self) -> Result<Bytes> { // TODO: Implement DOCX serialization - Err(Error::unsupported_format( - "DOCX serialization not yet implemented", - )) + Err(Error::new("DOCX serialization not yet implemented")) } } diff --git a/crates/nvisy-docx/src/format.rs b/crates/nvisy-docx/src/format.rs index b4a1287..ba50278 100644 --- a/crates/nvisy-docx/src/format.rs +++ b/crates/nvisy-docx/src/format.rs @@ -1,7 +1,6 @@ //! DOCX format handler implementation. -use bytes::Bytes; -use nvisy_document::{Capabilities, DocumentFormat, Error, Result}; +use nvisy_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; use crate::DocxDocument; @@ -40,11 +39,9 @@ impl DocumentFormat for DocxFormat { &self.capabilities } - async fn load(&self, _data: Bytes) -> Result<Self::Document> { + async fn load(&self, _data: ContentData) -> Result<Self::Document> { // TODO: Implement DOCX loading - Err(Error::unsupported_format( - "DOCX loading not yet implemented", - )) + Err(Error::new("DOCX loading not yet implemented")) } } diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml index f22977a..5fb8c27 100644 --- a/crates/nvisy-engine/Cargo.toml +++ b/crates/nvisy-engine/Cargo.toml @@ -21,16 +21,18 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [features] -default = ["pdf", "docx", "text"] +default = ["pdf", "docx", "text", "image"] pdf = ["dep:nvisy-pdf"] docx = ["dep:nvisy-docx"] text = ["dep:nvisy-text"] +image = ["dep:nvisy-image"] [dependencies] # Internal crates nvisy-archive = { workspace = true } nvisy-document = { workspace = true } nvisy-docx = { workspace = true, optional = true } +nvisy-image = { workspace = true, optional = true } nvisy-pdf = { workspace = true, optional = true } nvisy-text = { workspace = true, optional = true } diff --git a/crates/nvisy-engine/src/engine/mod.rs b/crates/nvisy-engine/src/engine/mod.rs index f16805d..5b47267 100644 --- a/crates/nvisy-engine/src/engine/mod.rs +++ b/crates/nvisy-engine/src/engine/mod.rs @@ -7,9 +7,8 @@ mod config; use std::path::Path; -use bytes::Bytes; pub use config::EngineConfig; -use nvisy_document::Result; +use nvisy_document::{ContentData, Result}; use crate::registry::{BoxDocument, FormatRegistry}; @@ -118,7 +117,7 @@ impl Engine { /// # Errors /// /// Returns an error if the extension is not supported or loading fails. - pub async fn load_by_extension(&self, ext: &str, data: Bytes) -> Result<BoxDocument> { + pub async fn load_by_extension(&self, ext: &str, data: ContentData) -> Result<BoxDocument> { self.registry.load_by_extension(ext, data).await } @@ -127,7 +126,7 @@ impl Engine { /// # Errors /// /// Returns an error if the MIME type is not supported or loading fails.
- pub async fn load_by_mime(&self, mime: &str, data: Bytes) -> Result<BoxDocument> { + pub async fn load_by_mime(&self, mime: &str, data: ContentData) -> Result<BoxDocument> { self.registry.load_by_mime(mime, data).await } @@ -243,13 +242,13 @@ mod tests { let engine = Engine::new(); let doc = engine - .load_by_extension("json", Bytes::from(r#"{"key": "value"}"#)) + .load_by_extension("json", ContentData::from(r#"{"key": "value"}"#)) .await .unwrap(); assert!(!doc.regions().is_empty()); let doc = engine - .load_by_extension("md", Bytes::from("# Title\n\nParagraph")) + .load_by_extension("md", ContentData::from("# Title\n\nParagraph")) .await .unwrap(); assert!(!doc.regions().is_empty()); @@ -261,7 +260,7 @@ mod tests { let engine = Engine::new(); let doc = engine - .load_by_mime("application/json", Bytes::from(r#"{"key": "value"}"#)) + .load_by_mime("application/json", ContentData::from(r#"{"key": "value"}"#)) .await .unwrap(); assert!(!doc.regions().is_empty()); diff --git a/crates/nvisy-engine/src/registry/mod.rs b/crates/nvisy-engine/src/registry/mod.rs index 514a50d..fc3d854 100644 --- a/crates/nvisy-engine/src/registry/mod.rs +++ b/crates/nvisy-engine/src/registry/mod.rs @@ -7,8 +7,7 @@ use std::collections::HashMap; use std::sync::Arc; -use bytes::Bytes; -use nvisy_document::{Capabilities, Document, Error, Result}; +use nvisy_document::{Capabilities, ContentData, Document, Error, Result}; /// A type-erased document that can be used for common operations. pub type BoxDocument = Box<dyn Document + Send + Sync>; @@ -30,10 +29,10 @@ pub trait AnyFormat: Send + Sync { /// Returns the format capabilities. fn capabilities(&self) -> &Capabilities; - /// Loads a document from bytes, returning a type-erased document. + /// Loads a document from content data, returning a type-erased document. fn load_boxed( &self, - data: Bytes, + data: ContentData, ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<BoxDocument>> + Send + '_>>; } @@ -65,7 +64,7 @@ where fn load_boxed( &self, - data: Bytes, + data: ContentData, ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<BoxDocument>> + Send + '_>> { Box::pin(async move { let doc = nvisy_document::DocumentFormat::load(&self.inner, data).await?; @@ -147,6 +146,12 @@ impl FormatRegistry { self.register(nvisy_text::TomlFormat::new()); self.register(nvisy_text::IniFormat::new()); } + + #[cfg(feature = "image")] + { + self.register(nvisy_image::JpegFormat::new()); + self.register(nvisy_image::PngFormat::new()); + } } /// Registers a format handler.
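A quick sketch of the dispatch path after this change, mirroring the engine tests above (`Engine::new()` is assumed to register the default formats, as those tests rely on):

```rust
use nvisy_document::{ContentData, Result};
use nvisy_engine::Engine;

async fn dispatch() -> Result<()> {
    // Engine::new() wires up the feature-gated default handlers,
    // now including the JPEG and PNG image formats.
    let engine = Engine::new();

    // Extension lookup resolves "json" to the JSON handler and
    // returns a type-erased BoxDocument.
    let doc = engine
        .load_by_extension("json", ContentData::from(r#"{"key": "value"}"#))
        .await?;
    assert!(!doc.regions().is_empty());
    Ok(())
}
```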
@@ -235,14 +240,14 @@ impl FormatRegistry { /// Returns an error if: /// - The extension is not supported /// - The document fails to load - pub async fn load_by_extension(&self, ext: &str, data: Bytes) -> Result<BoxDocument> { + pub async fn load_by_extension(&self, ext: &str, data: ContentData) -> Result<BoxDocument> { let ext_lower = ext.trim_start_matches('.').to_lowercase(); let format = self .by_extension .get(ext_lower.as_str()) .and_then(|&idx| self.formats.get(idx)) - .ok_or_else(|| Error::unsupported_format(format!("Unsupported extension: {}", ext)))?; + .ok_or_else(|| Error::new(format!("Unsupported extension: {}", ext)))?; format.format.load_boxed(data).await } @@ -254,14 +259,14 @@ impl FormatRegistry { /// Returns an error if: /// - The MIME type is not supported /// - The document fails to load - pub async fn load_by_mime(&self, mime: &str, data: Bytes) -> Result<BoxDocument> { + pub async fn load_by_mime(&self, mime: &str, data: ContentData) -> Result<BoxDocument> { let mime_lower = mime.to_lowercase(); let format = self .by_mime .get(mime_lower.as_str()) .and_then(|&idx| self.formats.get(idx)) - .ok_or_else(|| Error::unsupported_format(format!("Unsupported MIME type: {}", mime)))?; + .ok_or_else(|| Error::new(format!("Unsupported MIME type: {}", mime)))?; format.format.load_boxed(data).await } @@ -283,12 +288,13 @@ impl FormatRegistry { let ext = path .extension() .and_then(|e| e.to_str()) - .ok_or_else(|| Error::unsupported_format("File has no extension"))?; + .ok_or_else(|| Error::new("File has no extension"))?; - let data = std::fs::read(path) - .map_err(|e| Error::io(format!("Failed to read file '{}': {}", path.display(), e)))?; + let data = std::fs::read(path).map_err(|e| { + Error::from_source(format!("Failed to read file '{}'", path.display()), e) + })?; - self.load_by_extension(ext, Bytes::from(data)).await + self.load_by_extension(ext, ContentData::from(data)).await } } @@ -347,7 +353,7 @@ mod tests { let registry = FormatRegistry::with_defaults(); let doc = registry - .load_by_extension("json", Bytes::from(r#"{"key": "value"}"#)) + .load_by_extension("json", ContentData::from(r#"{"key": "value"}"#)) .await .unwrap(); @@ -360,7 +366,7 @@ mod tests { let registry = FormatRegistry::with_defaults(); let doc = registry - .load_by_mime("application/json", Bytes::from(r#"{"key": "value"}"#)) + .load_by_mime("application/json", ContentData::from(r#"{"key": "value"}"#)) .await .unwrap(); diff --git a/crates/nvisy-image/src/document.rs b/crates/nvisy-image/src/documents/jpeg.rs similarity index 69% rename from crates/nvisy-image/src/document.rs rename to crates/nvisy-image/src/documents/jpeg.rs index 092b45c..df75644 100644 --- a/crates/nvisy-image/src/document.rs +++ b/crates/nvisy-image/src/documents/jpeg.rs @@ -1,22 +1,22 @@ -//! Image document implementation. +//! JPEG document implementation. use async_trait::async_trait; use bytes::Bytes; use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; -/// A loaded image document. +/// A loaded JPEG document. #[derive(Debug)] -pub struct ImageDocument { +pub struct JpegDocument { info: DocumentInfo, regions: Vec<Region>, #[allow(dead_code)] data: Bytes, } -impl ImageDocument { - /// Creates a new image document (internal use). +impl JpegDocument { + /// Creates a new JPEG document (internal use).
     #[must_use]
-    #[allow(dead_code)] // Will be used when load() is implemented
+    #[allow(dead_code)]
     pub(crate) fn new(info: DocumentInfo, data: Bytes) -> Self {
         Self {
             info,
@@ -27,7 +27,7 @@
 }
 
 #[async_trait]
-impl Document for ImageDocument {
+impl Document for JpegDocument {
     fn info(&self) -> &DocumentInfo {
         &self.info
     }
@@ -48,9 +48,7 @@
     }
 
     async fn to_bytes(&self) -> Result<Bytes> {
-        // TODO: Implement image serialization
-        Err(Error::unsupported_format(
-            "Image serialization not yet implemented",
-        ))
+        // TODO: Implement JPEG serialization
+        Err(Error::new("JPEG serialization not yet implemented"))
     }
 }
diff --git a/crates/nvisy-image/src/documents/mod.rs b/crates/nvisy-image/src/documents/mod.rs
new file mode 100644
index 0000000..d2ab5e1
--- /dev/null
+++ b/crates/nvisy-image/src/documents/mod.rs
@@ -0,0 +1,7 @@
+//! Image document implementations.
+
+mod jpeg;
+mod png;
+
+pub use jpeg::JpegDocument;
+pub use png::PngDocument;
diff --git a/crates/nvisy-image/src/documents/png.rs b/crates/nvisy-image/src/documents/png.rs
new file mode 100644
index 0000000..b8ca50e
--- /dev/null
+++ b/crates/nvisy-image/src/documents/png.rs
@@ -0,0 +1,54 @@
+//! PNG document implementation.
+
+use async_trait::async_trait;
+use bytes::Bytes;
+use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result};
+
+/// A loaded PNG document.
+#[derive(Debug)]
+pub struct PngDocument {
+    info: DocumentInfo,
+    regions: Vec<Region>,
+    #[allow(dead_code)]
+    data: Bytes,
+}
+
+impl PngDocument {
+    /// Creates a new PNG document (internal use).
+    #[must_use]
+    #[allow(dead_code)]
+    pub(crate) fn new(info: DocumentInfo, data: Bytes) -> Self {
+        Self {
+            info,
+            regions: Vec::new(),
+            data,
+        }
+    }
+}
+
+#[async_trait]
+impl Document for PngDocument {
+    fn info(&self) -> &DocumentInfo {
+        &self.info
+    }
+
+    fn regions(&self) -> &[Region] {
+        &self.regions
+    }
+
+    fn regions_for_page(&self, page: u32) -> Vec<&Region> {
+        self.regions
+            .iter()
+            .filter(|r| r.page.map(|p| p.get()) == Some(page))
+            .collect()
+    }
+
+    fn find_region(&self, id: RegionId) -> Option<&Region> {
+        self.regions.iter().find(|r| r.id == id)
+    }
+
+    async fn to_bytes(&self) -> Result<Bytes> {
+        // TODO: Implement PNG serialization
+        Err(Error::new("PNG serialization not yet implemented"))
+    }
+}
diff --git a/crates/nvisy-image/src/format.rs b/crates/nvisy-image/src/format.rs
deleted file mode 100644
index e479706..0000000
--- a/crates/nvisy-image/src/format.rs
+++ /dev/null
@@ -1,82 +0,0 @@
-//! Image format handler implementation.
-
-use bytes::Bytes;
-use nvisy_document::{Capabilities, DocumentFormat, Error, Result};
-
-use crate::ImageDocument;
-
-/// Image document format handler.
-#[derive(Debug, Clone, Default)]
-pub struct ImageFormat {
-    capabilities: Capabilities,
-}
-
-impl ImageFormat {
-    /// Creates a new image format handler.
-    #[must_use]
-    pub fn new() -> Self {
-        Self {
-            capabilities: Capabilities::image(),
-        }
-    }
-}
-
-impl DocumentFormat for ImageFormat {
-    type Document = ImageDocument;
-
-    fn name(&self) -> &'static str {
-        "image"
-    }
-
-    fn mime_types(&self) -> &'static [&'static str] {
-        &[
-            "image/png",
-            "image/jpeg",
-            "image/gif",
-            "image/webp",
-            "image/bmp",
-            "image/tiff",
-        ]
-    }
-
-    fn extensions(&self) -> &'static [&'static str] {
-        &["png", "jpg", "jpeg", "gif", "webp", "bmp", "tiff", "tif"]
-    }
-
-    fn capabilities(&self) -> &Capabilities {
-        &self.capabilities
-    }
-
-    async fn load(&self, _data: Bytes) -> Result<Self::Document> {
-        // TODO: Implement image loading
-        Err(Error::unsupported_format(
-            "Image loading not yet implemented",
-        ))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_format_metadata() {
-        let format = ImageFormat::new();
-        assert_eq!(format.name(), "image");
-        assert!(format.mime_types().contains(&"image/png"));
-        assert!(format.mime_types().contains(&"image/jpeg"));
-        assert!(format.extensions().contains(&"png"));
-        assert!(format.extensions().contains(&"jpg"));
-    }
-
-    #[test]
-    fn test_capabilities() {
-        let format = ImageFormat::new();
-        let caps = format.capabilities();
-
-        assert!(!caps.text.can_extract);
-        assert!(caps.text.may_need_ocr);
-        assert!(!caps.structure.has_pages);
-        assert!(caps.metadata.can_extract); // EXIF support
-    }
-}
diff --git a/crates/nvisy-image/src/formats/jpeg.rs b/crates/nvisy-image/src/formats/jpeg.rs
new file mode 100644
index 0000000..8a0cb81
--- /dev/null
+++ b/crates/nvisy-image/src/formats/jpeg.rs
@@ -0,0 +1,71 @@
+//! JPEG format handler implementation.
+
+use nvisy_document::{Capabilities, ContentData, DocumentFormat, Error, Result};
+
+use crate::documents::JpegDocument;
+
+/// JPEG image format handler.
+#[derive(Debug, Clone, Default)]
+pub struct JpegFormat {
+    capabilities: Capabilities,
+}
+
+impl JpegFormat {
+    /// Creates a new JPEG format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities::image(),
+        }
+    }
+}
+
+impl DocumentFormat for JpegFormat {
+    type Document = JpegDocument;
+
+    fn name(&self) -> &'static str {
+        "jpeg"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["image/jpeg"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["jpg", "jpeg"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, _data: ContentData) -> Result<Self::Document> {
+        // TODO: Implement JPEG loading
+        Err(Error::new("JPEG loading not yet implemented"))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = JpegFormat::new();
+        assert_eq!(format.name(), "jpeg");
+        assert!(format.mime_types().contains(&"image/jpeg"));
+        assert!(format.extensions().contains(&"jpg"));
+        assert!(format.extensions().contains(&"jpeg"));
+    }
+
+    #[test]
+    fn test_capabilities() {
+        let format = JpegFormat::new();
+        let caps = format.capabilities();
+
+        assert!(!caps.text.can_extract);
+        assert!(caps.text.may_need_ocr);
+        assert!(!caps.structure.has_pages);
+        assert!(caps.metadata.can_extract);
+    }
+}
diff --git a/crates/nvisy-image/src/formats/mod.rs b/crates/nvisy-image/src/formats/mod.rs
new file mode 100644
index 0000000..aac0ecf
--- /dev/null
+++ b/crates/nvisy-image/src/formats/mod.rs
@@ -0,0 +1,7 @@
+//! Image format handlers.
+
+mod jpeg;
+mod png;
+
+pub use jpeg::JpegFormat;
+pub use png::PngFormat;
diff --git a/crates/nvisy-image/src/formats/png.rs b/crates/nvisy-image/src/formats/png.rs
new file mode 100644
index 0000000..93572fe
--- /dev/null
+++ b/crates/nvisy-image/src/formats/png.rs
@@ -0,0 +1,70 @@
+//! PNG format handler implementation.
+
+use nvisy_document::{Capabilities, ContentData, DocumentFormat, Error, Result};
+
+use crate::documents::PngDocument;
+
+/// PNG image format handler.
+#[derive(Debug, Clone, Default)]
+pub struct PngFormat {
+    capabilities: Capabilities,
+}
+
+impl PngFormat {
+    /// Creates a new PNG format handler.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            capabilities: Capabilities::image(),
+        }
+    }
+}
+
+impl DocumentFormat for PngFormat {
+    type Document = PngDocument;
+
+    fn name(&self) -> &'static str {
+        "png"
+    }
+
+    fn mime_types(&self) -> &'static [&'static str] {
+        &["image/png"]
+    }
+
+    fn extensions(&self) -> &'static [&'static str] {
+        &["png"]
+    }
+
+    fn capabilities(&self) -> &Capabilities {
+        &self.capabilities
+    }
+
+    async fn load(&self, _data: ContentData) -> Result<Self::Document> {
+        // TODO: Implement PNG loading
+        Err(Error::new("PNG loading not yet implemented"))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_format_metadata() {
+        let format = PngFormat::new();
+        assert_eq!(format.name(), "png");
+        assert!(format.mime_types().contains(&"image/png"));
+        assert!(format.extensions().contains(&"png"));
+    }
+
+    #[test]
+    fn test_capabilities() {
+        let format = PngFormat::new();
+        let caps = format.capabilities();
+
+        assert!(!caps.text.can_extract);
+        assert!(caps.text.may_need_ocr);
+        assert!(!caps.structure.has_pages);
+        assert!(caps.metadata.can_extract);
+    }
+}
diff --git a/crates/nvisy-image/src/lib.rs b/crates/nvisy-image/src/lib.rs
index 4b9608e..f6217f4 100644
--- a/crates/nvisy-image/src/lib.rs
+++ b/crates/nvisy-image/src/lib.rs
@@ -2,8 +2,8 @@
 #![cfg_attr(docsrs, feature(doc_cfg))]
 #![doc = include_str!("../README.md")]
 
-mod document;
-mod format;
+pub mod documents;
+pub mod formats;
 
-pub use document::ImageDocument;
-pub use format::ImageFormat;
+pub use documents::{JpegDocument, PngDocument};
+pub use formats::{JpegFormat, PngFormat};
diff --git a/crates/nvisy-pdf/src/document.rs b/crates/nvisy-pdf/src/document.rs
index 71ad404..d74514f 100644
--- a/crates/nvisy-pdf/src/document.rs
+++ b/crates/nvisy-pdf/src/document.rs
@@ -49,8 +49,6 @@ impl Document for PdfDocument {
 
     async fn to_bytes(&self) -> Result<Bytes> {
         // TODO: Implement PDF serialization
-        Err(Error::unsupported_format(
-            "PDF serialization not yet implemented",
-        ))
+        Err(Error::new("PDF serialization not yet implemented"))
     }
 }
diff --git a/crates/nvisy-pdf/src/format.rs b/crates/nvisy-pdf/src/format.rs
index f36167f..7f3904e 100644
--- a/crates/nvisy-pdf/src/format.rs
+++ b/crates/nvisy-pdf/src/format.rs
@@ -1,7 +1,6 @@
 //! PDF format handler implementation.
-use bytes::Bytes;
-use nvisy_document::{Capabilities, DocumentFormat, Error, Result};
+use nvisy_document::{Capabilities, ContentData, DocumentFormat, Error, Result};
 
 use crate::PdfDocument;
 
@@ -40,9 +39,9 @@ impl DocumentFormat for PdfFormat {
         &self.capabilities
     }
 
-    async fn load(&self, _data: Bytes) -> Result<Self::Document> {
+    async fn load(&self, _data: ContentData) -> Result<Self::Document> {
         // TODO: Implement PDF loading
-        Err(Error::unsupported_format("PDF loading not yet implemented"))
+        Err(Error::new("PDF loading not yet implemented"))
     }
 }
diff --git a/crates/nvisy-text/README.md b/crates/nvisy-text/README.md
index 10590d2..7c14a4d 100644
--- a/crates/nvisy-text/README.md
+++ b/crates/nvisy-text/README.md
@@ -18,12 +18,11 @@ various text-based file formats:
 
 ```rust
 use nvisy_text::{PlainTextFormat, PlainTextDocument};
-use nvisy_document::{DocumentFormat, Document, TextExtractor};
-use bytes::Bytes;
+use nvisy_document::{ContentData, DocumentFormat, Document, TextExtractor};
 
 # tokio_test::block_on(async {
 let format = PlainTextFormat::new();
-let data = Bytes::from("Hello, world!\n\nThis is a paragraph.");
+let data = ContentData::from("Hello, world!\n\nThis is a paragraph.");
 let doc = format.load(data).await.unwrap();
 
 assert_eq!(doc.regions().len(), 2);
diff --git a/crates/nvisy-text/src/documents/json.rs b/crates/nvisy-text/src/documents/json.rs
index e7c6603..e82360a 100644
--- a/crates/nvisy-text/src/documents/json.rs
+++ b/crates/nvisy-text/src/documents/json.rs
@@ -23,7 +23,7 @@ impl JsonDocument {
     /// Creates a new JSON document from content.
     pub fn new(content: String) -> Result<Self> {
         let parsed: Value = serde_json::from_str(&content)
-            .map_err(|e| nvisy_document::Error::parse(format!("Invalid JSON: {e}")))?;
+            .map_err(|e| nvisy_document::Error::new(format!("Invalid JSON: {e}")))?;
 
         let regions = Self::extract_regions(&parsed);
         let size = content.len() as u64;
diff --git a/crates/nvisy-text/src/formats/csv.rs b/crates/nvisy-text/src/formats/csv.rs
index e5533a7..66b77b2 100644
--- a/crates/nvisy-text/src/formats/csv.rs
+++ b/crates/nvisy-text/src/formats/csv.rs
@@ -1,8 +1,7 @@
 //! CSV format handler.
-use bytes::Bytes;
 use nvisy_document::{
-    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+    Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
 };
 
 use crate::documents::CsvDocument;
 
@@ -60,8 +59,8 @@ impl DocumentFormat for CsvFormat {
         &self.capabilities
     }
 
-    async fn load(&self, data: Bytes) -> Result<Self::Document> {
-        let content = String::from_utf8_lossy(&data).into_owned();
+    async fn load(&self, data: ContentData) -> Result<Self::Document> {
+        let content = data.as_string()?;
         // Auto-detect delimiter
         let first_line = content.lines().next().unwrap_or("");
         let delimiter = if first_line.contains('\t') {
@@ -98,7 +97,7 @@ mod tests {
     #[tokio::test]
     async fn test_load_csv() {
         let format = CsvFormat::new();
-        let data = Bytes::from("a,b,c\n1,2,3");
+        let data = ContentData::from("a,b,c\n1,2,3");
         let doc = format.load(data).await.unwrap();
         assert_eq!(doc.delimiter(), b',');
         assert!(!doc.regions().is_empty());
@@ -107,7 +106,7 @@
     #[tokio::test]
     async fn test_load_tsv() {
         let format = CsvFormat::new();
-        let data = Bytes::from("a\tb\tc\n1\t2\t3");
+        let data = ContentData::from("a\tb\tc\n1\t2\t3");
         let doc = format.load(data).await.unwrap();
         assert_eq!(doc.delimiter(), b'\t');
     }
diff --git a/crates/nvisy-text/src/formats/ini.rs b/crates/nvisy-text/src/formats/ini.rs
index bfe22ce..b953cd8 100644
--- a/crates/nvisy-text/src/formats/ini.rs
+++ b/crates/nvisy-text/src/formats/ini.rs
@@ -1,8 +1,7 @@
 //! INI format handler.
 
-use bytes::Bytes;
 use nvisy_document::{
-    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+    Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
 };
 
 use crate::documents::IniDocument;
 
@@ -60,8 +59,8 @@ impl DocumentFormat for IniFormat {
         &self.capabilities
     }
 
-    async fn load(&self, data: Bytes) -> Result<Self::Document> {
-        let content = String::from_utf8_lossy(&data).into_owned();
+    async fn load(&self, data: ContentData) -> Result<Self::Document> {
+        let content = data.as_string()?;
         Ok(IniDocument::new(content))
     }
 }
@@ -91,7 +90,7 @@ mod tests {
     #[tokio::test]
     async fn test_load_ini() {
         let format = IniFormat::new();
-        let data = Bytes::from("[section]\nkey=value\nfoo=bar");
+        let data = ContentData::from("[section]\nkey=value\nfoo=bar");
         let doc = format.load(data).await.unwrap();
         assert!(!doc.regions().is_empty());
     }
diff --git a/crates/nvisy-text/src/formats/json.rs b/crates/nvisy-text/src/formats/json.rs
index 163135e..c0e7be8 100644
--- a/crates/nvisy-text/src/formats/json.rs
+++ b/crates/nvisy-text/src/formats/json.rs
@@ -1,8 +1,7 @@
 //! JSON format handler.
-use bytes::Bytes;
 use nvisy_document::{
-    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+    Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
 };
 
 use crate::documents::JsonDocument;
 
@@ -60,8 +59,8 @@ impl DocumentFormat for JsonFormat {
         &self.capabilities
     }
 
-    async fn load(&self, data: Bytes) -> Result<Self::Document> {
-        let content = String::from_utf8_lossy(&data).into_owned();
+    async fn load(&self, data: ContentData) -> Result<Self::Document> {
+        let content = data.as_string()?;
         JsonDocument::new(content)
     }
 }
@@ -83,7 +82,7 @@ mod tests {
     #[tokio::test]
     async fn test_load_document() {
         let format = JsonFormat::new();
-        let data = Bytes::from(r#"{"hello": "world"}"#);
+        let data = ContentData::from(r#"{"hello": "world"}"#);
         let doc = format.load(data).await.unwrap();
         assert!(!doc.regions().is_empty());
     }
@@ -91,7 +90,7 @@
     #[tokio::test]
     async fn test_load_invalid_json() {
         let format = JsonFormat::new();
-        let data = Bytes::from("not valid json {");
+        let data = ContentData::from("not valid json {");
         let result = format.load(data).await;
         assert!(result.is_err());
     }
diff --git a/crates/nvisy-text/src/formats/markdown.rs b/crates/nvisy-text/src/formats/markdown.rs
index d040865..4e10f33 100644
--- a/crates/nvisy-text/src/formats/markdown.rs
+++ b/crates/nvisy-text/src/formats/markdown.rs
@@ -1,8 +1,7 @@
 //! Markdown format handler.
 
-use bytes::Bytes;
 use nvisy_document::{
-    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+    Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
 };
 
 use crate::documents::MarkdownDocument;
 
@@ -60,8 +59,8 @@ impl DocumentFormat for MarkdownFormat {
         &self.capabilities
     }
 
-    async fn load(&self, data: Bytes) -> Result<Self::Document> {
-        let content = String::from_utf8_lossy(&data).into_owned();
+    async fn load(&self, data: ContentData) -> Result<Self::Document> {
+        let content = data.as_string()?;
         Ok(MarkdownDocument::new(content))
     }
 }
@@ -92,7 +91,7 @@ mod tests {
     #[tokio::test]
     async fn test_load_document() {
         let format = MarkdownFormat::new();
-        let data = Bytes::from("# Test\n\nContent here.");
+        let data = ContentData::from("# Test\n\nContent here.");
         let doc = format.load(data).await.unwrap();
         assert!(!doc.regions().is_empty());
     }
diff --git a/crates/nvisy-text/src/formats/plain.rs b/crates/nvisy-text/src/formats/plain.rs
index 5a03060..ee57eb1 100644
--- a/crates/nvisy-text/src/formats/plain.rs
+++ b/crates/nvisy-text/src/formats/plain.rs
@@ -1,7 +1,6 @@
 //! Plain text format handler.
-use bytes::Bytes;
-use nvisy_document::{Capabilities, DocumentFormat, Result};
+use nvisy_document::{Capabilities, ContentData, DocumentFormat, Result};
 
 use crate::documents::PlainTextDocument;
 
@@ -46,8 +45,8 @@ impl DocumentFormat for PlainTextFormat {
         &self.capabilities
     }
 
-    async fn load(&self, data: Bytes) -> Result<Self::Document> {
-        let content = String::from_utf8_lossy(&data).into_owned();
+    async fn load(&self, data: ContentData) -> Result<Self::Document> {
+        let content = data.as_string()?;
         Ok(PlainTextDocument::new(content))
    }
 }
@@ -67,7 +66,7 @@ mod tests {
     #[tokio::test]
     async fn test_load_document() {
         let format = PlainTextFormat::new();
-        let data = Bytes::from("Hello, world!");
+        let data = ContentData::from("Hello, world!");
         let doc = format.load(data).await.unwrap();
         assert_eq!(doc.content(), "Hello, world!");
     }
diff --git a/crates/nvisy-text/src/formats/toml.rs b/crates/nvisy-text/src/formats/toml.rs
index b0411a5..6929395 100644
--- a/crates/nvisy-text/src/formats/toml.rs
+++ b/crates/nvisy-text/src/formats/toml.rs
@@ -1,8 +1,7 @@
 //! TOML format handler.
 
-use bytes::Bytes;
 use nvisy_document::{
-    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+    Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
 };
 
 use crate::documents::TomlDocument;
 
@@ -60,8 +59,8 @@ impl DocumentFormat for TomlFormat {
         &self.capabilities
     }
 
-    async fn load(&self, data: Bytes) -> Result<Self::Document> {
-        let content = String::from_utf8_lossy(&data).into_owned();
+    async fn load(&self, data: ContentData) -> Result<Self::Document> {
+        let content = data.as_string()?;
         Ok(TomlDocument::new(content))
     }
 }
@@ -90,7 +89,7 @@ mod tests {
     #[tokio::test]
     async fn test_load_toml() {
         let format = TomlFormat::new();
-        let data = Bytes::from("[package]\nname = \"test\"\nversion = \"1.0\"");
+        let data = ContentData::from("[package]\nname = \"test\"\nversion = \"1.0\"");
         let doc = format.load(data).await.unwrap();
         assert!(!doc.regions().is_empty());
     }
diff --git a/crates/nvisy-text/src/formats/xml.rs b/crates/nvisy-text/src/formats/xml.rs
index 2113191..c92cd48 100644
--- a/crates/nvisy-text/src/formats/xml.rs
+++ b/crates/nvisy-text/src/formats/xml.rs
@@ -1,8 +1,7 @@
 //! XML format handler.
 
-use bytes::Bytes;
 use nvisy_document::{
-    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+    Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
 };
 
 use crate::documents::XmlDocument;
 
@@ -60,8 +59,8 @@ impl DocumentFormat for XmlFormat {
         &self.capabilities
     }
 
-    async fn load(&self, data: Bytes) -> Result<Self::Document> {
-        let content = String::from_utf8_lossy(&data).into_owned();
+    async fn load(&self, data: ContentData) -> Result<Self::Document> {
+        let content = data.as_string()?;
         Ok(XmlDocument::new(content))
     }
 }
@@ -92,7 +91,7 @@ mod tests {
     #[tokio::test]
     async fn test_load_xml() {
         let format = XmlFormat::new();
-        let data = Bytes::from("<root>content</root>");
+        let data = ContentData::from("<root>content</root>");
         let doc = format.load(data).await.unwrap();
         assert!(!doc.regions().is_empty());
     }
diff --git a/crates/nvisy-text/src/formats/yaml.rs b/crates/nvisy-text/src/formats/yaml.rs
index 63d47a5..4db8660 100644
--- a/crates/nvisy-text/src/formats/yaml.rs
+++ b/crates/nvisy-text/src/formats/yaml.rs
@@ -1,8 +1,7 @@
 //! YAML format handler.
-use bytes::Bytes;
 use nvisy_document::{
-    Capabilities, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
+    Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities,
 };
 
 use crate::documents::YamlDocument;
 
@@ -60,8 +59,8 @@ impl DocumentFormat for YamlFormat {
         &self.capabilities
     }
 
-    async fn load(&self, data: Bytes) -> Result<Self::Document> {
-        let content = String::from_utf8_lossy(&data).into_owned();
+    async fn load(&self, data: ContentData) -> Result<Self::Document> {
+        let content = data.as_string()?;
         Ok(YamlDocument::new(content))
     }
 }
@@ -91,7 +90,7 @@ mod tests {
     #[tokio::test]
     async fn test_load_yaml() {
         let format = YamlFormat::new();
-        let data = Bytes::from("key: value\nlist:\n - item1\n - item2");
+        let data = ContentData::from("key: value\nlist:\n - item1\n - item2");
         let doc = format.load(data).await.unwrap();
         assert!(!doc.regions().is_empty());
     }

From ac30b2630454da75b1a59a83e92b555c56693727 Mon Sep 17 00:00:00 2001
From: Oleh Martsokha
Date: Sat, 17 Jan 2026 08:26:45 +0100
Subject: [PATCH 4/5] refactor: rename crates from nvisy-* to nvisy-rt-*

- Rename all crate packages to avoid conflicts with consumer library
- Update all Cargo.toml dependencies and features
- Update all source file imports (nvisy_* -> nvisy_rt_*)
- Update README doc examples with new crate names
- Fix clippy lints and doc warnings
- Add diff module with Differ trait, Change, ChangeKind, RegionChange, Diff types
---
 Cargo.lock                                    |  54 ++-
 Cargo.toml                                    |  16 +-
 crates/nvisy-archive/Cargo.toml               |   4 +-
 crates/nvisy-archive/src/file/archive_type.rs |   6 +-
 crates/nvisy-archive/src/file/mod.rs          |  14 +-
 crates/nvisy-archive/src/handler/mod.rs       |   4 +-
 .../nvisy-archive/src/handler/tar_handler.rs  |   8 +-
 crates/nvisy-archive/src/lib.rs               |   9 +-
 crates/nvisy-core/Cargo.toml                  |   2 +-
 crates/nvisy-core/README.md                   |   2 +-
 crates/nvisy-core/src/error/mod.rs            |   2 +-
 crates/nvisy-core/src/fs/content_file.rs      |  12 +-
 crates/nvisy-core/src/fs/content_metadata.rs  |   4 +-
 crates/nvisy-core/src/fs/data_sensitivity.rs  |   2 +-
 crates/nvisy-core/src/fs/mod.rs               |   4 +-
 crates/nvisy-core/src/io/content.rs           |  14 +-
 crates/nvisy-core/src/io/content_data.rs      |   4 +-
 crates/nvisy-core/src/io/content_read.rs      |   8 +-
 crates/nvisy-core/src/io/content_write.rs     |  20 +-
 crates/nvisy-core/src/io/data_reference.rs    |   5 +-
 crates/nvisy-core/src/path/source.rs          |  14 +-
 crates/nvisy-document/Cargo.toml              |   5 +-
 crates/nvisy-document/README.md               |   5 -
 crates/nvisy-document/src/diff/change.rs      | 110 +++++
 crates/nvisy-document/src/diff/mod.rs         |  18 +
 .../nvisy-document/src/diff/region_change.rs  | 119 ++++++
 crates/nvisy-document/src/diff/result.rs      |  72 ++++
 crates/nvisy-document/src/format/mod.rs       |   2 +-
 .../nvisy-document/src/format/region/mod.rs   |   3 +-
 crates/nvisy-document/src/lib.rs              |   6 +-
 crates/nvisy-document/src/table/mod.rs        |   2 +-
 crates/nvisy-document/src/text/mod.rs         |   2 +-
 .../nvisy-document/src/thumbnail/options.rs   |   8 +-
 crates/nvisy-docx/Cargo.toml                  |   4 +-
 crates/nvisy-docx/src/document.rs             |   2 +-
 crates/nvisy-docx/src/format.rs               |   2 +-
 crates/nvisy-engine/Cargo.toml                |  25 +-
 crates/nvisy-engine/src/engine/mod.rs         |  21 +-
 crates/nvisy-engine/src/lib.rs                |  16 +-
 .../nvisy-engine/src/registry/format_ref.rs   | 114 ++++++
 .../src/registry/format_registry.rs           | 343 ++++++++++++++++
 .../src/registry/loaded_document.rs           |  41 ++
 crates/nvisy-engine/src/registry/mod.rs       | 380 +-----------------
 crates/nvisy-engine/src/session/mod.rs        |   2 +-
 crates/nvisy-image/Cargo.toml                 |   4 +-
 crates/nvisy-image/src/documents/jpeg.rs      |   2 +-
 crates/nvisy-image/src/documents/png.rs       |   2 +-
crates/nvisy-image/src/formats/jpeg.rs | 2 +- crates/nvisy-image/src/formats/png.rs | 2 +- crates/nvisy-pdf/Cargo.toml | 4 +- crates/nvisy-pdf/src/document.rs | 2 +- crates/nvisy-pdf/src/format.rs | 2 +- crates/nvisy-text/Cargo.toml | 4 +- crates/nvisy-text/README.md | 22 +- crates/nvisy-text/src/documents/csv.rs | 2 +- crates/nvisy-text/src/documents/ini.rs | 2 +- crates/nvisy-text/src/documents/json.rs | 4 +- crates/nvisy-text/src/documents/markdown.rs | 5 +- crates/nvisy-text/src/documents/plain.rs | 2 +- crates/nvisy-text/src/documents/toml.rs | 2 +- crates/nvisy-text/src/documents/xml.rs | 2 +- crates/nvisy-text/src/documents/yaml.rs | 2 +- crates/nvisy-text/src/formats/csv.rs | 4 +- crates/nvisy-text/src/formats/ini.rs | 4 +- crates/nvisy-text/src/formats/json.rs | 4 +- crates/nvisy-text/src/formats/markdown.rs | 4 +- crates/nvisy-text/src/formats/plain.rs | 2 +- crates/nvisy-text/src/formats/toml.rs | 4 +- crates/nvisy-text/src/formats/xml.rs | 4 +- crates/nvisy-text/src/formats/yaml.rs | 4 +- crates/nvisy-text/src/lib.rs | 11 +- 71 files changed, 1050 insertions(+), 568 deletions(-) create mode 100644 crates/nvisy-document/src/diff/change.rs create mode 100644 crates/nvisy-document/src/diff/mod.rs create mode 100644 crates/nvisy-document/src/diff/region_change.rs create mode 100644 crates/nvisy-document/src/diff/result.rs create mode 100644 crates/nvisy-engine/src/registry/format_ref.rs create mode 100644 crates/nvisy-engine/src/registry/format_registry.rs create mode 100644 crates/nvisy-engine/src/registry/loaded_document.rs diff --git a/Cargo.lock b/Cargo.lock index 5accc23..c06b57b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -641,13 +641,13 @@ dependencies = [ ] [[package]] -name = "nvisy-archive" +name = "nvisy-rt-archive" version = "0.1.0" dependencies = [ "bytes", "bzip2", "flate2", - "nvisy-core", + "nvisy-rt-core", "sevenz-rust", "strum", "tar", @@ -659,7 +659,7 @@ dependencies = [ ] [[package]] -name = "nvisy-core" +name = "nvisy-rt-core" version = "0.1.0" dependencies = [ "bytes", @@ -678,7 +678,7 @@ dependencies = [ ] [[package]] -name = "nvisy-document" +name = "nvisy-rt-document" version = "0.1.0" dependencies = [ "async-trait", @@ -686,71 +686,73 @@ dependencies = [ "bytes", "derive_more", "jiff", - "nvisy-core", + "nvisy-rt-core", "serde", "serde_json", + "strum", "thiserror", "tokio", "uuid", ] [[package]] -name = "nvisy-docx" +name = "nvisy-rt-docx" version = "0.1.0" dependencies = [ "async-trait", "bytes", - "nvisy-document", + "nvisy-rt-document", "thiserror", ] [[package]] -name = "nvisy-engine" +name = "nvisy-rt-engine" version = "0.1.0" dependencies = [ "bytes", "jiff", - "nvisy-archive", - "nvisy-document", - "nvisy-docx", - "nvisy-image", - "nvisy-pdf", - "nvisy-text", + "nvisy-rt-archive", + "nvisy-rt-document", + "nvisy-rt-docx", + "nvisy-rt-image", + "nvisy-rt-pdf", + "nvisy-rt-text", "serde", "serde_json", "tokio", + "tracing", "uuid", ] [[package]] -name = "nvisy-image" +name = "nvisy-rt-image" version = "0.1.0" dependencies = [ "async-trait", "bytes", - "nvisy-document", + "nvisy-rt-document", "thiserror", ] [[package]] -name = "nvisy-pdf" +name = "nvisy-rt-pdf" version = "0.1.0" dependencies = [ "async-trait", "bytes", - "nvisy-document", + "nvisy-rt-document", "thiserror", ] [[package]] -name = "nvisy-text" +name = "nvisy-rt-text" version = "0.1.0" dependencies = [ "async-trait", "bytes", "csv", "markdown", - "nvisy-document", + "nvisy-rt-document", "serde_json", "thiserror", "tokio", @@ -1188,9 +1190,21 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.36" diff --git a/Cargo.toml b/Cargo.toml index 9b787b3..f14aa39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,14 +32,14 @@ documentation = "https://docs.rs/nvisy" [workspace.dependencies] # Internal crates -nvisy-archive = { path = "./crates/nvisy-archive", version = "0.1.0", features = [] } -nvisy-core = { path = "./crates/nvisy-core", version = "0.1.0", features = [] } -nvisy-docx = { path = "./crates/nvisy-docx", version = "0.1.0", features = [] } -nvisy-document = { path = "./crates/nvisy-document", version = "0.1.0", features = [] } -nvisy-engine = { path = "./crates/nvisy-engine", version = "0.1.0", features = [] } -nvisy-image = { path = "./crates/nvisy-image", version = "0.1.0", features = [] } -nvisy-pdf = { path = "./crates/nvisy-pdf", version = "0.1.0", features = [] } -nvisy-text = { path = "./crates/nvisy-text", version = "0.1.0", features = [] } +nvisy-rt-archive = { path = "./crates/nvisy-archive", version = "0.1.0", features = [] } +nvisy-rt-core = { path = "./crates/nvisy-core", version = "0.1.0", features = [] } +nvisy-rt-docx = { path = "./crates/nvisy-docx", version = "0.1.0", features = [] } +nvisy-rt-document = { path = "./crates/nvisy-document", version = "0.1.0", features = [] } +nvisy-rt-engine = { path = "./crates/nvisy-engine", version = "0.1.0", features = [] } +nvisy-rt-image = { path = "./crates/nvisy-image", version = "0.1.0", features = [] } +nvisy-rt-pdf = { path = "./crates/nvisy-pdf", version = "0.1.0", features = [] } +nvisy-rt-text = { path = "./crates/nvisy-text", version = "0.1.0", features = [] } # Async runtime and I/O tokio = { version = "1.49", default-features = false, features = [] } diff --git a/crates/nvisy-archive/Cargo.toml b/crates/nvisy-archive/Cargo.toml index 2c49250..04815d5 100644 --- a/crates/nvisy-archive/Cargo.toml +++ b/crates/nvisy-archive/Cargo.toml @@ -1,7 +1,7 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-archive" +name = "nvisy-rt-archive" description = "Archive handling library for nvisy (ZIP, TAR, 7z, etc.)" readme = "./README.md" @@ -31,7 +31,7 @@ xz = ["dep:xz2"] [dependencies] # Internal crates -nvisy-core = { workspace = true } +nvisy-rt-core = { workspace = true } # Data types bytes = { workspace = true } diff --git a/crates/nvisy-archive/src/file/archive_type.rs b/crates/nvisy-archive/src/file/archive_type.rs index fdcaa58..ddd3cba 100644 --- a/crates/nvisy-archive/src/file/archive_type.rs +++ b/crates/nvisy-archive/src/file/archive_type.rs @@ -59,7 +59,7 @@ impl ArchiveType { /// /// ``` /// use std::ffi::OsStr; - /// use nvisy_archive::ArchiveType; + /// use nvisy_rt_archive::ArchiveType; /// /// assert_eq!(ArchiveType::from_file_extension(OsStr::new("zip")), Some(ArchiveType::Zip)); /// assert_eq!(ArchiveType::from_file_extension(OsStr::new("tar.gz")), Some(ArchiveType::TarGz)); @@ -89,7 +89,7 @@ impl ArchiveType { /// # Examples /// /// ``` - /// use nvisy_archive::ArchiveType; + /// use nvisy_rt_archive::ArchiveType; /// /// 
assert_eq!(ArchiveType::Zip.file_extensions(), &["zip"]);
     /// assert_eq!(ArchiveType::TarGz.file_extensions(), &["tar.gz", "tgz"]);
@@ -115,7 +115,7 @@ impl ArchiveType {
     /// # Examples
     ///
     /// ```
-    /// use nvisy_archive::ArchiveType;
+    /// use nvisy_rt_archive::ArchiveType;
     ///
     /// assert_eq!(ArchiveType::Zip.primary_extension(), "zip");
     /// assert_eq!(ArchiveType::TarGz.primary_extension(), "tar.gz");
diff --git a/crates/nvisy-archive/src/file/mod.rs b/crates/nvisy-archive/src/file/mod.rs
index c2eeef9..9faa187 100644
--- a/crates/nvisy-archive/src/file/mod.rs
+++ b/crates/nvisy-archive/src/file/mod.rs
@@ -14,9 +14,9 @@
 use bytes::Bytes;
 use tempfile::TempDir;
 use tokio::fs;
 
-use crate::handler::ArchiveHandler;
 #[cfg(feature = "zip")]
 use crate::ZipResultExt;
+use crate::handler::ArchiveHandler;
 use crate::{ArchiveErrorExt, ContentData, ContentSource, Error, Result};
 
 /// Represents an archive file that can be loaded from various sources
@@ -53,11 +53,11 @@ impl ArchiveFile {
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_archive::ArchiveFile;
+    /// use nvisy_rt_archive::ArchiveFile;
     /// use std::path::PathBuf;
     ///
     /// let archive = ArchiveFile::from_path("archive.zip")?;
-    /// # Ok::<(), nvisy_archive::Error>(())
+    /// # Ok::<(), nvisy_rt_archive::Error>(())
     /// ```
     pub fn from_path(path: impl AsRef<Path>) -> Result<Self> {
         let path = path.as_ref();
@@ -100,7 +100,7 @@ impl ArchiveFile {
     /// # Example
     ///
     /// ```
-    /// use nvisy_archive::{ArchiveFile, ArchiveType, ContentData};
+    /// use nvisy_rt_archive::{ArchiveFile, ArchiveType, ContentData};
     ///
     /// let data = ContentData::from(vec![0x50, 0x4B, 0x03, 0x04]); // ZIP signature
     /// let archive = ArchiveFile::from_content_data(ArchiveType::Zip, data);
@@ -120,7 +120,7 @@
     /// # Example
     ///
     /// ```
-    /// use nvisy_archive::{ArchiveFile, ArchiveType};
+    /// use nvisy_rt_archive::{ArchiveFile, ArchiveType};
     ///
     /// let data = vec![0x50, 0x4B, 0x03, 0x04]; // ZIP signature
     /// let archive = ArchiveFile::from_bytes(ArchiveType::Zip, data);
@@ -209,9 +209,9 @@
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_archive::ArchiveFile;
+    /// use nvisy_rt_archive::ArchiveFile;
     ///
-    /// # async fn example() -> nvisy_archive::Result<()> {
+    /// # async fn example() -> nvisy_rt_archive::Result<()> {
     /// let archive = ArchiveFile::from_path("archive.zip")?;
     /// let handler = archive.unpack().await?;
     ///
diff --git a/crates/nvisy-archive/src/handler/mod.rs b/crates/nvisy-archive/src/handler/mod.rs
index ef3415b..a3b6c30 100644
--- a/crates/nvisy-archive/src/handler/mod.rs
+++ b/crates/nvisy-archive/src/handler/mod.rs
@@ -182,9 +182,9 @@ impl ArchiveHandler {
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_archive::{ArchiveFile, ArchiveType};
+    /// use nvisy_rt_archive::{ArchiveFile, ArchiveType};
     ///
-    /// # async fn example() -> nvisy_archive::Result<()> {
+    /// # async fn example() -> nvisy_rt_archive::Result<()> {
     /// let archive = ArchiveFile::from_path("original.zip")?;
     /// let handler = archive.unpack().await?;
     ///
diff --git a/crates/nvisy-archive/src/handler/tar_handler.rs b/crates/nvisy-archive/src/handler/tar_handler.rs
index 26a8e2e..efa030b 100644
--- a/crates/nvisy-archive/src/handler/tar_handler.rs
+++ b/crates/nvisy-archive/src/handler/tar_handler.rs
@@ -363,8 +363,8 @@ impl TarDirectoryBuilder {
             }
             #[cfg(feature = "gzip")]
             ArchiveType::TarGz => {
-                use flate2::write::GzEncoder;
-                use flate2::Compression;
+                use flate2::Compression;
+                use flate2::write::GzEncoder;
 
                 let file = std::fs::File::create(&target_path)?;
                let encoder = GzEncoder::new(file, Compression::default());
@@ -381,8 +381,8 @@
             }
             #[cfg(feature = "bzip2")]
             ArchiveType::TarBz2 => {
-                use bzip2::write::BzEncoder;
-                use bzip2::Compression;
+                use bzip2::Compression;
+                use bzip2::write::BzEncoder;
 
                 let file = std::fs::File::create(&target_path)?;
                 let encoder = BzEncoder::new(file, Compression::default());
@@ -500,8 +500,8 @@ impl TarArchiveBuilder {
                 })
             }
             ArchiveType::TarGz => {
-                use flate2::write::GzEncoder;
-                use flate2::Compression;
+                use flate2::Compression;
+                use flate2::write::GzEncoder;
                 let encoder = GzEncoder::new(writer, Compression::default());
                 let writer: Box<dyn std::io::Write> = Box::new(encoder);
                 Ok(TarArchiveBuilder {
@@ -510,8 +510,8 @@
                 })
             }
             ArchiveType::TarBz2 => {
-                use bzip2::write::BzEncoder;
-                use bzip2::Compression;
+                use bzip2::Compression;
+                use bzip2::write::BzEncoder;
                 let encoder = BzEncoder::new(writer, Compression::default());
                 let writer: Box<dyn std::io::Write> = Box::new(encoder);
                 Ok(TarArchiveBuilder {
diff --git a/crates/nvisy-archive/src/lib.rs b/crates/nvisy-archive/src/lib.rs
index 8f2d86c..17e3c45 100644
--- a/crates/nvisy-archive/src/lib.rs
+++ b/crates/nvisy-archive/src/lib.rs
@@ -9,12 +9,11 @@ pub mod prelude;
 
 // Re-exports for convenience
 pub use file::{ArchiveFile, ArchiveType};
 pub use handler::ArchiveHandler;
-
 // Re-export core types used in archive operations
-pub use nvisy_core::error::{Error, ErrorResource, ErrorType, Result};
-pub use nvisy_core::fs::{ContentKind, ContentMetadata};
-pub use nvisy_core::io::ContentData;
-pub use nvisy_core::path::ContentSource;
+pub use nvisy_rt_core::error::{Error, ErrorResource, ErrorType, Result};
+pub use nvisy_rt_core::fs::{ContentKind, ContentMetadata};
+pub use nvisy_rt_core::io::ContentData;
+pub use nvisy_rt_core::path::ContentSource;
 
 /// Extension trait for creating archive-specific errors
 pub trait ArchiveErrorExt {
diff --git a/crates/nvisy-core/Cargo.toml b/crates/nvisy-core/Cargo.toml
index 46029ed..760fde3 100644
--- a/crates/nvisy-core/Cargo.toml
+++ b/crates/nvisy-core/Cargo.toml
@@ -1,7 +1,7 @@
 # https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [package]
-name = "nvisy-core"
+name = "nvisy-rt-core"
 description = "Core types and utilities for nvisy"
 readme = "./README.md"
diff --git a/crates/nvisy-core/README.md b/crates/nvisy-core/README.md
index 68059c0..30369d4 100644
--- a/crates/nvisy-core/README.md
+++ b/crates/nvisy-core/README.md
@@ -48,6 +48,6 @@ tracking.
 - `tokio` - Async runtime for I/O operations
 - `bytes` - Zero-copy byte buffer management
-- `uuid` - Unique identifiers with UUIDv7 support
+- `uuid` - Unique identifiers with `UUIDv7` support
 - `jiff` - Timestamp support for content source tracking
 - `strum` - Derive macros for enums
diff --git a/crates/nvisy-core/src/error/mod.rs b/crates/nvisy-core/src/error/mod.rs
index 26dca6a..c087aaf 100644
--- a/crates/nvisy-core/src/error/mod.rs
+++ b/crates/nvisy-core/src/error/mod.rs
@@ -21,7 +21,7 @@ pub type BoxError = Box<dyn std::error::Error + Send + Sync>;
 /// # Example
 ///
 /// ```
-/// use nvisy_core::error::{Error, ErrorType, ErrorResource};
+/// use nvisy_rt_core::error::{Error, ErrorType, ErrorResource};
 ///
 /// let error = Error::new("Something went wrong")
 ///     .with_type(ErrorType::Runtime)
diff --git a/crates/nvisy-core/src/fs/content_file.rs b/crates/nvisy-core/src/fs/content_file.rs
index 86bbd8a..e7695b2 100644
--- a/crates/nvisy-core/src/fs/content_file.rs
+++ b/crates/nvisy-core/src/fs/content_file.rs
@@ -40,7 +40,7 @@ impl ContentFile {
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_core::fs::ContentFile;
+    /// use nvisy_rt_core::fs::ContentFile;
     /// use std::path::Path;
     ///
     /// async fn open_file() -> Result<(), Box<dyn std::error::Error>> {
@@ -89,7 +89,7 @@
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_core::fs::ContentFile;
+    /// use nvisy_rt_core::fs::ContentFile;
     ///
     /// async fn create_file() -> Result<(), Box<dyn std::error::Error>> {
     ///     let content_file = ContentFile::create("new_file.txt").await?;
@@ -133,7 +133,7 @@
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_core::fs::ContentFile;
+    /// use nvisy_rt_core::fs::ContentFile;
     /// use tokio::fs::OpenOptions;
     ///
     /// async fn open_with_options() -> Result<(), Box<dyn std::error::Error>> {
@@ -174,7 +174,7 @@
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_core::fs::ContentFile;
+    /// use nvisy_rt_core::fs::ContentFile;
     ///
     /// async fn read_content() -> Result<(), Box<dyn std::error::Error>> {
     ///     let mut content_file = ContentFile::open("example.txt").await?;
@@ -236,8 +236,8 @@
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_core::fs::ContentFile;
-    /// use nvisy_core::io::ContentData;
+    /// use nvisy_rt_core::fs::ContentFile;
+    /// use nvisy_rt_core::io::ContentData;
     ///
     /// async fn write_content() -> Result<(), Box<dyn std::error::Error>> {
     ///     let mut content_file = ContentFile::create("output.txt").await?;
diff --git a/crates/nvisy-core/src/fs/content_metadata.rs b/crates/nvisy-core/src/fs/content_metadata.rs
index 23d01da..8ab09c2 100644
--- a/crates/nvisy-core/src/fs/content_metadata.rs
+++ b/crates/nvisy-core/src/fs/content_metadata.rs
@@ -27,7 +27,7 @@ impl ContentMetadata {
     /// # Example
     ///
     /// ```
-    /// use nvisy_core::{fs::ContentMetadata, path::ContentSource};
+    /// use nvisy_rt_core::{fs::ContentMetadata, path::ContentSource};
     ///
     /// let source = ContentSource::new();
     /// let metadata = ContentMetadata::new(source);
@@ -45,7 +45,7 @@
     /// # Example
     ///
     /// ```
-    /// use nvisy_core::{fs::ContentMetadata, path::ContentSource};
+    /// use nvisy_rt_core::{fs::ContentMetadata, path::ContentSource};
     /// use std::path::PathBuf;
     ///
     /// let source = ContentSource::new();
diff --git a/crates/nvisy-core/src/fs/data_sensitivity.rs b/crates/nvisy-core/src/fs/data_sensitivity.rs
index b7e1a3b..a3252d8 100644
--- a/crates/nvisy-core/src/fs/data_sensitivity.rs
+++ b/crates/nvisy-core/src/fs/data_sensitivity.rs
@@ -17,7 +17,7 @@ use strum::{Display, EnumIter, EnumString};
 /// # Examples
 ///
 /// ```rust
-/// use nvisy_core::fs::DataSensitivity;
+/// use nvisy_rt_core::fs::DataSensitivity;
 ///
 /// let high = DataSensitivity::High;
 /// let medium = DataSensitivity::Medium;
diff --git a/crates/nvisy-core/src/fs/mod.rs b/crates/nvisy-core/src/fs/mod.rs
index c6386bd..bdc802c 100644
--- a/crates/nvisy-core/src/fs/mod.rs
+++ b/crates/nvisy-core/src/fs/mod.rs
@@ -13,8 +13,8 @@
 //! # Example
 //!
 //! ```no_run
-//! use nvisy_core::fs::ContentFile;
-//! use nvisy_core::io::ContentData;
+//! use nvisy_rt_core::fs::ContentFile;
+//! use nvisy_rt_core::io::ContentData;
 //!
 //! async fn example() -> Result<(), Box<dyn std::error::Error>> {
 //!     // Create a new file
diff --git a/crates/nvisy-core/src/io/content.rs b/crates/nvisy-core/src/io/content.rs
index 93de761..c0dd1c5 100644
--- a/crates/nvisy-core/src/io/content.rs
+++ b/crates/nvisy-core/src/io/content.rs
@@ -20,9 +20,9 @@ use crate::path::ContentSource;
 /// # Examples
 ///
 /// ```rust
-/// use nvisy_core::io::{Content, ContentData};
-/// use nvisy_core::fs::ContentMetadata;
-/// use nvisy_core::path::ContentSource;
+/// use nvisy_rt_core::io::{Content, ContentData};
+/// use nvisy_rt_core::fs::ContentMetadata;
+/// use nvisy_rt_core::path::ContentSource;
 ///
 /// // Create content from data
 /// let data = ContentData::from("Hello, world!");
@@ -99,7 +99,11 @@ impl Content {
         self.data.is_likely_text()
     }
 
-    /// Try to get the content as a string slice
+    /// Try to get the content as a string slice.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the content is not valid UTF-8.
     pub fn as_str(&self) -> Result<&str> {
         self.data.as_str()
     }
@@ -124,7 +128,7 @@
         self.metadata = None;
     }
 
-    /// Consume and return the inner ContentData
+    /// Consume and return the inner [`ContentData`].
     pub fn into_data(self) -> ContentData {
         self.data
     }
diff --git a/crates/nvisy-core/src/io/content_data.rs b/crates/nvisy-core/src/io/content_data.rs
index 8f41af3..f60a17f 100644
--- a/crates/nvisy-core/src/io/content_data.rs
+++ b/crates/nvisy-core/src/io/content_data.rs
@@ -171,7 +171,7 @@
     /// # Example
     ///
     /// ```
-    /// use nvisy_core::{io::ContentData, path::ContentSource};
+    /// use nvisy_rt_core::{io::ContentData, path::ContentSource};
     /// use bytes::Bytes;
     ///
     /// let source = ContentSource::new();
@@ -193,7 +193,7 @@
     /// # Example
     ///
     /// ```
-    /// use nvisy_core::{io::ContentData, path::ContentSource};
+    /// use nvisy_rt_core::{io::ContentData, path::ContentSource};
     ///
     /// let source = ContentSource::new();
     /// let content = ContentData::from_text(source, "Hello, world!");
diff --git a/crates/nvisy-core/src/io/content_read.rs b/crates/nvisy-core/src/io/content_read.rs
index f889aea..23d4216 100644
--- a/crates/nvisy-core/src/io/content_read.rs
+++ b/crates/nvisy-core/src/io/content_read.rs
@@ -27,7 +27,7 @@ pub trait AsyncContentRead: AsyncRead + Unpin + Send {
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_core::io::{AsyncContentRead, ContentData};
+    /// use nvisy_rt_core::io::{AsyncContentRead, ContentData};
     /// use tokio::fs::File;
     /// use std::io;
     ///
@@ -58,7 +58,7 @@
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_core::{io::{AsyncContentRead, ContentData}, path::ContentSource};
+    /// use nvisy_rt_core::{io::{AsyncContentRead, ContentData}, path::ContentSource};
     /// use tokio::fs::File;
     /// use std::io;
     ///
@@ -97,7 +97,7 @@
     /// # Example
     ///
     /// ```no_run
-    /// use nvisy_core::io::{AsyncContentRead, ContentData};
+    /// use nvisy_rt_core::io::{AsyncContentRead,
ContentData}; /// use tokio::fs::File; /// use std::io; /// @@ -155,7 +155,7 @@ pub trait AsyncContentRead: AsyncRead + Unpin + Send { /// # Example /// /// ```no_run - /// use nvisy_core::io::AsyncContentRead; + /// use nvisy_rt_core::io::AsyncContentRead; /// use tokio::fs::File; /// use bytes::Bytes; /// use std::io; diff --git a/crates/nvisy-core/src/io/content_write.rs b/crates/nvisy-core/src/io/content_write.rs index 99e749e..9e84912 100644 --- a/crates/nvisy-core/src/io/content_write.rs +++ b/crates/nvisy-core/src/io/content_write.rs @@ -25,8 +25,8 @@ pub trait AsyncContentWrite: AsyncWrite + Unpin + Send { /// # Example /// /// ```no_run - /// use nvisy_core::io::{AsyncContentWrite, ContentData}; - /// use nvisy_core::fs::ContentMetadata; + /// use nvisy_rt_core::io::{AsyncContentWrite, ContentData}; + /// use nvisy_rt_core::fs::ContentMetadata; /// use tokio::fs::File; /// use std::io; /// @@ -61,8 +61,8 @@ pub trait AsyncContentWrite: AsyncWrite + Unpin + Send { /// # Example /// /// ```no_run - /// use nvisy_core::io::{AsyncContentWrite, ContentData}; - /// use nvisy_core::fs::ContentMetadata; + /// use nvisy_rt_core::io::{AsyncContentWrite, ContentData}; + /// use nvisy_rt_core::fs::ContentMetadata; /// use tokio::fs::File; /// use std::path::PathBuf; /// use std::io; @@ -103,8 +103,8 @@ pub trait AsyncContentWrite: AsyncWrite + Unpin + Send { /// # Example /// /// ```no_run - /// use nvisy_core::io::{AsyncContentWrite, ContentData}; - /// use nvisy_core::fs::ContentMetadata; + /// use nvisy_rt_core::io::{AsyncContentWrite, ContentData}; + /// use nvisy_rt_core::fs::ContentMetadata; /// use tokio::fs::File; /// use std::io; /// @@ -145,8 +145,8 @@ pub trait AsyncContentWrite: AsyncWrite + Unpin + Send { /// # Example /// /// ```no_run - /// use nvisy_core::io::{AsyncContentWrite, ContentData}; - /// use nvisy_core::fs::ContentMetadata; + /// use nvisy_rt_core::io::{AsyncContentWrite, ContentData}; + /// use nvisy_rt_core::fs::ContentMetadata; /// use tokio::fs::File; /// use std::io; /// @@ -191,8 +191,8 @@ pub trait AsyncContentWrite: AsyncWrite + Unpin + Send { /// # Example /// /// ```no_run - /// use nvisy_core::io::{AsyncContentWrite, ContentData}; - /// use nvisy_core::fs::ContentMetadata; + /// use nvisy_rt_core::io::{AsyncContentWrite, ContentData}; + /// use nvisy_rt_core::fs::ContentMetadata; /// use tokio::fs::OpenOptions; /// use std::io; /// diff --git a/crates/nvisy-core/src/io/data_reference.rs b/crates/nvisy-core/src/io/data_reference.rs index cf98854..ed067b3 100644 --- a/crates/nvisy-core/src/io/data_reference.rs +++ b/crates/nvisy-core/src/io/data_reference.rs @@ -17,7 +17,7 @@ use crate::path::ContentSource; /// # Examples /// /// ```rust -/// use nvisy_core::io::{DataReference, Content, ContentData}; +/// use nvisy_rt_core::io::{DataReference, Content, ContentData}; /// /// let content = Content::new(ContentData::from("Hello, world!")); /// let data_ref = DataReference::new(content) @@ -95,9 +95,8 @@ impl DataReference { #[cfg(test)] mod tests { - use crate::io::ContentData; - use super::*; + use crate::io::ContentData; #[test] fn test_data_reference_creation() { diff --git a/crates/nvisy-core/src/path/source.rs b/crates/nvisy-core/src/path/source.rs index 49b2811..88efd35 100644 --- a/crates/nvisy-core/src/path/source.rs +++ b/crates/nvisy-core/src/path/source.rs @@ -28,7 +28,7 @@ impl ContentSource { /// # Example /// /// ``` - /// use nvisy_core::path::ContentSource; + /// use nvisy_rt_core::path::ContentSource; /// /// let source = ContentSource::new(); 
/// assert!(!source.as_uuid().is_nil()); @@ -52,7 +52,7 @@ impl ContentSource { /// # Example /// /// ``` - /// use nvisy_core::path::ContentSource; + /// use nvisy_rt_core::path::ContentSource; /// use uuid::Uuid; /// /// let source = ContentSource::new(); @@ -70,7 +70,7 @@ impl ContentSource { /// # Example /// /// ``` - /// use nvisy_core::path::ContentSource; + /// use nvisy_rt_core::path::ContentSource; /// /// let source = ContentSource::new(); /// let uuid = source.as_uuid(); @@ -86,7 +86,7 @@ impl ContentSource { /// # Example /// /// ``` - /// use nvisy_core::path::ContentSource; + /// use nvisy_rt_core::path::ContentSource; /// /// let source = ContentSource::new(); /// let id_str = source.to_string(); @@ -102,7 +102,7 @@ impl ContentSource { /// # Example /// /// ``` - /// use nvisy_core::path::ContentSource; + /// use nvisy_rt_core::path::ContentSource; /// /// let source = ContentSource::new(); /// let id_str = source.to_string(); @@ -122,7 +122,7 @@ impl ContentSource { /// # Example /// /// ``` - /// use nvisy_core::path::ContentSource; + /// use nvisy_rt_core::path::ContentSource; /// use std::time::{SystemTime, UNIX_EPOCH}; /// /// let source = ContentSource::new(); @@ -150,7 +150,7 @@ impl ContentSource { /// # Example /// /// ``` - /// use nvisy_core::path::ContentSource; + /// use nvisy_rt_core::path::ContentSource; /// use std::thread; /// use std::time::Duration; /// diff --git a/crates/nvisy-document/Cargo.toml b/crates/nvisy-document/Cargo.toml index 43a9dde..ac62698 100644 --- a/crates/nvisy-document/Cargo.toml +++ b/crates/nvisy-document/Cargo.toml @@ -1,7 +1,7 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-document" +name = "nvisy-rt-document" description = "Document abstraction layer for nvisy" readme = "./README.md" @@ -22,7 +22,7 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] # Internal crates -nvisy-core = { workspace = true } +nvisy-rt-core = { workspace = true } # Async runtime and I/O tokio = { workspace = true, features = ["sync", "io-util", "fs"] } @@ -43,6 +43,7 @@ thiserror = { workspace = true, features = ["std"] } # Macros derive_more = { workspace = true, features = ["display", "from", "into", "deref", "deref_mut", "as_ref", "constructor"] } +strum = { workspace = true, features = ["derive"] } [dev-dependencies] tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/nvisy-document/README.md b/crates/nvisy-document/README.md index 9b1d780..15024a2 100644 --- a/crates/nvisy-document/README.md +++ b/crates/nvisy-document/README.md @@ -14,14 +14,9 @@ operations like redaction, text replacement, splitting, and merging. - **[`Document`]** - A loaded document instance for reading document content. -- **[`EditableDocument`]** - Extension trait for documents that support editing. - - **[`Region`]** - Semantic units within a document (text blocks, images, tables) with stable IDs that persist across edit sessions. -- **[`EditOperation`]** - Edit commands that target regions by ID, - supporting undo/redo and batch operations. - ## Extension Traits Document implementations can optionally implement these extension traits: diff --git a/crates/nvisy-document/src/diff/change.rs b/crates/nvisy-document/src/diff/change.rs new file mode 100644 index 0000000..b61d11d --- /dev/null +++ b/crates/nvisy-document/src/diff/change.rs @@ -0,0 +1,110 @@ +//! Change types. + +use serde::{Deserialize, Serialize}; +use strum::{Display, EnumIs}; + +/// The kind of change detected between document versions. 
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    Hash,
+    Serialize,
+    Deserialize,
+    Display,
+    EnumIs
+)]
+#[serde(rename_all = "snake_case")]
+#[strum(serialize_all = "snake_case")]
+pub enum ChangeKind {
+    /// Content was added.
+    Added,
+    /// Content was removed.
+    Removed,
+    /// Content was modified.
+    Modified,
+    /// Content was moved to a different location.
+    Moved,
+}
+
+/// A generic change entry.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Change<T> {
+    /// The kind of change.
+    pub kind: ChangeKind,
+    /// The old value (for removed/modified).
+    pub old: Option<T>,
+    /// The new value (for added/modified).
+    pub new: Option<T>,
+}
+
+impl<T> Change<T> {
+    /// Creates a new addition change.
+    #[must_use]
+    pub fn added(value: T) -> Self {
+        Self {
+            kind: ChangeKind::Added,
+            old: None,
+            new: Some(value),
+        }
+    }
+
+    /// Creates a new removal change.
+    #[must_use]
+    pub fn removed(value: T) -> Self {
+        Self {
+            kind: ChangeKind::Removed,
+            old: Some(value),
+            new: None,
+        }
+    }
+
+    /// Creates a new modification change.
+    #[must_use]
+    pub fn modified(old: T, new: T) -> Self {
+        Self {
+            kind: ChangeKind::Modified,
+            old: Some(old),
+            new: Some(new),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_change_kind_display() {
+        assert_eq!(ChangeKind::Added.to_string(), "added");
+        assert_eq!(ChangeKind::Removed.to_string(), "removed");
+        assert_eq!(ChangeKind::Modified.to_string(), "modified");
+        assert_eq!(ChangeKind::Moved.to_string(), "moved");
+    }
+
+    #[test]
+    fn test_change_kind_predicates() {
+        assert!(ChangeKind::Added.is_added());
+        assert!(ChangeKind::Removed.is_removed());
+        assert!(ChangeKind::Modified.is_modified());
+        assert!(ChangeKind::Moved.is_moved());
+    }
+
+    #[test]
+    fn test_generic_change() {
+        let added: Change<i32> = Change::added(42);
+        assert!(added.kind.is_added());
+        assert_eq!(added.new, Some(42));
+
+        let removed: Change<&str> = Change::removed("gone");
+        assert!(removed.kind.is_removed());
+        assert_eq!(removed.old, Some("gone"));
+
+        let modified: Change<String> = Change::modified("old".into(), "new".into());
+        assert!(modified.kind.is_modified());
+        assert_eq!(modified.old, Some("old".into()));
+        assert_eq!(modified.new, Some("new".into()));
+    }
+}
diff --git a/crates/nvisy-document/src/diff/mod.rs b/crates/nvisy-document/src/diff/mod.rs
new file mode 100644
index 0000000..7ba1c1a
--- /dev/null
+++ b/crates/nvisy-document/src/diff/mod.rs
@@ -0,0 +1,18 @@
+//! Document diffing and comparison.
+//!
+//! This module provides types and utilities for comparing documents
+//! and tracking changes between document versions.
+
+mod change;
+mod region_change;
+mod result;
+
+pub use change::{Change, ChangeKind};
+pub use region_change::RegionChange;
+pub use result::Diff;
+
+/// A trait for computing diffs between documents.
+pub trait Differ {
+    /// Computes the difference between this document and another.
+    fn diff(&self, other: &Self) -> Diff;
+}
diff --git a/crates/nvisy-document/src/diff/region_change.rs b/crates/nvisy-document/src/diff/region_change.rs
new file mode 100644
index 0000000..77e776c
--- /dev/null
+++ b/crates/nvisy-document/src/diff/region_change.rs
@@ -0,0 +1,119 @@
+//! Region-specific change type.
+
+use serde::{Deserialize, Serialize};
+
+use super::ChangeKind;
+use crate::RegionId;
+
+/// A change to a specific region in a document.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RegionChange {
+    /// The kind of change.
+    pub kind: ChangeKind,
+    /// The region ID in the old document (if applicable).
+    pub old_id: Option<RegionId>,
+    /// The region ID in the new document (if applicable).
+    pub new_id: Option<RegionId>,
+    /// The old text content (for removed/modified).
+    pub old_text: Option<String>,
+    /// The new text content (for added/modified).
+    pub new_text: Option<String>,
+}
+
+impl RegionChange {
+    /// Creates a new addition change.
+    #[must_use]
+    pub fn added(new_id: RegionId, text: impl Into<String>) -> Self {
+        Self {
+            kind: ChangeKind::Added,
+            old_id: None,
+            new_id: Some(new_id),
+            old_text: None,
+            new_text: Some(text.into()),
+        }
+    }
+
+    /// Creates a new removal change.
+    #[must_use]
+    pub fn removed(old_id: RegionId, text: impl Into<String>) -> Self {
+        Self {
+            kind: ChangeKind::Removed,
+            old_id: Some(old_id),
+            new_id: None,
+            old_text: Some(text.into()),
+            new_text: None,
+        }
+    }
+
+    /// Creates a new modification change.
+    #[must_use]
+    pub fn modified(
+        old_id: RegionId,
+        new_id: RegionId,
+        old_text: impl Into<String>,
+        new_text: impl Into<String>,
+    ) -> Self {
+        Self {
+            kind: ChangeKind::Modified,
+            old_id: Some(old_id),
+            new_id: Some(new_id),
+            old_text: Some(old_text.into()),
+            new_text: Some(new_text.into()),
+        }
+    }
+
+    /// Creates a new move change.
+    #[must_use]
+    pub fn moved(old_id: RegionId, new_id: RegionId, text: impl Into<String>) -> Self {
+        let text = text.into();
+        Self {
+            kind: ChangeKind::Moved,
+            old_id: Some(old_id),
+            new_id: Some(new_id),
+            old_text: Some(text.clone()),
+            new_text: Some(text),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_region_change_added() {
+        let id = RegionId::new();
+        let change = RegionChange::added(id, "new content");
+
+        assert_eq!(change.kind, ChangeKind::Added);
+        assert!(change.old_id.is_none());
+        assert_eq!(change.new_id, Some(id));
+        assert!(change.old_text.is_none());
+        assert_eq!(change.new_text, Some("new content".to_string()));
+    }
+
+    #[test]
+    fn test_region_change_removed() {
+        let id = RegionId::new();
+        let change = RegionChange::removed(id, "old content");
+
+        assert_eq!(change.kind, ChangeKind::Removed);
+        assert_eq!(change.old_id, Some(id));
+        assert!(change.new_id.is_none());
+        assert_eq!(change.old_text, Some("old content".to_string()));
+        assert!(change.new_text.is_none());
+    }
+
+    #[test]
+    fn test_region_change_modified() {
+        let old_id = RegionId::new();
+        let new_id = RegionId::new();
+        let change = RegionChange::modified(old_id, new_id, "old", "new");
+
+        assert_eq!(change.kind, ChangeKind::Modified);
+        assert_eq!(change.old_id, Some(old_id));
+        assert_eq!(change.new_id, Some(new_id));
+        assert_eq!(change.old_text, Some("old".to_string()));
+        assert_eq!(change.new_text, Some("new".to_string()));
+    }
+}
diff --git a/crates/nvisy-document/src/diff/result.rs b/crates/nvisy-document/src/diff/result.rs
new file mode 100644
index 0000000..ac933ca
--- /dev/null
+++ b/crates/nvisy-document/src/diff/result.rs
@@ -0,0 +1,72 @@
+//! Diff result type.
+
+use derive_more::{Deref, DerefMut};
+use serde::{Deserialize, Serialize};
+
+use super::RegionChange;
+
+/// The result of comparing two documents.
+#[derive(Debug, Clone, Default, Serialize, Deserialize, Deref, DerefMut)]
+pub struct Diff {
+    /// Changes to document regions.
+    #[deref]
+    #[deref_mut]
+    pub regions: Vec<RegionChange>,
+    /// Whether the documents are identical.
+    pub is_identical: bool,
+}
+
+impl Diff {
+    /// Creates a new empty diff indicating identical documents.
+ #[must_use] + pub fn identical() -> Self { + Self { + regions: Vec::new(), + is_identical: true, + } + } + + /// Creates a new diff with the given changes. + #[must_use] + pub fn with_changes(regions: Vec<RegionChange>) -> Self { + let is_identical = regions.is_empty(); + Self { + regions, + is_identical, + } + } + + /// Returns the number of changes. + #[must_use] + pub fn change_count(&self) -> usize { + self.regions.len() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::RegionId; + + #[test] + fn test_diff_identical() { + let diff = Diff::identical(); + + assert!(diff.is_identical); + assert!(diff.regions.is_empty()); + } + + #[test] + fn test_diff_with_changes() { + let changes = vec![ + RegionChange::added(RegionId::new(), "added"), + RegionChange::removed(RegionId::new(), "removed"), + RegionChange::modified(RegionId::new(), RegionId::new(), "old", "new"), + ]; + + let diff = Diff::with_changes(changes); + + assert!(!diff.is_identical); + assert_eq!(diff.change_count(), 3); + } +} diff --git a/crates/nvisy-document/src/format/mod.rs b/crates/nvisy-document/src/format/mod.rs index aa6915a..2e886e4 100644 --- a/crates/nvisy-document/src/format/mod.rs +++ b/crates/nvisy-document/src/format/mod.rs @@ -20,7 +20,7 @@ pub use capabilities::{ Capabilities, MetadataCapabilities, StructureCapabilities, TextCapabilities, }; pub use info::DocumentInfo; -pub use nvisy_core::io::ContentData; +pub use nvisy_rt_core::io::ContentData; pub use page::PageOptions; pub use region::{BoundingBox, Point, Region, RegionId, RegionKind, RegionSource, RegionStatus}; diff --git a/crates/nvisy-document/src/format/region/mod.rs b/crates/nvisy-document/src/format/region/mod.rs index e9bde8e..b334549 100644 --- a/crates/nvisy-document/src/format/region/mod.rs +++ b/crates/nvisy-document/src/format/region/mod.rs @@ -10,8 +10,9 @@ mod kind; mod source; mod status; -pub use bounds::{BoundingBox, Point}; pub use core::Region; + +pub use bounds::{BoundingBox, Point}; pub use id::RegionId; pub use kind::RegionKind; pub use source::RegionSource; diff --git a/crates/nvisy-document/src/lib.rs b/crates/nvisy-document/src/lib.rs index 18560e8..b27f9b5 100644 --- a/crates/nvisy-document/src/lib.rs +++ b/crates/nvisy-document/src/lib.rs @@ -3,6 +3,7 @@ #![doc = include_str!("../README.md")] // Core modules +pub mod diff; pub mod format; // Extension trait modules @@ -12,13 +13,11 @@ pub mod table; pub mod text; pub mod thumbnail; -// Error re-exports from nvisy-core -pub use nvisy_core::error::{BoxError, Error, ErrorResource, ErrorType, Result}; - pub use conversion::{ Conversion, ConversionOptions, ConversionPath, ConversionResult, ConversionStep, FormatPair, HtmlOptions, PageMargins, PageOrientation, PdfOptions, SkippedElement, }; +pub use diff::{Change, ChangeKind, Diff, Differ, RegionChange}; pub use format::region::{ BoundingBox, Point, Region, RegionId, RegionKind, RegionSource, RegionStatus, }; @@ -30,6 +29,7 @@ pub use metadata::{ CustomProperty, DocumentMetadata, Metadata, MetadataExtractOptions, MetadataField, PropertyValue, }; +pub use nvisy_rt_core::error::{BoxError, Error, ErrorResource, ErrorType, Result}; pub use table::{CellDataType, NormalizedCell, NormalizedRow, NormalizedTable, TableExtractor}; pub use text::{ExtractedText, TextExtractor}; pub use thumbnail::{ImageFormat, Thumbnail, ThumbnailGenerator, ThumbnailOptions, ThumbnailSize}; diff --git a/crates/nvisy-document/src/table/mod.rs b/crates/nvisy-document/src/table/mod.rs index 80f82aa..c5bab68 100644 --- a/crates/nvisy-document/src/table/mod.rs +++
b/crates/nvisy-document/src/table/mod.rs @@ -20,7 +20,7 @@ use crate::format::{Document, Region, RegionKind}; /// # Example /// /// ```ignore -/// use nvisy_document::{Document, TableExtractor, NormalizedTable}; +/// use nvisy_rt_document::{Document, TableExtractor, NormalizedTable}; /// /// async fn process_tables<D>(doc: &D) -> Result<Vec<NormalizedTable>> /// where diff --git a/crates/nvisy-document/src/text/mod.rs b/crates/nvisy-document/src/text/mod.rs index 319380d..e53c99c 100644 --- a/crates/nvisy-document/src/text/mod.rs +++ b/crates/nvisy-document/src/text/mod.rs @@ -20,7 +20,7 @@ use crate::format::Document; /// # Example /// /// ```ignore -/// use nvisy_document::{Document, TextExtractor, ExtractedText}; +/// use nvisy_rt_document::{Document, TextExtractor, ExtractedText}; /// /// async fn extract_document_text<D>(doc: &D) -> Result<ExtractedText> /// where diff --git a/crates/nvisy-document/src/thumbnail/options.rs b/crates/nvisy-document/src/thumbnail/options.rs index 5ee44dd..b7d8d13 100644 --- a/crates/nvisy-document/src/thumbnail/options.rs +++ b/crates/nvisy-document/src/thumbnail/options.rs @@ -166,10 +166,10 @@ impl ThumbnailOptions { return Err("render_dpi exceeds maximum of 600".to_string()); } - if let Some(ref bg) = self.background { - if bg.len() != 6 || !bg.chars().all(|c| c.is_ascii_hexdigit()) { - return Err("background must be a 6-character hex RGB value".to_string()); - } + if let Some(ref bg) = self.background + && (bg.len() != 6 || !bg.chars().all(|c| c.is_ascii_hexdigit())) + { + return Err("background must be a 6-character hex RGB value".to_string()); } if self.size.max_width() > 4096 || self.size.max_height() > 4096 { diff --git a/crates/nvisy-docx/Cargo.toml b/crates/nvisy-docx/Cargo.toml index f4b66cd..9e0b55c 100644 --- a/crates/nvisy-docx/Cargo.toml +++ b/crates/nvisy-docx/Cargo.toml @@ -1,7 +1,7 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-docx" +name = "nvisy-rt-docx" description = "DOCX document format support for nvisy" readme = "./README.md" @@ -21,7 +21,7 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -nvisy-document = { workspace = true } +nvisy-rt-document = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } diff --git a/crates/nvisy-docx/src/document.rs b/crates/nvisy-docx/src/document.rs index c82a00c..7854e89 100644 --- a/crates/nvisy-docx/src/document.rs +++ b/crates/nvisy-docx/src/document.rs @@ -2,7 +2,7 @@ use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; +use nvisy_rt_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; /// A loaded DOCX document. #[derive(Debug)] diff --git a/crates/nvisy-docx/src/format.rs b/crates/nvisy-docx/src/format.rs index ba50278..0f0559b 100644 --- a/crates/nvisy-docx/src/format.rs +++ b/crates/nvisy-docx/src/format.rs @@ -1,6 +1,6 @@ //! DOCX format handler implementation.
-use nvisy_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; +use nvisy_rt_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; use crate::DocxDocument; diff --git a/crates/nvisy-engine/Cargo.toml b/crates/nvisy-engine/Cargo.toml index 5fb8c27..0dda308 100644 --- a/crates/nvisy-engine/Cargo.toml +++ b/crates/nvisy-engine/Cargo.toml @@ -1,7 +1,7 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-engine" +name = "nvisy-rt-engine" description = "Document processing engine for nvisy" readme = "./README.md" @@ -22,19 +22,19 @@ rustdoc-args = ["--cfg", "docsrs"] [features] default = ["pdf", "docx", "text", "image"] -pdf = ["dep:nvisy-pdf"] -docx = ["dep:nvisy-docx"] -text = ["dep:nvisy-text"] -image = ["dep:nvisy-image"] +pdf = ["dep:nvisy-rt-pdf"] +docx = ["dep:nvisy-rt-docx"] +text = ["dep:nvisy-rt-text"] +image = ["dep:nvisy-rt-image"] [dependencies] # Internal crates -nvisy-archive = { workspace = true } -nvisy-document = { workspace = true } -nvisy-docx = { workspace = true, optional = true } -nvisy-image = { workspace = true, optional = true } -nvisy-pdf = { workspace = true, optional = true } -nvisy-text = { workspace = true, optional = true } +nvisy-rt-archive = { workspace = true } +nvisy-rt-document = { workspace = true } +nvisy-rt-docx = { workspace = true, optional = true } +nvisy-rt-image = { workspace = true, optional = true } +nvisy-rt-pdf = { workspace = true, optional = true } +nvisy-rt-text = { workspace = true, optional = true } # Data types bytes = { workspace = true } @@ -44,6 +44,9 @@ jiff = { workspace = true, features = ["std"] } # Serialization serde = { workspace = true, features = ["std", "derive"] } +# Observability +tracing = { workspace = true } + [dev-dependencies] serde_json = { workspace = true, features = ["std"] } tokio = { workspace = true, features = ["rt", "macros"] } diff --git a/crates/nvisy-engine/src/engine/mod.rs b/crates/nvisy-engine/src/engine/mod.rs index 5b47267..eb8c258 100644 --- a/crates/nvisy-engine/src/engine/mod.rs +++ b/crates/nvisy-engine/src/engine/mod.rs @@ -8,9 +8,11 @@ mod config; use std::path::Path; pub use config::EngineConfig; -use nvisy_document::{ContentData, Result}; +use nvisy_rt_document::{ContentData, Result}; +use tracing::{debug, info}; -use crate::registry::{BoxDocument, FormatRegistry}; +use crate::TRACING_TARGET_ENGINE; +use crate::registry::{FormatRegistry, LoadedDocument}; /// The central document processing engine. /// @@ -22,7 +24,7 @@ use crate::registry::{BoxDocument, FormatRegistry}; /// # Example /// /// ```ignore -/// use nvisy_engine::Engine; +/// use nvisy_rt_engine::Engine; /// /// let engine = Engine::new(); /// @@ -48,6 +50,7 @@ impl Engine { /// Creates a new engine with default configuration and all default formats. #[must_use] pub fn new() -> Self { + info!(target: TRACING_TARGET_ENGINE, "Creating engine with default configuration"); Self { config: EngineConfig::default(), registry: FormatRegistry::with_defaults(), @@ -57,6 +60,7 @@ impl Engine { /// Creates a new engine with the specified configuration. #[must_use] pub fn with_config(config: EngineConfig) -> Self { + debug!(target: TRACING_TARGET_ENGINE, ?config, "Creating engine with custom configuration"); Self { config, registry: FormatRegistry::with_defaults(), @@ -66,6 +70,7 @@ impl Engine { /// Creates a new engine with a custom registry. 
#[must_use] pub fn with_registry(registry: FormatRegistry) -> Self { + debug!(target: TRACING_TARGET_ENGINE, "Creating engine with custom registry"); Self { config: EngineConfig::default(), registry, @@ -75,6 +80,7 @@ impl Engine { /// Creates a new engine with custom configuration and registry. #[must_use] pub fn with_config_and_registry(config: EngineConfig, registry: FormatRegistry) -> Self { + debug!(target: TRACING_TARGET_ENGINE, ?config, "Creating engine with custom configuration and registry"); Self { config, registry } } @@ -108,7 +114,7 @@ impl Engine { /// - The file has no extension /// - The extension is not supported /// - The document fails to load - pub async fn load_file<P: AsRef<Path>>(&self, path: P) -> Result<BoxDocument> { + pub async fn load_file<P: AsRef<Path>>(&self, path: P) -> Result<LoadedDocument> { self.registry.load_file(path).await } @@ -117,7 +123,7 @@ impl Engine { /// # Errors /// /// Returns an error if the extension is not supported or loading fails. - pub async fn load_by_extension(&self, ext: &str, data: ContentData) -> Result<BoxDocument> { + pub async fn load_by_extension(&self, ext: &str, data: ContentData) -> Result<LoadedDocument> { self.registry.load_by_extension(ext, data).await } @@ -126,7 +132,7 @@ impl Engine { /// # Errors /// /// Returns an error if the MIME type is not supported or loading fails. - pub async fn load_by_mime(&self, mime: &str, data: ContentData) -> Result<BoxDocument> { + pub async fn load_by_mime(&self, mime: &str, data: ContentData) -> Result<LoadedDocument> { self.registry.load_by_mime(mime, data).await } @@ -163,6 +169,7 @@ impl Default for Engine { impl Clone for Engine { fn clone(&self) -> Self { + debug!(target: TRACING_TARGET_ENGINE, "Cloning engine"); Self { config: self.config.clone(), registry: FormatRegistry::with_defaults(), @@ -283,7 +290,7 @@ mod tests { let mut registry = FormatRegistry::new(); #[cfg(feature = "text")] - registry.register(nvisy_text::JsonFormat::new()); + registry.register(nvisy_rt_text::JsonFormat::new()); let engine = Engine::with_registry(registry); diff --git a/crates/nvisy-engine/src/lib.rs b/crates/nvisy-engine/src/lib.rs index c4d83a1..768f829 100644 --- a/crates/nvisy-engine/src/lib.rs +++ b/crates/nvisy-engine/src/lib.rs @@ -7,9 +7,21 @@ pub mod registry; pub mod session; pub use engine::{Engine, EngineConfig}; -pub use nvisy_document::{ +pub use nvisy_rt_document::{ self as doc, BoundingBox, Capabilities, Document, DocumentFormat, Point, Region, RegionId, RegionKind, }; -pub use registry::{AnyFormat, BoxDocument, FormatRegistry}; +pub use registry::{FormatRef, FormatRegistry, LoadedDocument}; pub use session::{AccessEntry, AccessHistory, ReadSession, SessionConfig, SessionId}; + +/// Tracing target for engine operations. +pub const TRACING_TARGET_ENGINE: &str = "nvisy_rt_engine::engine"; + +/// Tracing target for format registry operations. +pub const TRACING_TARGET_REGISTRY: &str = "nvisy_rt_engine::registry"; + +/// Tracing target for session management. +pub const TRACING_TARGET_SESSION: &str = "nvisy_rt_engine::session"; + +/// Tracing target for document loading. +pub const TRACING_TARGET_LOAD: &str = "nvisy_rt_engine::load"; diff --git a/crates/nvisy-engine/src/registry/format_ref.rs b/crates/nvisy-engine/src/registry/format_ref.rs new file mode 100644 index 0000000..784add6 --- /dev/null +++ b/crates/nvisy-engine/src/registry/format_ref.rs @@ -0,0 +1,114 @@ +//! Format reference types. + +use std::pin::Pin; + +use nvisy_rt_document::{Capabilities, ContentData, Document, DocumentFormat, Result}; + +use super::LoadedDocument; + +/// Internal type alias for boxed documents.
+pub(crate) type BoxDocument = Box<dyn Document + Send + Sync>; + +/// A type-erased format handler. +pub(crate) trait AnyFormat: Send + Sync { + fn name(&self) -> &'static str; + fn mime_types(&self) -> &'static [&'static str]; + fn extensions(&self) -> &'static [&'static str]; + fn capabilities(&self) -> &Capabilities; + fn load_boxed( + &self, + data: ContentData, + ) -> Pin<Box<dyn std::future::Future<Output = Result<BoxDocument>> + Send + '_>>; +} + +/// Wrapper that implements `AnyFormat` for any `DocumentFormat`. +pub(crate) struct FormatWrapper<F> { + pub(crate) inner: F, +} + +impl<F> AnyFormat for FormatWrapper<F> +where + F: DocumentFormat + Send + Sync + 'static, + F::Document: Send + Sync + 'static, +{ + fn name(&self) -> &'static str { + DocumentFormat::name(&self.inner) + } + + fn mime_types(&self) -> &'static [&'static str] { + DocumentFormat::mime_types(&self.inner) + } + + fn extensions(&self) -> &'static [&'static str] { + DocumentFormat::extensions(&self.inner) + } + + fn capabilities(&self) -> &Capabilities { + DocumentFormat::capabilities(&self.inner) + } + + fn load_boxed( + &self, + data: ContentData, + ) -> Pin<Box<dyn std::future::Future<Output = Result<BoxDocument>> + Send + '_>> { + Box::pin(async move { + let doc = DocumentFormat::load(&self.inner, data).await?; + Ok(Box::new(doc) as BoxDocument) + }) + } +} + +/// A borrowed reference to a format handler. +/// +/// Provides access to format metadata and document loading. +#[derive(Clone, Copy)] +pub struct FormatRef<'a> { + inner: &'a dyn AnyFormat, +} + +impl std::fmt::Debug for FormatRef<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("FormatRef") + .field("name", &self.name()) + .field("extensions", &self.extensions()) + .field("mime_types", &self.mime_types()) + .finish() + } +} + +impl<'a> FormatRef<'a> { + /// Creates a new format reference. + pub(crate) fn new(inner: &'a dyn AnyFormat) -> Self { + Self { inner } + } + + /// Returns the format name. + #[must_use] + pub fn name(&self) -> &'static str { + self.inner.name() + } + + /// Returns supported MIME types. + #[must_use] + pub fn mime_types(&self) -> &'static [&'static str] { + self.inner.mime_types() + } + + /// Returns supported file extensions. + #[must_use] + pub fn extensions(&self) -> &'static [&'static str] { + self.inner.extensions() + } + + /// Returns the format capabilities. + #[must_use] + pub fn capabilities(&self) -> &Capabilities { + self.inner.capabilities() + } + + /// Loads a document from content data. + pub async fn load(&self, data: ContentData) -> Result<LoadedDocument> { + let doc = self.inner.load_boxed(data).await?; + Ok(LoadedDocument::new(doc)) + } +} diff --git a/crates/nvisy-engine/src/registry/format_registry.rs b/crates/nvisy-engine/src/registry/format_registry.rs new file mode 100644 index 0000000..4dc8b08 --- /dev/null +++ b/crates/nvisy-engine/src/registry/format_registry.rs @@ -0,0 +1,343 @@ +//! Format registry implementation. + +use std::collections::HashMap; +use std::sync::Arc; + +use nvisy_rt_document::{ContentData, DocumentFormat, Error, Result}; +use tracing::{debug, instrument, trace, warn}; + +use super::format_ref::{AnyFormat, FormatWrapper}; +use super::{FormatRef, LoadedDocument}; +use crate::{TRACING_TARGET_LOAD, TRACING_TARGET_REGISTRY}; + +/// Registry entry containing a format handler. +struct RegistryEntry { + format: Arc<dyn AnyFormat>, +} + +/// A registry of document formats. +/// +/// The registry maintains mappings from file extensions and MIME types +/// to format handlers, enabling dynamic document loading.
+/// +/// # Example +/// +/// ```ignore +/// use nvisy_rt_engine::FormatRegistry; +/// +/// let registry = FormatRegistry::with_defaults(); +/// +/// // Load by file path +/// let doc = registry.load_file("document.pdf").await?; +/// +/// // Load by extension +/// let doc = registry.load_by_extension("json", data).await?; +/// ``` +#[derive(Default)] +pub struct FormatRegistry { + /// All registered formats. + formats: Vec<RegistryEntry>, + + /// Extension to format index mapping. + by_extension: HashMap<&'static str, usize>, + + /// MIME type to format index mapping. + by_mime: HashMap<&'static str, usize>, +} + +impl FormatRegistry { + /// Creates an empty registry. + #[must_use] + pub fn new() -> Self { + trace!(target: TRACING_TARGET_REGISTRY, "Creating empty format registry"); + Self { + formats: Vec::new(), + by_extension: HashMap::new(), + by_mime: HashMap::new(), + } + } + + /// Creates a registry with all default formats registered. + #[must_use] + pub fn with_defaults() -> Self { + let mut registry = Self::new(); + registry.register_defaults(); + debug!( + target: TRACING_TARGET_REGISTRY, + formats = registry.formats.len(), + extensions = registry.by_extension.len(), + mime_types = registry.by_mime.len(), + "Initialized registry with default formats" + ); + registry + } + + /// Registers all default formats based on enabled features. + pub fn register_defaults(&mut self) { + #[cfg(feature = "pdf")] + self.register(nvisy_rt_pdf::PdfFormat::new()); + + #[cfg(feature = "docx")] + self.register(nvisy_rt_docx::DocxFormat::new()); + + #[cfg(feature = "text")] + { + self.register(nvisy_rt_text::PlainTextFormat::new()); + self.register(nvisy_rt_text::MarkdownFormat::new()); + self.register(nvisy_rt_text::JsonFormat::new()); + self.register(nvisy_rt_text::CsvFormat::new()); + self.register(nvisy_rt_text::XmlFormat::new()); + self.register(nvisy_rt_text::YamlFormat::new()); + self.register(nvisy_rt_text::TomlFormat::new()); + self.register(nvisy_rt_text::IniFormat::new()); + } + + #[cfg(feature = "image")] + { + self.register(nvisy_rt_image::JpegFormat::new()); + self.register(nvisy_rt_image::PngFormat::new()); + } + } + + /// Registers a format handler. + /// + /// Extensions and MIME types from the format are automatically indexed. + /// If an extension or MIME type is already registered, the new format + /// takes precedence. + pub fn register<F>(&mut self, format: F) + where + F: DocumentFormat + Send + Sync + 'static, + F::Document: Send + Sync + 'static, + { + let wrapper = FormatWrapper { inner: format }; + let index = self.formats.len(); + let format: Arc<dyn AnyFormat> = Arc::new(wrapper); + + let name = format.name(); + let extensions = format.extensions(); + let mime_types = format.mime_types(); + + // Index by extension + for ext in extensions { + self.by_extension.insert(ext, index); + } + + // Index by MIME type + for mime in mime_types { + self.by_mime.insert(mime, index); + } + + self.formats.push(RegistryEntry { format }); + + trace!( + target: TRACING_TARGET_REGISTRY, + name, + ?extensions, + ?mime_types, + "Registered format" + ); + } + + /// Returns the format handler for a file extension. + #[must_use] + pub fn get_by_extension(&self, ext: &str) -> Option<FormatRef<'_>> { + let ext = ext.trim_start_matches('.').to_lowercase(); + self.by_extension + .get(ext.as_str()) + .and_then(|&idx| self.formats.get(idx)) + .map(|e| FormatRef::new(e.format.as_ref())) + } + + /// Returns the format handler for a MIME type.
+ #[must_use] + pub fn get_by_mime(&self, mime: &str) -> Option<FormatRef<'_>> { + let mime = mime.to_lowercase(); + self.by_mime + .get(mime.as_str()) + .and_then(|&idx| self.formats.get(idx)) + .map(|e| FormatRef::new(e.format.as_ref())) + } + + /// Checks if an extension is supported. + #[must_use] + pub fn supports_extension(&self, ext: &str) -> bool { + let ext = ext.trim_start_matches('.').to_lowercase(); + self.by_extension.contains_key(ext.as_str()) + } + + /// Checks if a MIME type is supported. + #[must_use] + pub fn supports_mime(&self, mime: &str) -> bool { + let mime = mime.to_lowercase(); + self.by_mime.contains_key(mime.as_str()) + } + + /// Returns all supported file extensions. + #[must_use] + pub fn supported_extensions(&self) -> Vec<&'static str> { + self.by_extension.keys().copied().collect() + } + + /// Returns all supported MIME types. + #[must_use] + pub fn supported_mime_types(&self) -> Vec<&'static str> { + self.by_mime.keys().copied().collect() + } + + /// Returns all registered formats. + #[must_use] + pub fn formats(&self) -> Vec<FormatRef<'_>> { + self.formats + .iter() + .map(|e| FormatRef::new(e.format.as_ref())) + .collect() + } + + /// Loads a document by file extension. + /// + /// # Errors + /// + /// Returns an error if: + /// - The extension is not supported + /// - The document fails to load + #[instrument(target = TRACING_TARGET_LOAD, skip(self, data), fields(size = data.size()))] + pub async fn load_by_extension(&self, ext: &str, data: ContentData) -> Result<LoadedDocument> { + let format = self.get_by_extension(ext).ok_or_else(|| { + warn!(target: TRACING_TARGET_LOAD, ext, "Unsupported extension"); + Error::new(format!("Unsupported extension: {}", ext)) + })?; + + debug!(target: TRACING_TARGET_LOAD, ext, format = format.name(), "Loading document"); + format.load(data).await + } + + /// Loads a document by MIME type. + /// + /// # Errors + /// + /// Returns an error if: + /// - The MIME type is not supported + /// - The document fails to load + #[instrument(target = TRACING_TARGET_LOAD, skip(self, data), fields(size = data.size()))] + pub async fn load_by_mime(&self, mime: &str, data: ContentData) -> Result<LoadedDocument> { + let format = self.get_by_mime(mime).ok_or_else(|| { + warn!(target: TRACING_TARGET_LOAD, mime, "Unsupported MIME type"); + Error::new(format!("Unsupported MIME type: {}", mime)) + })?; + + debug!(target: TRACING_TARGET_LOAD, mime, format = format.name(), "Loading document"); + format.load(data).await + } + + /// Loads a document from a file path. + /// + /// The format is determined by the file extension.
+ /// + /// # Errors + /// + /// Returns an error if: + /// - The file cannot be read + /// - The file has no extension + /// - The extension is not supported + /// - The document fails to load + #[instrument(target = TRACING_TARGET_LOAD, skip(self), fields(path = %path.as_ref().display()))] + pub async fn load_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<LoadedDocument> { + let path = path.as_ref(); + + let ext = path.extension().and_then(|e| e.to_str()).ok_or_else(|| { + warn!(target: TRACING_TARGET_LOAD, path = %path.display(), "File has no extension"); + Error::new("File has no extension") + })?; + + debug!(target: TRACING_TARGET_LOAD, path = %path.display(), ext, "Reading file"); + + let data = std::fs::read(path).map_err(|e| { + warn!(target: TRACING_TARGET_LOAD, path = %path.display(), error = %e, "Failed to read file"); + Error::from_source(format!("Failed to read file '{}'", path.display()), e) + })?; + + self.load_by_extension(ext, ContentData::from(data)).await + } +} + +impl std::fmt::Debug for FormatRegistry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("FormatRegistry") + .field("formats", &self.formats.len()) + .field("extensions", &self.by_extension.keys().collect::<Vec<_>>()) + .field("mime_types", &self.by_mime.keys().collect::<Vec<_>>()) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_registry_creation() { + let registry = FormatRegistry::new(); + assert!(registry.formats().is_empty()); + } + + #[test] + fn test_registry_with_defaults() { + let registry = FormatRegistry::with_defaults(); + assert!(!registry.formats().is_empty()); + + #[cfg(feature = "text")] + { + assert!(registry.supports_extension("txt")); + assert!(registry.supports_extension("json")); + assert!(registry.supports_extension("md")); + } + } + + #[test] + fn test_get_by_extension() { + let registry = FormatRegistry::with_defaults(); + + #[cfg(feature = "text")] + { + let format = registry.get_by_extension("json").unwrap(); + assert_eq!(format.name(), "json"); + + let format = registry.get_by_extension(".JSON").unwrap(); + assert_eq!(format.name(), "json"); + } + + assert!(registry.get_by_extension("xyz").is_none()); + } + + #[cfg(feature = "text")] + #[tokio::test] + async fn test_load_by_extension() { + let registry = FormatRegistry::with_defaults(); + + let doc = registry + .load_by_extension("json", ContentData::from(r#"{"key": "value"}"#)) + .await + .unwrap(); + + assert!(!doc.regions().is_empty()); + } + + #[cfg(feature = "text")] + #[tokio::test] + async fn test_load_by_mime() { + let registry = FormatRegistry::with_defaults(); + + let doc = registry + .load_by_mime("application/json", ContentData::from(r#"{"key": "value"}"#)) + .await + .unwrap(); + + assert!(!doc.regions().is_empty()); + } + + #[test] + fn test_unsupported_extension() { + let registry = FormatRegistry::with_defaults(); + assert!(!registry.supports_extension("xyz")); + } +} diff --git a/crates/nvisy-engine/src/registry/loaded_document.rs b/crates/nvisy-engine/src/registry/loaded_document.rs new file mode 100644 index 0000000..396041d --- /dev/null +++ b/crates/nvisy-engine/src/registry/loaded_document.rs @@ -0,0 +1,41 @@ +//! Loaded document wrapper. + +use nvisy_rt_document::Document; + +use super::format_ref::BoxDocument; + +/// A loaded document from the registry. +/// +/// This struct wraps a type-erased document and provides access +/// to common document operations through `Deref`.
+pub struct LoadedDocument { + inner: BoxDocument, +} + +impl LoadedDocument { + /// Creates a new loaded document. + pub(crate) fn new(inner: BoxDocument) -> Self { + Self { inner } + } + + /// Consumes this wrapper and returns the inner boxed document. + pub fn into_inner(self) -> Box<dyn Document + Send + Sync> { + self.inner + } +} + +impl std::fmt::Debug for LoadedDocument { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("LoadedDocument") + .field("info", self.inner.info()) + .finish() + } +} + +impl std::ops::Deref for LoadedDocument { + type Target = dyn Document + Send + Sync; + + fn deref(&self) -> &Self::Target { + self.inner.as_ref() + } +} diff --git a/crates/nvisy-engine/src/registry/mod.rs b/crates/nvisy-engine/src/registry/mod.rs index fc3d854..7c09587 100644 --- a/crates/nvisy-engine/src/registry/mod.rs +++ b/crates/nvisy-engine/src/registry/mod.rs @@ -4,378 +4,10 @@ //! to be loaded by extension or MIME type without knowing the concrete //! format at compile time. -use std::collections::HashMap; -use std::sync::Arc; +mod format_ref; +mod format_registry; +mod loaded_document; -use nvisy_document::{Capabilities, ContentData, Document, Error, Result}; - -/// A type-erased document that can be used for common operations. -pub type BoxDocument = Box<dyn Document + Send + Sync>; - -/// A type-erased format handler. -/// -/// This trait provides a common interface for all format handlers, -/// enabling dynamic dispatch and runtime format selection. -pub trait AnyFormat: Send + Sync { - /// Returns the format name. - fn name(&self) -> &'static str; - - /// Returns supported MIME types. - fn mime_types(&self) -> &'static [&'static str]; - - /// Returns supported file extensions. - fn extensions(&self) -> &'static [&'static str]; - - /// Returns the format capabilities. - fn capabilities(&self) -> &Capabilities; - - /// Loads a document from content data, returning a type-erased document. - fn load_boxed( - &self, - data: ContentData, - ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<BoxDocument>> + Send + '_>>; -} - -/// Wrapper that implements AnyFormat for any DocumentFormat. -struct FormatWrapper<F> { - inner: F, -} - -impl<F> AnyFormat for FormatWrapper<F> -where - F: nvisy_document::DocumentFormat + Send + Sync + 'static, - F::Document: Send + Sync + 'static, -{ - fn name(&self) -> &'static str { - nvisy_document::DocumentFormat::name(&self.inner) - } - - fn mime_types(&self) -> &'static [&'static str] { - nvisy_document::DocumentFormat::mime_types(&self.inner) - } - - fn extensions(&self) -> &'static [&'static str] { - nvisy_document::DocumentFormat::extensions(&self.inner) - } - - fn capabilities(&self) -> &Capabilities { - nvisy_document::DocumentFormat::capabilities(&self.inner) - } - - fn load_boxed( - &self, - data: ContentData, - ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<BoxDocument>> + Send + '_>> { - Box::pin(async move { - let doc = nvisy_document::DocumentFormat::load(&self.inner, data).await?; - Ok(Box::new(doc) as BoxDocument) - }) - } -} - -/// Registry entry containing a format handler. -struct RegistryEntry { - format: Arc<dyn AnyFormat>, -} - -/// A registry of document formats. -/// -/// The registry maintains mappings from file extensions and MIME types -/// to format handlers, enabling dynamic document loading.
-/// -/// # Example -/// -/// ```ignore -/// use nvisy_engine::FormatRegistry; -/// -/// let registry = FormatRegistry::with_defaults(); -/// -/// // Load by file path -/// let doc = registry.load_file("document.pdf").await?; -/// -/// // Load by extension -/// let doc = registry.load_by_extension("json", data).await?; -/// ``` -#[derive(Default)] -pub struct FormatRegistry { - /// All registered formats. - formats: Vec<RegistryEntry>, - - /// Extension to format index mapping. - by_extension: HashMap<&'static str, usize>, - - /// MIME type to format index mapping. - by_mime: HashMap<&'static str, usize>, -} - -impl FormatRegistry { - /// Creates an empty registry. - #[must_use] - pub fn new() -> Self { - Self { - formats: Vec::new(), - by_extension: HashMap::new(), - by_mime: HashMap::new(), - } - } - - /// Creates a registry with all default formats registered. - #[must_use] - pub fn with_defaults() -> Self { - let mut registry = Self::new(); - registry.register_defaults(); - registry - } - - /// Registers all default formats based on enabled features. - pub fn register_defaults(&mut self) { - #[cfg(feature = "pdf")] - self.register(nvisy_pdf::PdfFormat::new()); - - #[cfg(feature = "docx")] - self.register(nvisy_docx::DocxFormat::new()); - - #[cfg(feature = "text")] - { - self.register(nvisy_text::PlainTextFormat::new()); - self.register(nvisy_text::MarkdownFormat::new()); - self.register(nvisy_text::JsonFormat::new()); - self.register(nvisy_text::CsvFormat::new()); - self.register(nvisy_text::XmlFormat::new()); - self.register(nvisy_text::YamlFormat::new()); - self.register(nvisy_text::TomlFormat::new()); - self.register(nvisy_text::IniFormat::new()); - } - - #[cfg(feature = "image")] - { - self.register(nvisy_image::JpegFormat::new()); - self.register(nvisy_image::PngFormat::new()); - } - } - - /// Registers a format handler. - /// - /// Extensions and MIME types from the format are automatically indexed. - /// If an extension or MIME type is already registered, the new format - /// takes precedence. - pub fn register<F>(&mut self, format: F) - where - F: nvisy_document::DocumentFormat + Send + Sync + 'static, - F::Document: Send + Sync + 'static, - { - let wrapper = FormatWrapper { inner: format }; - let index = self.formats.len(); - let format: Arc<dyn AnyFormat> = Arc::new(wrapper); - - // Index by extension - for ext in format.extensions() { - self.by_extension.insert(ext, index); - } - - // Index by MIME type - for mime in format.mime_types() { - self.by_mime.insert(mime, index); - } - - self.formats.push(RegistryEntry { format }); - } - - /// Returns the format handler for a file extension. - #[must_use] - pub fn get_by_extension(&self, ext: &str) -> Option<&dyn AnyFormat> { - let ext = ext.trim_start_matches('.').to_lowercase(); - self.by_extension - .get(ext.as_str()) - .and_then(|&idx| self.formats.get(idx)) - .map(|e| e.format.as_ref()) - } - - /// Returns the format handler for a MIME type. - #[must_use] - pub fn get_by_mime(&self, mime: &str) -> Option<&dyn AnyFormat> { - let mime = mime.to_lowercase(); - self.by_mime - .get(mime.as_str()) - .and_then(|&idx| self.formats.get(idx)) - .map(|e| e.format.as_ref()) - } - - /// Checks if an extension is supported. - #[must_use] - pub fn supports_extension(&self, ext: &str) -> bool { - let ext = ext.trim_start_matches('.').to_lowercase(); - self.by_extension.contains_key(ext.as_str()) - } - - /// Checks if a MIME type is supported.
- #[must_use] - pub fn supports_mime(&self, mime: &str) -> bool { - let mime = mime.to_lowercase(); - self.by_mime.contains_key(mime.as_str()) - } - - /// Returns all supported file extensions. - #[must_use] - pub fn supported_extensions(&self) -> Vec<&'static str> { - self.by_extension.keys().copied().collect() - } - - /// Returns all supported MIME types. - #[must_use] - pub fn supported_mime_types(&self) -> Vec<&'static str> { - self.by_mime.keys().copied().collect() - } - - /// Returns all registered formats. - #[must_use] - pub fn formats(&self) -> Vec<&dyn AnyFormat> { - self.formats.iter().map(|e| e.format.as_ref()).collect() - } - - /// Loads a document by file extension. - /// - /// # Errors - /// - /// Returns an error if: - /// - The extension is not supported - /// - The document fails to load - pub async fn load_by_extension(&self, ext: &str, data: ContentData) -> Result<BoxDocument> { - let ext_lower = ext.trim_start_matches('.').to_lowercase(); - - let format = self - .by_extension - .get(ext_lower.as_str()) - .and_then(|&idx| self.formats.get(idx)) - .ok_or_else(|| Error::new(format!("Unsupported extension: {}", ext)))?; - - format.format.load_boxed(data).await - } - - /// Loads a document by MIME type. - /// - /// # Errors - /// - /// Returns an error if: - /// - The MIME type is not supported - /// - The document fails to load - pub async fn load_by_mime(&self, mime: &str, data: ContentData) -> Result<BoxDocument> { - let mime_lower = mime.to_lowercase(); - - let format = self - .by_mime - .get(mime_lower.as_str()) - .and_then(|&idx| self.formats.get(idx)) - .ok_or_else(|| Error::new(format!("Unsupported MIME type: {}", mime)))?; - - format.format.load_boxed(data).await - } - - /// Loads a document from a file path. - /// - /// The format is determined by the file extension.
- /// - /// # Errors - /// - /// Returns an error if: - /// - The file cannot be read - /// - The file has no extension - /// - The extension is not supported - /// - The document fails to load - pub async fn load_file<P: AsRef<std::path::Path>>(&self, path: P) -> Result<BoxDocument> { - let path = path.as_ref(); - - let ext = path - .extension() - .and_then(|e| e.to_str()) - .ok_or_else(|| Error::new("File has no extension"))?; - - let data = std::fs::read(path).map_err(|e| { - Error::from_source(format!("Failed to read file '{}'", path.display()), e) - })?; - - self.load_by_extension(ext, ContentData::from(data)).await - } -} - -impl std::fmt::Debug for FormatRegistry { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("FormatRegistry") - .field("formats", &self.formats.len()) - .field("extensions", &self.by_extension.keys().collect::<Vec<_>>()) - .field("mime_types", &self.by_mime.keys().collect::<Vec<_>>()) - .finish() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_registry_creation() { - let registry = FormatRegistry::new(); - assert!(registry.formats().is_empty()); - } - - #[test] - fn test_registry_with_defaults() { - let registry = FormatRegistry::with_defaults(); - assert!(!registry.formats().is_empty()); - - #[cfg(feature = "text")] - { - assert!(registry.supports_extension("txt")); - assert!(registry.supports_extension("json")); - assert!(registry.supports_extension("md")); - } - } - - #[test] - fn test_get_by_extension() { - let registry = FormatRegistry::with_defaults(); - - #[cfg(feature = "text")] - { - let format = registry.get_by_extension("json").unwrap(); - assert_eq!(format.name(), "json"); - - let format = registry.get_by_extension(".JSON").unwrap(); - assert_eq!(format.name(), "json"); - } - - assert!(registry.get_by_extension("xyz").is_none()); - } - - #[cfg(feature = "text")] - #[tokio::test] - async fn test_load_by_extension() { - let registry = FormatRegistry::with_defaults(); - - let doc = registry - .load_by_extension("json", ContentData::from(r#"{"key": "value"}"#)) - .await - .unwrap(); - - assert!(!doc.regions().is_empty()); - } - - #[cfg(feature = "text")] - #[tokio::test] - async fn test_load_by_mime() { - let registry = FormatRegistry::with_defaults(); - - let doc = registry - .load_by_mime("application/json", ContentData::from(r#"{"key": "value"}"#)) - .await - .unwrap(); - - assert!(!doc.regions().is_empty()); - } - - #[test] - fn test_unsupported_extension() { - let registry = FormatRegistry::with_defaults(); - assert!(!registry.supports_extension("xyz")); - } -} +pub use format_ref::FormatRef; +pub use format_registry::FormatRegistry; +pub use loaded_document::LoadedDocument; diff --git a/crates/nvisy-engine/src/session/mod.rs b/crates/nvisy-engine/src/session/mod.rs index 2599aec..072831c 100644 --- a/crates/nvisy-engine/src/session/mod.rs +++ b/crates/nvisy-engine/src/session/mod.rs @@ -13,7 +13,7 @@ use std::num::NonZeroU32; use bytes::Bytes; pub use history::{AccessEntry, AccessHistory}; use jiff::Timestamp; -use nvisy_document::{Capabilities, Document, PageOptions, Region, RegionId, Result}; +use nvisy_rt_document::{Capabilities, Document, PageOptions, Region, RegionId, Result}; use uuid::Uuid; /// Unique identifier for a read session.
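The registry hunks above replace the old borrowed `&dyn AnyFormat` handles and public `BoxDocument` results with the `FormatRef`/`LoadedDocument` pair. A minimal sketch of the new call path, using only the names visible in this patch (`with_defaults`, `get_by_extension`, `load_by_extension`, and `Deref` on `LoadedDocument`); the binary harness, and the assumption that the `text` feature plus tokio's `rt`/`macros` features are enabled, are illustrative rather than part of the patch:

```rust
use nvisy_rt_document::ContentData;
use nvisy_rt_engine::FormatRegistry;

#[tokio::main]
async fn main() {
    // Registry pre-populated from the formats enabled by crate features.
    let registry = FormatRegistry::with_defaults();

    // `FormatRef` is a cheap Copy handle: inspect metadata without loading.
    if let Some(format) = registry.get_by_extension("json") {
        println!("handler: {}", format.name());
    }

    // Loading returns a `LoadedDocument`, which derefs to
    // `dyn Document + Send + Sync`, so trait methods apply directly.
    let doc = registry
        .load_by_extension("json", ContentData::from(r#"{"key": "value"}"#))
        .await
        .expect("json handler is registered under the `text` feature");
    assert!(!doc.regions().is_empty());
}
```

Compared with handing out `&dyn AnyFormat`, the wrapper keeps `AnyFormat`, `FormatWrapper`, and `BoxDocument` at `pub(crate)` visibility, so the type-erasure machinery can change without breaking downstream crates.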
diff --git a/crates/nvisy-image/Cargo.toml b/crates/nvisy-image/Cargo.toml index d84ecb1..46f9ce6 100644 --- a/crates/nvisy-image/Cargo.toml +++ b/crates/nvisy-image/Cargo.toml @@ -1,7 +1,7 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-image" +name = "nvisy-rt-image" description = "Image format support for nvisy" readme = "./README.md" @@ -21,7 +21,7 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -nvisy-document = { workspace = true } +nvisy-rt-document = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } diff --git a/crates/nvisy-image/src/documents/jpeg.rs b/crates/nvisy-image/src/documents/jpeg.rs index df75644..da91868 100644 --- a/crates/nvisy-image/src/documents/jpeg.rs +++ b/crates/nvisy-image/src/documents/jpeg.rs @@ -2,7 +2,7 @@ use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; +use nvisy_rt_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; /// A loaded JPEG document. #[derive(Debug)] diff --git a/crates/nvisy-image/src/documents/png.rs b/crates/nvisy-image/src/documents/png.rs index b8ca50e..8159482 100644 --- a/crates/nvisy-image/src/documents/png.rs +++ b/crates/nvisy-image/src/documents/png.rs @@ -2,7 +2,7 @@ use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; +use nvisy_rt_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; /// A loaded PNG document. #[derive(Debug)] diff --git a/crates/nvisy-image/src/formats/jpeg.rs b/crates/nvisy-image/src/formats/jpeg.rs index 8a0cb81..788dd2c 100644 --- a/crates/nvisy-image/src/formats/jpeg.rs +++ b/crates/nvisy-image/src/formats/jpeg.rs @@ -1,6 +1,6 @@ //! JPEG format handler implementation. -use nvisy_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; +use nvisy_rt_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; use crate::documents::JpegDocument; diff --git a/crates/nvisy-image/src/formats/png.rs b/crates/nvisy-image/src/formats/png.rs index 93572fe..4c1c02a 100644 --- a/crates/nvisy-image/src/formats/png.rs +++ b/crates/nvisy-image/src/formats/png.rs @@ -1,6 +1,6 @@ //! PNG format handler implementation. 
-use nvisy_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; +use nvisy_rt_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; use crate::documents::PngDocument; diff --git a/crates/nvisy-pdf/Cargo.toml b/crates/nvisy-pdf/Cargo.toml index c3ac1a0..fac0f72 100644 --- a/crates/nvisy-pdf/Cargo.toml +++ b/crates/nvisy-pdf/Cargo.toml @@ -1,7 +1,7 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-pdf" +name = "nvisy-rt-pdf" description = "PDF document format support for nvisy" readme = "./README.md" @@ -21,7 +21,7 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -nvisy-document = { workspace = true } +nvisy-rt-document = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } diff --git a/crates/nvisy-pdf/src/document.rs b/crates/nvisy-pdf/src/document.rs index d74514f..f27c265 100644 --- a/crates/nvisy-pdf/src/document.rs +++ b/crates/nvisy-pdf/src/document.rs @@ -2,7 +2,7 @@ use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; +use nvisy_rt_document::{Document, DocumentInfo, Error, Region, RegionId, Result}; /// A loaded PDF document. #[derive(Debug)] diff --git a/crates/nvisy-pdf/src/format.rs b/crates/nvisy-pdf/src/format.rs index 7f3904e..89d2c3c 100644 --- a/crates/nvisy-pdf/src/format.rs +++ b/crates/nvisy-pdf/src/format.rs @@ -1,6 +1,6 @@ //! PDF format handler implementation. -use nvisy_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; +use nvisy_rt_document::{Capabilities, ContentData, DocumentFormat, Error, Result}; use crate::PdfDocument; diff --git a/crates/nvisy-text/Cargo.toml b/crates/nvisy-text/Cargo.toml index 80ab4ff..1737744 100644 --- a/crates/nvisy-text/Cargo.toml +++ b/crates/nvisy-text/Cargo.toml @@ -1,7 +1,7 @@ # https://doc.rust-lang.org/cargo/reference/manifest.html [package] -name = "nvisy-text" +name = "nvisy-rt-text" description = "Plain text document format support for nvisy" readme = "./README.md" @@ -21,7 +21,7 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -nvisy-document = { workspace = true } +nvisy-rt-document = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } diff --git a/crates/nvisy-text/README.md b/crates/nvisy-text/README.md index 7c14a4d..fb89aad 100644 --- a/crates/nvisy-text/README.md +++ b/crates/nvisy-text/README.md @@ -17,8 +17,8 @@ various text-based file formats: ## Usage ```rust -use nvisy_text::{PlainTextFormat, PlainTextDocument}; -use nvisy_document::{ContentData, DocumentFormat, Document, TextExtractor}; +use nvisy_rt_text::{PlainTextFormat, PlainTextDocument}; +use nvisy_rt_document::{ContentData, DocumentFormat, Document, TextExtractor}; # tokio_test::block_on(async { let format = PlainTextFormat::new(); @@ -39,7 +39,7 @@ assert_eq!(text.word_count(), 6); Basic plain text with paragraph detection. ```rust -use nvisy_text::PlainTextFormat; +use nvisy_rt_text::PlainTextFormat; ``` ### Markdown @@ -47,7 +47,7 @@ use nvisy_text::PlainTextFormat; Full Markdown parsing using pulldown-cmark with support for headings, lists, code blocks, blockquotes, and more. ```rust -use nvisy_text::MarkdownFormat; +use nvisy_rt_text::MarkdownFormat; ``` ### JSON @@ -55,7 +55,7 @@ use nvisy_text::MarkdownFormat; JSON parsing with structure detection using serde_json. 
```rust -use nvisy_text::JsonFormat; +use nvisy_rt_text::JsonFormat; ``` ### CSV/TSV @@ -63,8 +63,8 @@ use nvisy_text::JsonFormat; CSV and TSV parsing using the csv crate. Implements `TableExtractor` for structured table access. ```rust -use nvisy_text::CsvFormat; -use nvisy_document::TableExtractor; +use nvisy_rt_text::CsvFormat; +use nvisy_rt_document::TableExtractor; ``` ### XML @@ -72,7 +72,7 @@ use nvisy_document::TableExtractor; XML parsing with hierarchical structure detection. ```rust -use nvisy_text::XmlFormat; +use nvisy_rt_text::XmlFormat; ``` ### YAML @@ -80,7 +80,7 @@ use nvisy_text::XmlFormat; YAML parsing with list and key-value detection. ```rust -use nvisy_text::YamlFormat; +use nvisy_rt_text::YamlFormat; ``` ### TOML @@ -88,7 +88,7 @@ use nvisy_text::YamlFormat; TOML parsing with section and array table detection. ```rust -use nvisy_text::TomlFormat; +use nvisy_rt_text::TomlFormat; ``` ### INI @@ -96,7 +96,7 @@ use nvisy_text::TomlFormat; INI/config file parsing with section grouping. ```rust -use nvisy_text::IniFormat; +use nvisy_rt_text::IniFormat; ``` ## License diff --git a/crates/nvisy-text/src/documents/csv.rs b/crates/nvisy-text/src/documents/csv.rs index d766a71..b44d6f2 100644 --- a/crates/nvisy-text/src/documents/csv.rs +++ b/crates/nvisy-text/src/documents/csv.rs @@ -5,7 +5,7 @@ use std::num::NonZeroU32; use async_trait::async_trait; use bytes::Bytes; use csv::{ReaderBuilder, Terminator}; -use nvisy_document::{ +use nvisy_rt_document::{ BoundingBox, Document, DocumentInfo, ExtractedText, NormalizedCell, NormalizedRow, NormalizedTable, Region, RegionId, RegionKind, RegionSource, Result, TableExtractor, TextExtractor, diff --git a/crates/nvisy-text/src/documents/ini.rs b/crates/nvisy-text/src/documents/ini.rs index 91c1b3d..55e5d53 100644 --- a/crates/nvisy-text/src/documents/ini.rs +++ b/crates/nvisy-text/src/documents/ini.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU32; use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{ +use nvisy_rt_document::{ BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, Result, TextExtractor, }; diff --git a/crates/nvisy-text/src/documents/json.rs b/crates/nvisy-text/src/documents/json.rs index e82360a..ff51c7f 100644 --- a/crates/nvisy-text/src/documents/json.rs +++ b/crates/nvisy-text/src/documents/json.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU32; use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{ +use nvisy_rt_document::{ BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, Result, TextExtractor, }; @@ -23,7 +23,7 @@ impl JsonDocument { /// Creates a new JSON document from content. 
pub fn new(content: String) -> Result<Self> { let parsed: Value = serde_json::from_str(&content) - .map_err(|e| nvisy_document::Error::new(format!("Invalid JSON: {e}")))?; + .map_err(|e| nvisy_rt_document::Error::new(format!("Invalid JSON: {e}")))?; let regions = Self::extract_regions(&parsed); let size = content.len() as u64; diff --git a/crates/nvisy-text/src/documents/markdown.rs b/crates/nvisy-text/src/documents/markdown.rs index c30720f..4487cba 100644 --- a/crates/nvisy-text/src/documents/markdown.rs +++ b/crates/nvisy-text/src/documents/markdown.rs @@ -4,8 +4,9 @@ use std::num::NonZeroU32; use async_trait::async_trait; use bytes::Bytes; -use markdown::{ParseOptions, mdast::Node, to_mdast}; -use nvisy_document::{ +use markdown::mdast::Node; +use markdown::{ParseOptions, to_mdast}; +use nvisy_rt_document::{ BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, Result, TextExtractor, }; diff --git a/crates/nvisy-text/src/documents/plain.rs b/crates/nvisy-text/src/documents/plain.rs index e11caa1..75361e7 100644 --- a/crates/nvisy-text/src/documents/plain.rs +++ b/crates/nvisy-text/src/documents/plain.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU32; use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{ +use nvisy_rt_document::{ BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, Result, TextExtractor, }; diff --git a/crates/nvisy-text/src/documents/toml.rs b/crates/nvisy-text/src/documents/toml.rs index f5ae371..d485a06 100644 --- a/crates/nvisy-text/src/documents/toml.rs +++ b/crates/nvisy-text/src/documents/toml.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU32; use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{ +use nvisy_rt_document::{ BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, Result, TextExtractor, }; diff --git a/crates/nvisy-text/src/documents/xml.rs b/crates/nvisy-text/src/documents/xml.rs index 87b2448..c3a775b 100644 --- a/crates/nvisy-text/src/documents/xml.rs +++ b/crates/nvisy-text/src/documents/xml.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU32; use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{ +use nvisy_rt_document::{ BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, Result, TextExtractor, }; diff --git a/crates/nvisy-text/src/documents/yaml.rs b/crates/nvisy-text/src/documents/yaml.rs index 7557513..e109fb4 100644 --- a/crates/nvisy-text/src/documents/yaml.rs +++ b/crates/nvisy-text/src/documents/yaml.rs @@ -4,7 +4,7 @@ use std::num::NonZeroU32; use async_trait::async_trait; use bytes::Bytes; -use nvisy_document::{ +use nvisy_rt_document::{ BoundingBox, Document, DocumentInfo, ExtractedText, Region, RegionId, RegionKind, RegionSource, Result, TextExtractor, }; diff --git a/crates/nvisy-text/src/formats/csv.rs b/crates/nvisy-text/src/formats/csv.rs index 66b77b2..b140964 100644 --- a/crates/nvisy-text/src/formats/csv.rs +++ b/crates/nvisy-text/src/formats/csv.rs @@ -1,6 +1,6 @@ //! CSV format handler.
-use nvisy_document::{ +use nvisy_rt_document::{ Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities, }; @@ -74,7 +74,7 @@ impl DocumentFormat for CsvFormat { #[cfg(test)] mod tests { - use nvisy_document::Document; + use nvisy_rt_document::Document; use super::*; diff --git a/crates/nvisy-text/src/formats/ini.rs b/crates/nvisy-text/src/formats/ini.rs index b953cd8..d355411 100644 --- a/crates/nvisy-text/src/formats/ini.rs +++ b/crates/nvisy-text/src/formats/ini.rs @@ -1,6 +1,6 @@ //! INI format handler. -use nvisy_document::{ +use nvisy_rt_document::{ Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities, }; @@ -67,7 +67,7 @@ impl DocumentFormat for IniFormat { #[cfg(test)] mod tests { - use nvisy_document::Document; + use nvisy_rt_document::Document; use super::*; diff --git a/crates/nvisy-text/src/formats/json.rs b/crates/nvisy-text/src/formats/json.rs index c0e7be8..a16675f 100644 --- a/crates/nvisy-text/src/formats/json.rs +++ b/crates/nvisy-text/src/formats/json.rs @@ -1,6 +1,6 @@ //! JSON format handler. -use nvisy_document::{ +use nvisy_rt_document::{ Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities, }; @@ -67,7 +67,7 @@ impl DocumentFormat for JsonFormat { #[cfg(test)] mod tests { - use nvisy_document::Document; + use nvisy_rt_document::Document; use super::*; diff --git a/crates/nvisy-text/src/formats/markdown.rs b/crates/nvisy-text/src/formats/markdown.rs index 4e10f33..bfd280d 100644 --- a/crates/nvisy-text/src/formats/markdown.rs +++ b/crates/nvisy-text/src/formats/markdown.rs @@ -1,6 +1,6 @@ //! Markdown format handler. -use nvisy_document::{ +use nvisy_rt_document::{ Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities, }; @@ -67,7 +67,7 @@ impl DocumentFormat for MarkdownFormat { #[cfg(test)] mod tests { - use nvisy_document::Document; + use nvisy_rt_document::Document; use super::*; diff --git a/crates/nvisy-text/src/formats/plain.rs b/crates/nvisy-text/src/formats/plain.rs index ee57eb1..f53e5bf 100644 --- a/crates/nvisy-text/src/formats/plain.rs +++ b/crates/nvisy-text/src/formats/plain.rs @@ -1,6 +1,6 @@ //! Plain text format handler. -use nvisy_document::{Capabilities, ContentData, DocumentFormat, Result}; +use nvisy_rt_document::{Capabilities, ContentData, DocumentFormat, Result}; use crate::documents::PlainTextDocument; diff --git a/crates/nvisy-text/src/formats/toml.rs b/crates/nvisy-text/src/formats/toml.rs index 6929395..d101938 100644 --- a/crates/nvisy-text/src/formats/toml.rs +++ b/crates/nvisy-text/src/formats/toml.rs @@ -1,6 +1,6 @@ //! TOML format handler. -use nvisy_document::{ +use nvisy_rt_document::{ Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities, }; @@ -67,7 +67,7 @@ impl DocumentFormat for TomlFormat { #[cfg(test)] mod tests { - use nvisy_document::Document; + use nvisy_rt_document::Document; use super::*; diff --git a/crates/nvisy-text/src/formats/xml.rs b/crates/nvisy-text/src/formats/xml.rs index c92cd48..d3869db 100644 --- a/crates/nvisy-text/src/formats/xml.rs +++ b/crates/nvisy-text/src/formats/xml.rs @@ -1,6 +1,6 @@ //! XML format handler. 
-use nvisy_document::{ +use nvisy_rt_document::{ Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities, }; @@ -67,7 +67,7 @@ impl DocumentFormat for XmlFormat { #[cfg(test)] mod tests { - use nvisy_document::Document; + use nvisy_rt_document::Document; use super::*; diff --git a/crates/nvisy-text/src/formats/yaml.rs b/crates/nvisy-text/src/formats/yaml.rs index 4db8660..b055eb7 100644 --- a/crates/nvisy-text/src/formats/yaml.rs +++ b/crates/nvisy-text/src/formats/yaml.rs @@ -1,6 +1,6 @@ //! YAML format handler. -use nvisy_document::{ +use nvisy_rt_document::{ Capabilities, ContentData, DocumentFormat, Result, StructureCapabilities, TextCapabilities, }; @@ -67,7 +67,7 @@ impl DocumentFormat for YamlFormat { #[cfg(test)] mod tests { - use nvisy_document::Document; + use nvisy_rt_document::Document; use super::*; diff --git a/crates/nvisy-text/src/lib.rs b/crates/nvisy-text/src/lib.rs index a54d6d0..0302f17 100644 --- a/crates/nvisy-text/src/lib.rs +++ b/crates/nvisy-text/src/lib.rs @@ -6,22 +6,19 @@ pub mod documents; pub mod formats; // Re-export document types +// Legacy aliases for backwards compatibility +pub use PlainTextDocument as TextDocument; +pub use PlainTextFormat as TextFormat; pub use documents::{ CsvDocument, IniDocument, JsonDocument, MarkdownDocument, PlainTextDocument, TomlDocument, XmlDocument, YamlDocument, }; - // Re-export format handlers pub use formats::{ CsvFormat, IniFormat, JsonFormat, MarkdownFormat, PlainTextFormat, TomlFormat, XmlFormat, YamlFormat, }; - -// Legacy aliases for backwards compatibility -pub use PlainTextDocument as TextDocument; -pub use PlainTextFormat as TextFormat; - // Re-export commonly used types from nvisy-document -pub use nvisy_document::{ +pub use nvisy_rt_document::{ Document, DocumentFormat, ExtractedText, NormalizedTable, Region, TableExtractor, TextExtractor, }; From b953007bf1f251a70f023737e05b58606b002e7e Mon Sep 17 00:00:00 2001 From: Oleh Martsokha Date: Sat, 17 Jan 2026 08:27:54 +0100 Subject: [PATCH 5/5] style: fix formatting --- crates/nvisy-text/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/nvisy-text/src/lib.rs b/crates/nvisy-text/src/lib.rs index 0302f17..53b6b71 100644 --- a/crates/nvisy-text/src/lib.rs +++ b/crates/nvisy-text/src/lib.rs @@ -7,8 +7,6 @@ pub mod formats; // Re-export document types // Legacy aliases for backwards compatibility -pub use PlainTextDocument as TextDocument; -pub use PlainTextFormat as TextFormat; pub use documents::{ CsvDocument, IniDocument, JsonDocument, MarkdownDocument, PlainTextDocument, TomlDocument, XmlDocument, YamlDocument, @@ -22,3 +20,4 @@ pub use formats::{ pub use nvisy_rt_document::{ Document, DocumentFormat, ExtractedText, NormalizedTable, Region, TableExtractor, TextExtractor, }; +pub use {PlainTextDocument as TextDocument, PlainTextFormat as TextFormat};
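The first patch in this series introduced the `diff` module to `nvisy-document`; after the crate rename its types ship from `nvisy-rt-document`. A standalone sketch of that surface, using only the constructors and accessors defined in the hunks above (the region texts are made-up sample data, not values from the patch):

```rust
use nvisy_rt_document::{Change, Diff, RegionChange, RegionId};

fn main() {
    // Scalar changes carry old/new values plus a ChangeKind tag.
    let renamed: Change<String> = Change::modified("draft.txt".into(), "final.txt".into());
    assert!(renamed.kind.is_modified());

    // Region changes pair RegionIds with the affected text.
    let changes = vec![
        RegionChange::added(RegionId::new(), "inserted paragraph"),
        RegionChange::moved(RegionId::new(), RegionId::new(), "relocated heading"),
    ];

    // `with_changes` derives `is_identical` from the change list.
    let diff = Diff::with_changes(changes);
    assert!(!diff.is_identical);
    assert_eq!(diff.change_count(), 2);

    // `Diff` derefs to `Vec<RegionChange>`, so it iterates directly.
    for change in diff.iter() {
        println!("{}: {:?} -> {:?}", change.kind, change.old_id, change.new_id);
    }
}
```

A format crate implementing the `Differ` trait would produce exactly this shape from its `fn diff(&self, other: &Self) -> Diff`.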