From 3a411a92a03e31962dd273b102d14d2125b255e9 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sat, 3 Jan 2026 21:39:45 +0000 Subject: [PATCH 01/23] SQLite cache with incremental HTML regeneration and pagination - Migrate from JSON cache to SQLite for better performance and integrity - Add database migrations system for schema versioning - Implement incremental HTML regeneration (only rebuild changed sessions) - Add pagination support for large projects - Improve working directories handling - Use explicit UTF-8 encoding for Windows compatibility --- claude_code_log/cache.py | 1340 +++++++++++++---- claude_code_log/cli.py | 51 +- claude_code_log/converter.py | 709 ++++++++- .../templates/components/page_nav_styles.css | 74 + .../html/templates/transcript.html | 27 + .../migrations/001_initial_schema.sql | 114 ++ claude_code_log/migrations/002_html_cache.sql | 18 + .../migrations/003_html_pagination.sql | 39 + claude_code_log/migrations/__init__.py | 5 + claude_code_log/migrations/runner.py | 163 ++ claude_code_log/renderer.py | 15 + claude_code_log/tui.py | 6 +- claude_code_log/utils.py | 12 +- test/__snapshots__/test_snapshot_html.ambr | 308 +++- test/test_cache.py | 116 +- test/test_cache_integration.py | 109 +- test/test_cache_sqlite_integrity.py | 908 +++++++++++ test/test_html_regeneration.py | 194 ++- test/test_integration_realistic.py | 96 +- test/test_pagination.py | 550 +++++++ test/test_performance.py | 2 +- test/test_project_display_name.py | 33 +- test/test_project_matching.py | 33 +- test/test_sidechain_agents.py | 51 +- test/test_tui.py | 4 +- 25 files changed, 4353 insertions(+), 624 deletions(-) create mode 100644 claude_code_log/html/templates/components/page_nav_styles.css create mode 100644 claude_code_log/migrations/001_initial_schema.sql create mode 100644 claude_code_log/migrations/002_html_cache.sql create mode 100644 claude_code_log/migrations/003_html_pagination.sql create mode 100644 claude_code_log/migrations/__init__.py create mode 
100644 claude_code_log/migrations/runner.py create mode 100644 test/test_cache_sqlite_integrity.py create mode 100644 test/test_pagination.py diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 3f5d43b7..81522d80 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -1,14 +1,29 @@ #!/usr/bin/env python3 -"""Cache management for Claude Code Log to improve performance.""" +"""SQLite-based cache management for Claude Code Log.""" import json -from pathlib import Path -from typing import Any, Optional, cast +import sqlite3 +from contextlib import contextmanager from datetime import datetime -from pydantic import BaseModel +from pathlib import Path +from typing import Any, Dict, Generator, List, Optional + from packaging import version +from pydantic import BaseModel + +from .migrations.runner import run_migrations +from .models import ( + AssistantTranscriptEntry, + QueueOperationTranscriptEntry, + SummaryTranscriptEntry, + SystemTranscriptEntry, + TranscriptEntry, + UserTranscriptEntry, + parse_transcript_entry, +) -from .models import TranscriptEntry + +# ========== Data Models ========== class CachedFileInfo(BaseModel): @@ -37,6 +52,38 @@ class SessionCacheData(BaseModel): total_cache_read_tokens: int = 0 +class HtmlCacheEntry(BaseModel): + """Information about a generated HTML file.""" + + html_path: str # e.g., "session-abc123.html" or "combined_transcripts.html" + generated_at: str # ISO timestamp when HTML was generated + source_session_id: Optional[str] = ( + None # session_id for individual files, None for combined + ) + message_count: int = 0 # for sanity checking + library_version: str # which version generated it + + +class PageCacheData(BaseModel): + """Information about a paginated combined transcript page.""" + + page_number: int + html_path: str # e.g., "combined_transcripts.html" or "combined_transcripts_2.html" + page_size_config: int # the --page-size value used + message_count: int # total messages on this page + 
session_ids: List[str] # sessions on this page, in order + first_session_id: str + last_session_id: str + first_timestamp: Optional[str] = None + last_timestamp: Optional[str] = None + total_input_tokens: int = 0 + total_output_tokens: int = 0 + total_cache_creation_tokens: int = 0 + total_cache_read_tokens: int = 0 + generated_at: str # ISO timestamp when page was generated + library_version: str + + class ProjectCache(BaseModel): """Project-level cache index structure for index.json.""" @@ -66,11 +113,63 @@ class ProjectCache(BaseModel): latest_timestamp: str = "" +# ========== Helper Functions ========== + + +def get_library_version() -> str: + """Get the current library version from package metadata or pyproject.toml.""" + # First try to get version from installed package metadata + try: + from importlib.metadata import version as get_version + + return get_version("claude-code-log") + except Exception: + # Package not installed or other error, continue to file-based detection + pass + + # Second approach: Use importlib.resources for more robust package location detection + try: + from importlib import resources + import toml + + # Get the package directory and navigate to parent for pyproject.toml + package_files = resources.files("claude_code_log") + # Convert to Path to access parent reliably + package_root = Path(str(package_files)).parent + pyproject_path = package_root / "pyproject.toml" + + if pyproject_path.exists(): + with open(pyproject_path, "r", encoding="utf-8") as f: + pyproject_data = toml.load(f) + return pyproject_data.get("project", {}).get("version", "unknown") + except Exception: + pass + + # Final fallback: Try to read from pyproject.toml using file-relative path + try: + import toml + + project_root = Path(__file__).parent.parent + pyproject_path = project_root / "pyproject.toml" + + if pyproject_path.exists(): + with open(pyproject_path, "r", encoding="utf-8") as f: + pyproject_data = toml.load(f) + return pyproject_data.get("project", 
{}).get("version", "unknown") + except Exception: + pass + + return "unknown" + + +# ========== Cache Manager ========== + + class CacheManager: - """Manages cache operations for a project directory.""" + """SQLite-based cache manager for Claude Code Log.""" def __init__(self, project_path: Path, library_version: str): - """Initialize cache manager for a project. + """Initialise cache manager for a project. Args: project_path: Path to the project directory containing JSONL files @@ -78,263 +177,363 @@ def __init__(self, project_path: Path, library_version: str): """ self.project_path = project_path self.library_version = library_version - self.cache_dir = project_path / "cache" - self.index_file = self.cache_dir / "index.json" - - # Ensure cache directory exists - self.cache_dir.mkdir(exist_ok=True) - - # Load existing cache index if available - self._project_cache: Optional[ProjectCache] = None - self._load_project_cache() - - def _load_project_cache(self) -> None: - """Load the project cache index from disk.""" - if self.index_file.exists(): - try: - with open(self.index_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - self._project_cache = ProjectCache.model_validate(cache_data) - - # Check if cache version is compatible with current library version - if not self._is_cache_version_compatible(self._project_cache.version): + + # Database at parent level (projects_dir/cache.db) + self.db_path = project_path.parent / "cache.db" + + # Initialise database and ensure project exists + self._init_database() + self._project_id: Optional[int] = None + self._ensure_project_exists() + + @contextmanager + def _get_connection(self) -> Generator[sqlite3.Connection, None, None]: + """Get a database connection with proper settings.""" + conn = sqlite3.connect(self.db_path, timeout=30.0) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + conn.execute("PRAGMA journal_mode = WAL") + try: + yield conn + finally: + conn.close() + + def 
_init_database(self) -> None: + """Create schema if needed using migration runner.""" + # Run any pending migrations + run_migrations(self.db_path) + + def _ensure_project_exists(self) -> None: + """Ensure project record exists and get its ID.""" + project_path_str = str(self.project_path) + + with self._get_connection() as conn: + row = conn.execute( + "SELECT id, version FROM projects WHERE project_path = ?", + (project_path_str,), + ).fetchone() + + if row: + self._project_id = row["id"] + cached_version = row["version"] + + # Check version compatibility + if not self._is_cache_version_compatible(cached_version): print( - f"Cache version incompatible: {self._project_cache.version} -> {self.library_version}, invalidating cache" + f"Cache version incompatible: {cached_version} -> {self.library_version}, invalidating cache" ) - self.clear_cache() - self._project_cache = None - except Exception as e: - print(f"Warning: Failed to load cache index, will rebuild: {e}") - self._project_cache = None - - # Initialize empty cache if none exists - if self._project_cache is None: - self._project_cache = ProjectCache( - version=self.library_version, - cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(self.project_path), - cached_files={}, - sessions={}, - ) + self._clear_project_data(conn) + self._project_id = self._create_project(conn) + else: + self._project_id = self._create_project(conn) + + conn.commit() + + def _create_project(self, conn: sqlite3.Connection) -> int: + """Create a new project record.""" + now = datetime.now().isoformat() + cursor = conn.execute( + """ + INSERT INTO projects (project_path, version, cache_created, last_updated) + VALUES (?, ?, ?, ?) 
+ """, + (str(self.project_path), self.library_version, now, now), + ) + return cursor.lastrowid or 0 - def _save_project_cache(self) -> None: - """Save the project cache index to disk.""" - if self._project_cache is None: + def _clear_project_data(self, conn: sqlite3.Connection) -> None: + """Clear all data for the current project.""" + if self._project_id is None: return - self._project_cache.last_updated = datetime.now().isoformat() + # Cascade delete will handle messages and files + conn.execute("DELETE FROM projects WHERE id = ?", (self._project_id,)) - with open(self.index_file, "w", encoding="utf-8") as f: - json.dump(self._project_cache.model_dump(), f, indent=2) + def _update_last_updated(self, conn: sqlite3.Connection) -> None: + """Update the last_updated timestamp for the project.""" + if self._project_id is None: + return - def _get_cache_file_path(self, jsonl_path: Path) -> Path: - """Get the cache file path for a given JSONL file.""" - return self.cache_dir / f"{jsonl_path.stem}.json" + conn.execute( + "UPDATE projects SET last_updated = ? 
WHERE id = ?", + (datetime.now().isoformat(), self._project_id), + ) + + def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, Any]: + """Convert TranscriptEntry to dict for SQLite insertion.""" + base: Dict[str, Any] = { + "project_id": self._project_id, + "file_id": file_id, + "type": entry.type, + "timestamp": getattr(entry, "timestamp", None), + "session_id": getattr(entry, "sessionId", None), + "_uuid": getattr(entry, "uuid", None), + "_parent_uuid": getattr(entry, "parentUuid", None), + "_is_sidechain": 1 if getattr(entry, "isSidechain", False) else 0, + "_user_type": getattr(entry, "userType", None), + "_cwd": getattr(entry, "cwd", None), + "_version": getattr(entry, "version", None), + "_is_meta": ( + 1 + if getattr(entry, "isMeta", None) is True + else (0 if getattr(entry, "isMeta", None) is False else None) + ), + "_agent_id": getattr(entry, "agentId", None), + "_request_id": None, + "input_tokens": None, + "output_tokens": None, + "cache_creation_tokens": None, + "cache_read_tokens": None, + "_leaf_uuid": None, + "_level": None, + "_operation": None, + "content": json.dumps(entry.model_dump()), + } + + # Extract flattened usage for assistant messages + if isinstance(entry, AssistantTranscriptEntry): + base["_request_id"] = entry.requestId + if entry.message and entry.message.usage: + usage = entry.message.usage + base["input_tokens"] = usage.input_tokens + base["output_tokens"] = usage.output_tokens + base["cache_creation_tokens"] = usage.cache_creation_input_tokens + base["cache_read_tokens"] = usage.cache_read_input_tokens + + # User entry specific + if isinstance(entry, UserTranscriptEntry): + if entry.agentId: + base["_agent_id"] = entry.agentId + + # Summary specific + if isinstance(entry, SummaryTranscriptEntry): + base["_leaf_uuid"] = entry.leafUuid + + # System specific + if isinstance(entry, SystemTranscriptEntry): + base["_level"] = entry.level + + # Queue-operation specific + if isinstance(entry, 
QueueOperationTranscriptEntry): + base["_operation"] = entry.operation + + return base + + def _deserialize_entry(self, row: sqlite3.Row) -> TranscriptEntry: + """Convert SQLite row back to TranscriptEntry.""" + content_dict = json.loads(row["content"]) + return parse_transcript_entry(content_dict) + + def _get_file_id(self, jsonl_path: Path) -> Optional[int]: + """Get the file ID for a JSONL file.""" + if self._project_id is None: + return None + + with self._get_connection() as conn: + row = conn.execute( + "SELECT id FROM cached_files WHERE project_id = ? AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() + + return row["id"] if row else None def is_file_cached(self, jsonl_path: Path) -> bool: """Check if a JSONL file has a valid cache entry.""" - if self._project_cache is None: + if self._project_id is None: return False - file_key = jsonl_path.name - if file_key not in self._project_cache.cached_files: + if not jsonl_path.exists(): return False - # Check if source file exists and modification time matches - if not jsonl_path.exists(): + with self._get_connection() as conn: + row = conn.execute( + "SELECT source_mtime FROM cached_files WHERE project_id = ? 
AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() + + if not row: return False - cached_info = self._project_cache.cached_files[file_key] source_mtime = jsonl_path.stat().st_mtime + cached_mtime = row["source_mtime"] - # Cache is valid if modification times match and cache file exists - cache_file = self._get_cache_file_path(jsonl_path) - return ( - abs(source_mtime - cached_info.source_mtime) < 1.0 and cache_file.exists() - ) + # Cache is valid if modification times match (within 1 second tolerance) + return abs(source_mtime - cached_mtime) < 1.0 - def load_cached_entries(self, jsonl_path: Path) -> Optional[list[TranscriptEntry]]: + def load_cached_entries(self, jsonl_path: Path) -> Optional[List[TranscriptEntry]]: """Load cached transcript entries for a JSONL file.""" if not self.is_file_cached(jsonl_path): return None - cache_file = self._get_cache_file_path(jsonl_path) - try: - with open(cache_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - - # Expect timestamp-keyed format - flatten all entries - entries_data: list[dict[str, Any]] = [] - for timestamp_entries in cache_data.values(): - if isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - entries_data.extend(cast(list[dict[str, Any]], timestamp_entries)) - - # Deserialize back to TranscriptEntry objects - from .factories import create_transcript_entry - - entries = [ - create_transcript_entry(entry_dict) for entry_dict in entries_data - ] - return entries - except Exception as e: - print(f"Warning: Failed to load cached entries from {cache_file}: {e}") + file_id = self._get_file_id(jsonl_path) + if file_id is None: return None + with self._get_connection() as conn: + rows = conn.execute( + "SELECT content FROM messages WHERE file_id = ? 
ORDER BY timestamp NULLS LAST", + (file_id,), + ).fetchall() + + return [self._deserialize_entry(row) for row in rows] + def load_cached_entries_filtered( self, jsonl_path: Path, from_date: Optional[str], to_date: Optional[str] - ) -> Optional[list[TranscriptEntry]]: - """Load cached entries with efficient timestamp-based filtering.""" + ) -> Optional[List[TranscriptEntry]]: + """Load cached entries with SQL-based timestamp filtering.""" if not self.is_file_cached(jsonl_path): return None - cache_file = self._get_cache_file_path(jsonl_path) - try: - with open(cache_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - - # If no date filtering needed, fall back to regular loading - if not from_date and not to_date: - return self.load_cached_entries(jsonl_path) - - # Parse date filters - from .parser import parse_timestamp - import dateparser - - from_dt = None - to_dt = None - - if from_date: - from_dt = dateparser.parse(from_date) - if from_dt and ( - from_date in ["today", "yesterday"] or "days ago" in from_date - ): - from_dt = from_dt.replace(hour=0, minute=0, second=0, microsecond=0) - - if to_date: - to_dt = dateparser.parse(to_date) - if to_dt: - if to_date in ["today", "yesterday"] or "days ago" in to_date: - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) - else: - # For simple date strings like "2023-01-01", set to end of day - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) - - # Filter entries by timestamp - filtered_entries_data: list[dict[str, Any]] = [] - - for timestamp_key, timestamp_entries in cache_data.items(): - if timestamp_key == "_no_timestamp": - # Always include entries without timestamps (like summaries) - if isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - filtered_entries_data.extend( - cast(list[dict[str, Any]], timestamp_entries) - ) - else: - # Check if timestamp falls within range - message_dt = 
parse_timestamp(timestamp_key) - if message_dt: - # Convert to naive datetime for comparison - if message_dt.tzinfo: - message_dt = message_dt.replace(tzinfo=None) - - # Apply date filtering - if from_dt and message_dt < from_dt: - continue - if to_dt and message_dt > to_dt: - continue - - if isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - filtered_entries_data.extend( - cast(list[dict[str, Any]], timestamp_entries) - ) - - # Deserialize filtered entries - from .factories import create_transcript_entry - - entries = [ - create_transcript_entry(entry_dict) - for entry_dict in filtered_entries_data - ] - return entries - except Exception as e: - print( - f"Warning: Failed to load filtered cached entries from {cache_file}: {e}" - ) + # If no date filtering needed, fall back to regular loading + if not from_date and not to_date: + return self.load_cached_entries(jsonl_path) + + file_id = self._get_file_id(jsonl_path) + if file_id is None: return None + # Parse dates + import dateparser + + from_dt = None + to_dt = None + + if from_date: + from_dt = dateparser.parse(from_date) + if from_dt and ( + from_date in ["today", "yesterday"] or "days ago" in from_date + ): + from_dt = from_dt.replace(hour=0, minute=0, second=0, microsecond=0) + + if to_date: + to_dt = dateparser.parse(to_date) + if to_dt: + if to_date in ["today", "yesterday"] or "days ago" in to_date: + to_dt = to_dt.replace( + hour=23, minute=59, second=59, microsecond=999999 + ) + else: + to_dt = to_dt.replace( + hour=23, minute=59, second=59, microsecond=999999 + ) + + # Build query with SQL-based filtering + sql = "SELECT content FROM messages WHERE file_id = ?" 
+ params: List[Any] = [file_id] + + if from_dt: + # Include entries with NULL timestamp (like summaries) OR within date range + sql += " AND (timestamp IS NULL OR timestamp >= ?)" + params.append(from_dt.isoformat()) + + if to_dt: + sql += " AND (timestamp IS NULL OR timestamp <= ?)" + params.append(to_dt.isoformat()) + + sql += " ORDER BY timestamp NULLS LAST" + + with self._get_connection() as conn: + rows = conn.execute(sql, params).fetchall() + + return [self._deserialize_entry(row) for row in rows] + def save_cached_entries( - self, jsonl_path: Path, entries: list[TranscriptEntry] + self, jsonl_path: Path, entries: List[TranscriptEntry] ) -> None: - """Save parsed transcript entries to cache with timestamp-based structure.""" - cache_file = self._get_cache_file_path(jsonl_path) + """Save parsed transcript entries to cache.""" + if self._project_id is None: + return - try: - # Create timestamp-keyed cache structure for efficient date filtering - cache_data: dict[str, Any] = {} + source_mtime = jsonl_path.stat().st_mtime + cached_mtime = datetime.now().timestamp() + + with self._get_connection() as conn: + # Insert or update file record + conn.execute( + """ + INSERT OR REPLACE INTO cached_files + (project_id, file_name, file_path, source_mtime, cached_mtime, message_count) + VALUES (?, ?, ?, ?, ?, ?) + """, + ( + self._project_id, + jsonl_path.name, + str(jsonl_path), + source_mtime, + cached_mtime, + len(entries), + ), + ) + + # Get the file ID + row = conn.execute( + "SELECT id FROM cached_files WHERE project_id = ? 
AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() + file_id = row["id"] + # Delete existing messages for this file + conn.execute("DELETE FROM messages WHERE file_id = ?", (file_id,)) + + # Insert all entries in a batch for entry in entries: - # Get timestamp - use empty string as fallback for entries without timestamps - timestamp = ( - getattr(entry, "timestamp", "") - if hasattr(entry, "timestamp") - else "" - ) - if not timestamp: - # Use a special key for entries without timestamps (like summaries) - timestamp = "_no_timestamp" - - # Store entry data under timestamp - if timestamp not in cache_data: - cache_data[timestamp] = [] - - cache_data[timestamp].append(entry.model_dump()) - - with open(cache_file, "w", encoding="utf-8") as f: - json.dump(cache_data, f, indent=2) - - # Update cache index - if self._project_cache is not None: - source_mtime = jsonl_path.stat().st_mtime - cached_mtime = cache_file.stat().st_mtime - - # Extract session IDs from entries - session_ids: list[str] = [] - for entry in entries: - if hasattr(entry, "sessionId"): - session_id = getattr(entry, "sessionId", "") - if session_id: - session_ids.append(session_id) - session_ids = list(set(session_ids)) # Remove duplicates - - self._project_cache.cached_files[jsonl_path.name] = CachedFileInfo( - file_path=str(jsonl_path), - source_mtime=source_mtime, - cached_mtime=cached_mtime, - message_count=len(entries), - session_ids=session_ids, + serialized = self._serialize_entry(entry, file_id) + conn.execute( + """ + INSERT INTO messages ( + project_id, file_id, type, timestamp, session_id, + _uuid, _parent_uuid, _is_sidechain, _user_type, _cwd, _version, + _is_meta, _agent_id, _request_id, + input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, + _leaf_uuid, _level, _operation, content + ) VALUES ( + :project_id, :file_id, :type, :timestamp, :session_id, + :_uuid, :_parent_uuid, :_is_sidechain, :_user_type, :_cwd, :_version, + :_is_meta, :_agent_id, 
:_request_id, + :input_tokens, :output_tokens, :cache_creation_tokens, :cache_read_tokens, + :_leaf_uuid, :_level, :_operation, :content + ) + """, + serialized, ) - self._save_project_cache() - except Exception as e: - print(f"Warning: Failed to save cached entries to {cache_file}: {e}") + self._update_last_updated(conn) + conn.commit() - def update_session_cache(self, session_data: dict[str, SessionCacheData]) -> None: + def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> None: """Update cached session information.""" - if self._project_cache is None: + if self._project_id is None: return - self._project_cache.sessions.update( - {session_id: data for session_id, data in session_data.items()} - ) - self._save_project_cache() + with self._get_connection() as conn: + for session_id, data in session_data.items(): + conn.execute( + """ + INSERT OR REPLACE INTO sessions ( + project_id, session_id, summary, first_timestamp, last_timestamp, + message_count, first_user_message, cwd, + total_input_tokens, total_output_tokens, + total_cache_creation_tokens, total_cache_read_tokens + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + self._project_id, + session_id, + data.summary, + data.first_timestamp, + data.last_timestamp, + data.message_count, + data.first_user_message, + data.cwd, + data.total_input_tokens, + data.total_output_tokens, + data.total_cache_creation_tokens, + data.total_cache_read_tokens, + ), + ) + + self._update_last_updated(conn) + conn.commit() def update_project_aggregates( self, @@ -347,165 +546,650 @@ def update_project_aggregates( latest_timestamp: str, ) -> None: """Update project-level aggregate information.""" - if self._project_cache is None: + if self._project_id is None: return - self._project_cache.total_message_count = total_message_count - self._project_cache.total_input_tokens = total_input_tokens - self._project_cache.total_output_tokens = total_output_tokens - self._project_cache.total_cache_creation_tokens = total_cache_creation_tokens - self._project_cache.total_cache_read_tokens = total_cache_read_tokens - self._project_cache.earliest_timestamp = earliest_timestamp - self._project_cache.latest_timestamp = latest_timestamp + with self._get_connection() as conn: + conn.execute( + """ + UPDATE projects SET + total_message_count = ?, + total_input_tokens = ?, + total_output_tokens = ?, + total_cache_creation_tokens = ?, + total_cache_read_tokens = ?, + earliest_timestamp = ?, + latest_timestamp = ?, + last_updated = ? + WHERE id = ? + """, + ( + total_message_count, + total_input_tokens, + total_output_tokens, + total_cache_creation_tokens, + total_cache_read_tokens, + earliest_timestamp, + latest_timestamp, + datetime.now().isoformat(), + self._project_id, + ), + ) + conn.commit() - self._save_project_cache() + def get_working_directories(self) -> List[str]: + """Get list of working directories associated with this project. 
- def update_working_directories(self, working_directories: list[str]) -> None: - """Update the list of working directories associated with this project.""" - if self._project_cache is None: - return + Queries distinct cwd values from sessions table. + """ + if self._project_id is None: + return [] - self._project_cache.working_directories = working_directories - self._save_project_cache() + with self._get_connection() as conn: + rows = conn.execute( + "SELECT DISTINCT cwd FROM sessions WHERE project_id = ? AND cwd IS NOT NULL", + (self._project_id,), + ).fetchall() - def get_modified_files(self, jsonl_files: list[Path]) -> list[Path]: - """Get list of JSONL files that need to be reprocessed.""" - modified_files: list[Path] = [] + return [row["cwd"] for row in rows] - for jsonl_file in jsonl_files: - if not self.is_file_cached(jsonl_file): - modified_files.append(jsonl_file) - - return modified_files + def get_modified_files(self, jsonl_files: List[Path]) -> List[Path]: + """Get list of JSONL files that need to be reprocessed.""" + return [ + jsonl_file + for jsonl_file in jsonl_files + if not self.is_file_cached(jsonl_file) + ] def get_cached_project_data(self) -> Optional[ProjectCache]: """Get the cached project data if available.""" - return self._project_cache + if self._project_id is None: + return None - def clear_cache(self) -> None: - """Clear all cache files and reset the project cache.""" - if self.cache_dir.exists(): - for cache_file in self.cache_dir.glob("*.json"): - if cache_file.name != "index.json": # Don't delete the index file here - try: - cache_file.unlink() - except Exception as e: - print(f"Warning: Failed to delete cache file {cache_file}: {e}") - - if self.index_file.exists(): - try: - self.index_file.unlink() - except Exception as e: - print(f"Warning: Failed to delete cache index {self.index_file}: {e}") - - # Reset the project cache - self._project_cache = ProjectCache( - version=self.library_version, - 
cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(self.project_path), - cached_files={}, - sessions={}, + with self._get_connection() as conn: + # Get project data + project_row = conn.execute( + "SELECT * FROM projects WHERE id = ?", (self._project_id,) + ).fetchone() + + if not project_row: + return None + + # Get cached files + file_rows = conn.execute( + "SELECT * FROM cached_files WHERE project_id = ?", (self._project_id,) + ).fetchall() + + cached_files: Dict[str, CachedFileInfo] = {} + for row in file_rows: + # Get session IDs for this file from messages + session_rows = conn.execute( + "SELECT DISTINCT session_id FROM messages WHERE file_id = ? AND session_id IS NOT NULL", + (row["id"],), + ).fetchall() + session_ids = [r["session_id"] for r in session_rows] + + cached_files[row["file_name"]] = CachedFileInfo( + file_path=row["file_path"], + source_mtime=row["source_mtime"], + cached_mtime=row["cached_mtime"], + message_count=row["message_count"], + session_ids=session_ids, + ) + + # Get sessions + session_rows = conn.execute( + "SELECT * FROM sessions WHERE project_id = ?", (self._project_id,) + ).fetchall() + + sessions: Dict[str, SessionCacheData] = {} + for row in session_rows: + sessions[row["session_id"]] = SessionCacheData( + session_id=row["session_id"], + summary=row["summary"], + first_timestamp=row["first_timestamp"], + last_timestamp=row["last_timestamp"], + message_count=row["message_count"], + first_user_message=row["first_user_message"], + cwd=row["cwd"], + total_input_tokens=row["total_input_tokens"], + total_output_tokens=row["total_output_tokens"], + total_cache_creation_tokens=row["total_cache_creation_tokens"], + total_cache_read_tokens=row["total_cache_read_tokens"], + ) + + return ProjectCache( + version=project_row["version"], + cache_created=project_row["cache_created"], + last_updated=project_row["last_updated"], + project_path=project_row["project_path"], + 
cached_files=cached_files, + total_message_count=project_row["total_message_count"], + total_input_tokens=project_row["total_input_tokens"], + total_output_tokens=project_row["total_output_tokens"], + total_cache_creation_tokens=project_row["total_cache_creation_tokens"], + total_cache_read_tokens=project_row["total_cache_read_tokens"], + sessions=sessions, + working_directories=self.get_working_directories(), + earliest_timestamp=project_row["earliest_timestamp"], + latest_timestamp=project_row["latest_timestamp"], ) - def _is_cache_version_compatible(self, cache_version: str) -> bool: - """Check if a cache version is compatible with the current library version. + def clear_cache(self) -> None: + """Clear all cache data for this project.""" + if self._project_id is None: + return - This uses a compatibility matrix to determine if cache invalidation is needed. - Only breaking changes require cache invalidation, not every version bump. - """ + with self._get_connection() as conn: + self._clear_project_data(conn) + self._project_id = self._create_project(conn) + conn.commit() + + def _is_cache_version_compatible(self, cache_version: str) -> bool: + """Check if a cache version is compatible with the current library version.""" if cache_version == self.library_version: return True # Define compatibility rules - # Format: "cache_version": "minimum_library_version_required" - # If cache version is older than the minimum required, it needs invalidation breaking_changes: dict[str, str] = { - # 0.9.0 introduced _compact_ide_tags_for_preview() which transforms - # first_user_message to use emoji indicators instead of raw IDE tags - "0.8.0": "0.9.0", + # Example: "0.3.3": "0.3.4" means cache from 0.3.3 needs invalidation if lib is >= 0.3.4 } cache_ver = version.parse(cache_version) current_ver = version.parse(self.library_version) - # Check if cache version requires invalidation due to breaking changes for breaking_version_pattern, min_required in breaking_changes.items(): 
min_required_ver = version.parse(min_required) - # If current version is at or above the minimum required for this breaking change if current_ver >= min_required_ver: - # Check if cache version is affected by this breaking change if breaking_version_pattern.endswith(".x"): - # Pattern like "0.2.x" matches any 0.2.* version major_minor = breaking_version_pattern[:-2] if str(cache_ver).startswith(major_minor): return False else: - # Exact version or version comparison breaking_ver = version.parse(breaking_version_pattern) if cache_ver <= breaking_ver: return False - # If no breaking changes affect this cache version, it's compatible return True - def get_cache_stats(self) -> dict[str, Any]: + def get_cache_stats(self) -> Dict[str, Any]: """Get cache statistics for reporting.""" - if self._project_cache is None: + if self._project_id is None: + return {"cache_enabled": False} + + with self._get_connection() as conn: + project_row = conn.execute( + "SELECT * FROM projects WHERE id = ?", (self._project_id,) + ).fetchone() + + file_count = conn.execute( + "SELECT COUNT(*) as cnt FROM cached_files WHERE project_id = ?", + (self._project_id,), + ).fetchone() + + session_count = conn.execute( + "SELECT COUNT(*) as cnt FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchone() + + if not project_row: return {"cache_enabled": False} return { "cache_enabled": True, - "cached_files_count": len(self._project_cache.cached_files), - "total_cached_messages": self._project_cache.total_message_count, - "total_sessions": len(self._project_cache.sessions), - "cache_created": self._project_cache.cache_created, - "last_updated": self._project_cache.last_updated, + "cached_files_count": file_count["cnt"] if file_count else 0, + "total_cached_messages": project_row["total_message_count"], + "total_sessions": session_count["cnt"] if session_count else 0, + "cache_created": project_row["cache_created"], + "last_updated": project_row["last_updated"], } + # ========== HTML 
Cache Methods ========== -def get_library_version() -> str: - """Get the current library version from package metadata or pyproject.toml.""" - # First try to get version from installed package metadata - try: - from importlib.metadata import version + def get_html_cache(self, html_path: str) -> Optional[HtmlCacheEntry]: + """Get HTML cache entry for a given path.""" + if self._project_id is None: + return None - return version("claude-code-log") - except Exception: - # Package not installed or other error, continue to file-based detection - pass + with self._get_connection() as conn: + row = conn.execute( + """SELECT html_path, generated_at, source_session_id, message_count, library_version + FROM html_cache + WHERE project_id = ? AND html_path = ?""", + (self._project_id, html_path), + ).fetchone() - # Second approach: Use importlib.resources for more robust package location detection - try: - from importlib import resources - import toml + if not row: + return None - # Get the package directory and navigate to parent for pyproject.toml - package_files = resources.files("claude_code_log") - # Convert to Path to access parent reliably - package_root = Path(str(package_files)).parent - pyproject_path = package_root / "pyproject.toml" + return HtmlCacheEntry( + html_path=row["html_path"], + generated_at=row["generated_at"], + source_session_id=row["source_session_id"], + message_count=row["message_count"] or 0, + library_version=row["library_version"], + ) - if pyproject_path.exists(): - with open(pyproject_path, "r", encoding="utf-8") as f: - pyproject_data = toml.load(f) - return pyproject_data.get("project", {}).get("version", "unknown") - except Exception: - pass + def update_html_cache( + self, + html_path: str, + session_id: Optional[str], + message_count: int, + ) -> None: + """Update or insert HTML cache entry.""" + if self._project_id is None: + return - # Final fallback: Try to read from pyproject.toml using file-relative path - try: - import toml + with 
self._get_connection() as conn: + conn.execute( + """INSERT INTO html_cache + (project_id, html_path, generated_at, source_session_id, message_count, library_version) + VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT(project_id, html_path) + DO UPDATE SET + generated_at = excluded.generated_at, + source_session_id = excluded.source_session_id, + message_count = excluded.message_count, + library_version = excluded.library_version""", + ( + self._project_id, + html_path, + datetime.now().isoformat(), + session_id, + message_count, + self.library_version, + ), + ) + conn.commit() - project_root = Path(__file__).parent.parent - pyproject_path = project_root / "pyproject.toml" + def is_html_stale( + self, html_path: str, session_id: Optional[str] = None + ) -> tuple[bool, str]: + """Check if HTML file needs regeneration. - if pyproject_path.exists(): - with open(pyproject_path, "r", encoding="utf-8") as f: - pyproject_data = toml.load(f) - return pyproject_data.get("project", {}).get("version", "unknown") - except Exception: - pass + Args: + html_path: Path to HTML file (e.g., "session-abc123.html") + session_id: Session ID for individual session files, None for combined - return "unknown" + Returns: + Tuple of (is_stale: bool, reason: str) + """ + from .renderer import is_html_outdated + + if self._project_id is None: + return True, "no_cache" + + # Get existing HTML cache entry + html_cache = self.get_html_cache(html_path) + if html_cache is None: + return True, "not_cached" + + # Check library version in cache + if html_cache.library_version != self.library_version: + return True, "version_mismatch" + + # Check if file exists and has correct version + actual_file = self.project_path / html_path + if not actual_file.exists(): + return True, "file_missing" + if is_html_outdated(actual_file): + return True, "file_version_mismatch" + + with self._get_connection() as conn: + if session_id is not None: + # For individual session HTML: check if session message count changed + row = 
conn.execute( + """SELECT message_count FROM sessions + WHERE project_id = ? AND session_id = ?""", + (self._project_id, session_id), + ).fetchone() + + if not row: + return True, "session_not_found" + + # Compare message counts + if row["message_count"] != html_cache.message_count: + return True, "session_updated" + else: + # For combined transcript: check if total message count changed + # This is more reliable than timestamp comparison, which can + # trigger false positives when cache metadata is updated + row = conn.execute( + """SELECT total_message_count FROM projects + WHERE id = ?""", + (self._project_id,), + ).fetchone() + + if row and row["total_message_count"] != html_cache.message_count: + return True, "project_updated" + + return False, "up_to_date" + + def get_stale_sessions(self) -> List[tuple[str, str]]: + """Get list of sessions that need HTML regeneration. + + Returns: + List of (session_id, reason) tuples for sessions needing regeneration + """ + if self._project_id is None: + return [] + + stale_sessions: List[tuple[str, str]] = [] + + with self._get_connection() as conn: + # Get all sessions + session_rows = conn.execute( + """SELECT session_id, last_timestamp FROM sessions + WHERE project_id = ?""", + (self._project_id,), + ).fetchall() + + for row in session_rows: + session_id = row["session_id"] + html_path = f"session-{session_id}.html" + + is_stale, reason = self.is_html_stale(html_path, session_id) + if is_stale: + stale_sessions.append((session_id, reason)) + + return stale_sessions + + # ========== Page Cache Methods (Pagination) ========== + + def get_page_size_config(self) -> Optional[int]: + """Get the configured page size from the most recent page, if any.""" + if self._project_id is None: + return None + + with self._get_connection() as conn: + row = conn.execute( + """SELECT page_size_config FROM html_pages + WHERE project_id = ? 
+ ORDER BY page_number ASC + LIMIT 1""", + (self._project_id,), + ).fetchone() + + return row["page_size_config"] if row else None + + def get_page_data(self, page_number: int) -> Optional[PageCacheData]: + """Get cache data for a specific page.""" + if self._project_id is None: + return None + + with self._get_connection() as conn: + # Get page info + page_row = conn.execute( + """SELECT * FROM html_pages + WHERE project_id = ? AND page_number = ?""", + (self._project_id, page_number), + ).fetchone() + + if not page_row: + return None + + # Get sessions for this page + session_rows = conn.execute( + """SELECT session_id FROM page_sessions + WHERE page_id = ? + ORDER BY session_order ASC""", + (page_row["id"],), + ).fetchall() + + session_ids = [row["session_id"] for row in session_rows] + + return PageCacheData( + page_number=page_row["page_number"], + html_path=page_row["html_path"], + page_size_config=page_row["page_size_config"], + message_count=page_row["message_count"], + session_ids=session_ids, + first_session_id=page_row["first_session_id"], + last_session_id=page_row["last_session_id"], + first_timestamp=page_row["first_timestamp"], + last_timestamp=page_row["last_timestamp"], + total_input_tokens=page_row["total_input_tokens"] or 0, + total_output_tokens=page_row["total_output_tokens"] or 0, + total_cache_creation_tokens=page_row["total_cache_creation_tokens"] or 0, + total_cache_read_tokens=page_row["total_cache_read_tokens"] or 0, + generated_at=page_row["generated_at"], + library_version=page_row["library_version"], + ) + + def get_all_pages(self) -> List[PageCacheData]: + """Get all cached pages for this project.""" + if self._project_id is None: + return [] + + pages: List[PageCacheData] = [] + with self._get_connection() as conn: + page_rows = conn.execute( + """SELECT * FROM html_pages + WHERE project_id = ? 
+ ORDER BY page_number ASC""", + (self._project_id,), + ).fetchall() + + for page_row in page_rows: + session_rows = conn.execute( + """SELECT session_id FROM page_sessions + WHERE page_id = ? + ORDER BY session_order ASC""", + (page_row["id"],), + ).fetchall() + + session_ids = [row["session_id"] for row in session_rows] + + pages.append( + PageCacheData( + page_number=page_row["page_number"], + html_path=page_row["html_path"], + page_size_config=page_row["page_size_config"], + message_count=page_row["message_count"], + session_ids=session_ids, + first_session_id=page_row["first_session_id"], + last_session_id=page_row["last_session_id"], + first_timestamp=page_row["first_timestamp"], + last_timestamp=page_row["last_timestamp"], + total_input_tokens=page_row["total_input_tokens"] or 0, + total_output_tokens=page_row["total_output_tokens"] or 0, + total_cache_creation_tokens=page_row[ + "total_cache_creation_tokens" + ] + or 0, + total_cache_read_tokens=page_row["total_cache_read_tokens"] + or 0, + generated_at=page_row["generated_at"], + library_version=page_row["library_version"], + ) + ) + + return pages + + def update_page_cache( + self, + page_number: int, + html_path: str, + page_size_config: int, + session_ids: List[str], + message_count: int, + first_timestamp: Optional[str], + last_timestamp: Optional[str], + total_input_tokens: int, + total_output_tokens: int, + total_cache_creation_tokens: int, + total_cache_read_tokens: int, + ) -> None: + """Update or insert page cache entry.""" + if self._project_id is None or not session_ids: + return + + with self._get_connection() as conn: + # Insert or update page + conn.execute( + """INSERT INTO html_pages + (project_id, page_number, html_path, page_size_config, message_count, + first_session_id, last_session_id, first_timestamp, last_timestamp, + total_input_tokens, total_output_tokens, + total_cache_creation_tokens, total_cache_read_tokens, + generated_at, library_version) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 
?, ?, ?, ?, ?, ?) + ON CONFLICT(project_id, page_number) + DO UPDATE SET + html_path = excluded.html_path, + page_size_config = excluded.page_size_config, + message_count = excluded.message_count, + first_session_id = excluded.first_session_id, + last_session_id = excluded.last_session_id, + first_timestamp = excluded.first_timestamp, + last_timestamp = excluded.last_timestamp, + total_input_tokens = excluded.total_input_tokens, + total_output_tokens = excluded.total_output_tokens, + total_cache_creation_tokens = excluded.total_cache_creation_tokens, + total_cache_read_tokens = excluded.total_cache_read_tokens, + generated_at = excluded.generated_at, + library_version = excluded.library_version""", + ( + self._project_id, + page_number, + html_path, + page_size_config, + message_count, + session_ids[0], + session_ids[-1], + first_timestamp, + last_timestamp, + total_input_tokens, + total_output_tokens, + total_cache_creation_tokens, + total_cache_read_tokens, + datetime.now().isoformat(), + self.library_version, + ), + ) + + # Get the page ID + row = conn.execute( + """SELECT id FROM html_pages + WHERE project_id = ? AND page_number = ?""", + (self._project_id, page_number), + ).fetchone() + page_id = row["id"] + + # Delete existing session mappings + conn.execute("DELETE FROM page_sessions WHERE page_id = ?", (page_id,)) + + # Insert session mappings + for order, session_id in enumerate(session_ids): + conn.execute( + """INSERT INTO page_sessions (page_id, session_id, session_order) + VALUES (?, ?, ?)""", + (page_id, session_id, order), + ) + + conn.commit() + + def is_page_stale( + self, page_number: int, page_size_config: int + ) -> tuple[bool, str]: + """Check if a page needs regeneration. 
+ + Args: + page_number: The page number to check + page_size_config: The current page size configuration + + Returns: + Tuple of (is_stale: bool, reason: str) + """ + from .renderer import is_html_outdated + + if self._project_id is None: + return True, "no_cache" + + page_data = self.get_page_data(page_number) + if page_data is None: + return True, "not_cached" + + # Check if page size config changed + if page_data.page_size_config != page_size_config: + return True, "page_size_changed" + + # Check library version + if page_data.library_version != self.library_version: + return True, "version_mismatch" + + # Check if HTML file exists and has correct version + actual_file = self.project_path / page_data.html_path + if not actual_file.exists(): + return True, "file_missing" + if is_html_outdated(actual_file): + return True, "file_version_mismatch" + + # Check if any session on this page has changed + with self._get_connection() as conn: + for session_id in page_data.session_ids: + row = conn.execute( + """SELECT message_count FROM sessions + WHERE project_id = ? AND session_id = ?""", + (self._project_id, session_id), + ).fetchone() + + if not row: + return True, "session_missing" + + # We need to check if session content changed + # For now, just check if session exists + + return False, "up_to_date" + + def invalidate_all_pages(self) -> List[str]: + """Delete all page cache entries for this project. 
+ + Returns: + List of HTML file paths that were invalidated (for cleanup) + """ + if self._project_id is None: + return [] + + html_paths: List[str] = [] + + with self._get_connection() as conn: + # Get all page paths before deleting + rows = conn.execute( + """SELECT html_path FROM html_pages WHERE project_id = ?""", + (self._project_id,), + ).fetchall() + html_paths = [row["html_path"] for row in rows] + + # Delete all pages (cascade deletes page_sessions) + conn.execute( + "DELETE FROM html_pages WHERE project_id = ?", (self._project_id,) + ) + conn.commit() + + return html_paths + + def get_page_count(self) -> int: + """Get the number of cached pages for this project.""" + if self._project_id is None: + return 0 + + with self._get_connection() as conn: + row = conn.execute( + """SELECT COUNT(*) as cnt FROM html_pages WHERE project_id = ?""", + (self._project_id,), + ).fetchone() + + return row["cnt"] if row else 0 + + +__all__ = [ + "CacheManager", + "CachedFileInfo", + "HtmlCacheEntry", + "PageCacheData", + "ProjectCache", + "SessionCacheData", + "get_library_version", +] diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index cef23725..90f0e4b3 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -193,24 +193,23 @@ def _find_relative_matches( try: # Load cache to check for working directories cache_manager = CacheManager(project_dir, get_library_version()) - project_cache = cache_manager.get_cached_project_data() + working_directories = cache_manager.get_working_directories() # Build cache if needed - if not project_cache or not project_cache.working_directories: + if not working_directories: jsonl_files = list(project_dir.glob("*.jsonl")) if jsonl_files: try: convert_jsonl_to_html(project_dir, silent=True) - project_cache = cache_manager.get_cached_project_data() + working_directories = cache_manager.get_working_directories() except Exception as e: logging.warning( f"Failed to build cache for project {project_dir.name}: {e}" ) - 
project_cache = None - if project_cache and project_cache.working_directories: + if working_directories: # Check for relative matches - for cwd in project_cache.working_directories: + for cwd in working_directories: cwd_path = Path(cwd).resolve() if current_cwd_path.is_relative_to(cwd_path): relative_matches.append(project_dir) @@ -263,6 +262,17 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: if all_projects: # Clear cache for all project directories click.echo("Clearing caches for all projects...") + + # Delete the shared SQLite cache database + cache_db = input_path / "cache.db" + if cache_db.exists(): + try: + cache_db.unlink() + click.echo(f" Deleted SQLite cache database: {cache_db}") + except Exception as e: + click.echo(f" Warning: Failed to delete cache database: {e}") + + # Also clean up old JSON cache directories (migration cleanup) project_dirs = [ d for d in input_path.iterdir() @@ -271,12 +281,16 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: for project_dir in project_dirs: try: - cache_manager = CacheManager(project_dir, library_version) - cache_manager.clear_cache() - click.echo(f" Cleared cache for {project_dir.name}") + # Clean up old JSON cache directory if it exists + old_cache_dir = project_dir / "cache" + if old_cache_dir.exists(): + import shutil + + shutil.rmtree(old_cache_dir) + click.echo(f" Cleared old JSON cache for {project_dir.name}") except Exception as e: click.echo( - f" Warning: Failed to clear cache for {project_dir.name}: {e}" + f" Warning: Failed to clear old cache for {project_dir.name}: {e}" ) elif input_path.is_dir(): @@ -284,6 +298,14 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: click.echo(f"Clearing cache for {input_path}...") cache_manager = CacheManager(input_path, library_version) cache_manager.clear_cache() + + # Also clean up old JSON cache directory if it exists + old_cache_dir = input_path / "cache" + if old_cache_dir.exists(): + import shutil + + 
shutil.rmtree(old_cache_dir) + click.echo(" Cleared old JSON cache directory") else: # Single file - no cache to clear click.echo("Cache clearing not applicable for single files.") @@ -434,6 +456,12 @@ def _clear_output_files(input_path: Path, all_projects: bool, file_ext: str) -> default=None, help="Image export mode: placeholder (mark position), embedded (base64), referenced (PNG files). Default: embedded for HTML, referenced for Markdown.", ) +@click.option( + "--page-size", + type=int, + default=2000, + help="Maximum messages per page for combined transcript (default: 2000). Sessions are never split across pages.", +) @click.option( "--debug", is_flag=True, @@ -455,6 +483,7 @@ def main( projects_dir: Optional[Path], output_format: str, image_export_mode: Optional[str], + page_size: int, debug: bool, ) -> None: """Convert Claude transcript JSONL files to HTML or Markdown. @@ -595,6 +624,7 @@ def main( not no_individual_sessions, output_format, image_export_mode, + page_size=page_size, ) # Count processed projects @@ -646,6 +676,7 @@ def main( not no_individual_sessions, not no_cache, image_export_mode=image_export_mode, + page_size=page_size, ) if input_path.is_file(): click.echo(f"Successfully converted {input_path} to {output_path}") diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index b6175992..b56db8ed 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -3,6 +3,7 @@ import json import re +from dataclasses import dataclass, field from pathlib import Path import traceback from typing import Optional, Any, TYPE_CHECKING @@ -17,7 +18,6 @@ get_project_display_name, should_use_as_session_starter, create_session_preview, - extract_working_directories, get_warmup_session_ids, ) from .cache import CacheManager, SessionCacheData, get_library_version @@ -403,6 +403,306 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr return deduplicated +@dataclass +class GenerationStats: + 
"""Track statistics for HTML generation across a project.""" + + # Cache statistics + files_loaded_from_cache: int = 0 + files_updated: int = 0 + + # HTML generation statistics + sessions_total: int = 0 + sessions_regenerated: int = 0 + combined_regenerated: bool = False + + # Timing (seconds) + cache_time: float = 0.0 + render_time: float = 0.0 + total_time: float = 0.0 + + # Errors/warnings collected during processing + warnings: List[str] = field(default_factory=lambda: []) + errors: List[str] = field(default_factory=lambda: []) + + def add_warning(self, msg: str) -> None: + """Add a warning message.""" + self.warnings.append(msg) + + def add_error(self, msg: str) -> None: + """Add an error message.""" + self.errors.append(msg) + + def summary(self, project_name: str) -> str: + """Generate a concise summary line for this project.""" + parts: List[str] = [f"Project: {project_name}"] + + # Cache info + cache_parts: List[str] = [] + if self.files_loaded_from_cache > 0: + cache_parts.append(f"{self.files_loaded_from_cache} cached") + if self.files_updated > 0: + cache_parts.append(f"{self.files_updated} updated") + if cache_parts: + parts.append(f" Cache: {', '.join(cache_parts)}") + + # HTML info + html_parts: List[str] = [] + if self.sessions_total > 0: + html_parts.append( + f"{self.sessions_regenerated}/{self.sessions_total} sessions" + ) + if self.combined_regenerated: + html_parts.append("combined") + if html_parts: + parts.append(f" HTML: {', '.join(html_parts)} regenerated") + elif self.sessions_total > 0: + parts.append(" HTML: up to date") + + # Timing + if self.total_time > 0: + time_str = f" Time: {self.total_time:.1f}s" + if self.cache_time > 0 or self.render_time > 0: + time_str += ( + f" (cache: {self.cache_time:.1f}s, render: {self.render_time:.1f}s)" + ) + parts.append(time_str) + + return "\n".join(parts) + + +def _get_page_html_path(page_number: int) -> str: + """Get the HTML filename for a given page number. 
+ + Page 1 is combined_transcripts.html, page 2+ are combined_transcripts_N.html + """ + if page_number == 1: + return "combined_transcripts.html" + return f"combined_transcripts_{page_number}.html" + + +def _assign_sessions_to_pages( + sessions: Dict[str, SessionCacheData], page_size: int +) -> List[List[str]]: + """Assign sessions to pages, never splitting sessions across pages. + + Args: + sessions: Dict mapping session_id to SessionCacheData + page_size: Maximum messages per page (overflow allowed to keep sessions intact) + + Returns: + List of pages, each containing a list of session_ids + """ + pages: List[List[str]] = [] + current_page: List[str] = [] + current_count = 0 + + # Sort sessions chronologically by first_timestamp + sorted_sessions = sorted(sessions.values(), key=lambda s: s.first_timestamp or "") + + for session in sorted_sessions: + # Add session to current page (never split sessions) + current_page.append(session.session_id) + current_count += session.message_count + + # If page now exceeds limit, close it and start fresh + if current_count > page_size: + pages.append(current_page) + current_page = [] + current_count = 0 + + # Don't forget the last page + if current_page: + pages.append(current_page) + + return pages + + +def _generate_paginated_html( + messages: List[TranscriptEntry], + output_dir: Path, + title: str, + page_size: int, + cache_manager: "CacheManager", + session_data: Dict[str, SessionCacheData], + working_directories: List[str], + silent: bool = False, +) -> Path: + """Generate paginated HTML files for combined transcript. 
+ + Args: + messages: All messages (deduplicated) + output_dir: Directory to write HTML files + title: Base title for the pages + page_size: Maximum messages per page + cache_manager: Cache manager for the project + session_data: Session metadata from cache + working_directories: Working directories for project display name + silent: Suppress verbose output + + Returns: + Path to the first page (combined_transcripts.html) + """ + from .renderer import generate_html, format_timestamp + + # Check if page size changed - if so, invalidate all pages + cached_page_size = cache_manager.get_page_size_config() + if cached_page_size is not None and cached_page_size != page_size: + if not silent: + print( + f"Page size changed from {cached_page_size} to {page_size}, regenerating all pages" + ) + old_paths = cache_manager.invalidate_all_pages() + # Delete old page files + for html_path in old_paths: + page_file = output_dir / html_path + if page_file.exists(): + page_file.unlink() + + # Assign sessions to pages + pages: List[List[str]] = _assign_sessions_to_pages(session_data, page_size) + + if not pages: + # No sessions, generate empty page + pages = [[]] + + # Clean up orphan pages if page count decreased + old_page_count = cache_manager.get_page_count() + new_page_count = len(pages) + if old_page_count > new_page_count: + for orphan_page_num in range(new_page_count + 1, old_page_count + 1): + orphan_path = output_dir / _get_page_html_path(orphan_page_num) + if orphan_path.exists(): + orphan_path.unlink() + + # Group messages by session for fast lookup + messages_by_session: Dict[str, List[TranscriptEntry]] = {} + for msg in messages: + session_id = getattr(msg, "sessionId", None) + if session_id: + if session_id not in messages_by_session: + messages_by_session[session_id] = [] + messages_by_session[session_id].append(msg) + + first_page_path = output_dir / _get_page_html_path(1) + + # Generate each page + for page_num, page_session_ids in enumerate(pages, start=1): + 
html_path = _get_page_html_path(page_num) + page_file = output_dir / html_path + + # Check if page is stale + is_stale, reason = cache_manager.is_page_stale(page_num, page_size) + + if not is_stale and page_file.exists(): + if not silent: + print(f"Page {page_num} is current, skipping regeneration") + continue + + if not silent: + print(f"Generating page {page_num} ({reason})...") + + # Collect messages for this page + page_messages: List[TranscriptEntry] = [] + for session_id in page_session_ids: + if session_id in messages_by_session: + page_messages.extend(messages_by_session[session_id]) + + # Calculate page stats + page_message_count = len(page_messages) + first_timestamp = None + last_timestamp = None + total_input_tokens = 0 + total_output_tokens = 0 + total_cache_creation_tokens = 0 + total_cache_read_tokens = 0 + + for session_id in page_session_ids: + if session_id in session_data: + s = session_data[session_id] + if s.first_timestamp and ( + first_timestamp is None or s.first_timestamp < first_timestamp + ): + first_timestamp = s.first_timestamp + if s.last_timestamp and ( + last_timestamp is None or s.last_timestamp > last_timestamp + ): + last_timestamp = s.last_timestamp + total_input_tokens += s.total_input_tokens + total_output_tokens += s.total_output_tokens + total_cache_creation_tokens += s.total_cache_creation_tokens + total_cache_read_tokens += s.total_cache_read_tokens + + # Build page_info for navigation + has_prev = page_num > 1 + # Pre-enable next link if this page exceeds threshold (anticipating future pages) + # or if there are more pages + page_exceeds_threshold = page_message_count > page_size + has_next = page_num < len(pages) or page_exceeds_threshold + + page_info = { + "page_number": page_num, + "prev_link": _get_page_html_path(page_num - 1) if has_prev else None, + "next_link": _get_page_html_path(page_num + 1) if has_next else None, + } + + # Build page_stats + date_range = "" + if first_timestamp and last_timestamp: + first_fmt = 
format_timestamp(first_timestamp) + last_fmt = format_timestamp(last_timestamp) + if first_fmt == last_fmt: + date_range = first_fmt + else: + date_range = f"{first_fmt} - {last_fmt}" + elif first_timestamp: + date_range = format_timestamp(first_timestamp) + + token_parts: List[str] = [] + if total_input_tokens: + token_parts.append(f"Input: {total_input_tokens:,}") + if total_output_tokens: + token_parts.append(f"Output: {total_output_tokens:,}") + if total_cache_creation_tokens: + token_parts.append(f"Cache Create: {total_cache_creation_tokens:,}") + if total_cache_read_tokens: + token_parts.append(f"Cache Read: {total_cache_read_tokens:,}") + token_summary = " | ".join(token_parts) if token_parts else None + + page_stats = { + "message_count": page_message_count, + "date_range": date_range, + "token_summary": token_summary, + } + + # Generate HTML for this page + page_title = f"{title} - Page {page_num}" if page_num > 1 else title + html_content = generate_html( + page_messages, + page_title, + page_info=page_info, + page_stats=page_stats, + ) + page_file.write_text(html_content, encoding="utf-8") + + # Update cache + cache_manager.update_page_cache( + page_number=page_num, + html_path=html_path, + page_size_config=page_size, + session_ids=page_session_ids, + message_count=page_message_count, + first_timestamp=first_timestamp, + last_timestamp=last_timestamp, + total_input_tokens=total_input_tokens, + total_output_tokens=total_output_tokens, + total_cache_creation_tokens=total_cache_creation_tokens, + total_cache_read_tokens=total_cache_read_tokens, + ) + + return first_page_path + + def convert_jsonl_to_html( input_path: Path, output_path: Optional[Path] = None, @@ -411,6 +711,7 @@ def convert_jsonl_to_html( generate_individual_sessions: bool = True, use_cache: bool = True, silent: bool = False, + page_size: int = 2000, ) -> Path: """Convert JSONL transcript(s) to HTML file(s). 
@@ -466,6 +767,10 @@ def convert_jsonl_to( print(f"Warning: Failed to initialize cache manager: {e}") ext = get_file_extension(format) + + # Initialize working_directories for both branches (used by pagination in directory mode) + working_directories: List[str] = [] + if input_path.is_file(): # Single file mode - cache only available for directory mode if output_path is None: @@ -483,13 +788,37 @@ def convert_jsonl_to( input_path, cache_manager, from_date, to_date, silent ) + # Phase 1b: Early exit if nothing needs regeneration + # Skip expensive message loading if all HTML is up to date + if ( + cache_manager is not None + and not cache_was_updated + and from_date is None + and to_date is None + ): + # Check if combined HTML is stale + combined_stale, _ = cache_manager.is_html_stale(output_path.name, None) + if not combined_stale and not is_html_outdated(output_path): + # Check if any session HTML is stale + stale_sessions = cache_manager.get_stale_sessions() + if not stale_sessions or not generate_individual_sessions: + # Nothing needs regeneration - skip loading + if not silent: + print( + f"All HTML files are current for {input_path.name}, " + "skipping regeneration" + ) + return output_path + # Phase 2: Load messages (will use fresh cache when available) messages = load_directory_transcripts( input_path, cache_manager, from_date, to_date, silent ) - # Extract working directories directly from parsed messages - working_directories = extract_working_directories(messages) + # Get working directories from cache + working_directories = ( + cache_manager.get_working_directories() if cache_manager else [] + ) project_title = get_project_display_name(input_path.name, working_directories) title = f"Claude Transcripts - {project_title}" @@ -513,26 +842,77 @@ def convert_jsonl_to( # Generate combined output file (check if regeneration needed) assert output_path is not None renderer = get_renderer(format, image_export_mode) - should_regenerate = ( - 
renderer.is_outdated(output_path) - or from_date is not None - or to_date is not None - or not output_path.exists() - or ( - input_path.is_dir() and cache_was_updated - ) # Regenerate if JSONL files changed - ) - if should_regenerate: - # For referenced images, pass the output directory - output_dir = output_path.parent - content = renderer.generate(messages, title, output_dir=output_dir) - assert content is not None - output_path.write_text(content, encoding="utf-8") - else: - print( - f"{format.upper()} file {output_path.name} is current, skipping regeneration" + # Decide whether to use pagination (HTML only, directory mode, no date filter) + use_pagination = False + cached_data = cache_manager.get_cached_project_data() if cache_manager else None + total_message_count = ( + cached_data.total_message_count if cached_data else len(messages) + ) + existing_page_count = cache_manager.get_page_count() if cache_manager else 0 + + if ( + format == "html" + and cache_manager is not None + and input_path.is_dir() + and from_date is None + and to_date is None + ): + # Use pagination if total messages exceed page_size or there are existing pages + use_pagination = total_message_count > page_size or existing_page_count > 1 + + if use_pagination: + # Use paginated HTML generation + assert cache_manager is not None # Ensured by use_pagination condition + session_data = cached_data.sessions if cached_data else {} + output_path = _generate_paginated_html( + messages, + input_path, + title, + page_size, + cache_manager, + session_data, + working_directories, + silent=silent, ) + else: + # Use single-file generation for small projects or filtered views + # Use incremental regeneration via html_cache when available + if cache_manager is not None and input_path.is_dir(): + is_stale, _reason = cache_manager.is_html_stale(output_path.name, None) + should_regenerate = ( + is_stale + or renderer.is_outdated(output_path) + or from_date is not None + or to_date is not None + or not 
output_path.exists() + ) + else: + # Fallback: old logic for single file mode or no cache + should_regenerate = ( + renderer.is_outdated(output_path) + or from_date is not None + or to_date is not None + or not output_path.exists() + or (input_path.is_dir() and cache_was_updated) + ) + + if should_regenerate: + # For referenced images, pass the output directory + output_dir = output_path.parent + content = renderer.generate(messages, title, output_dir=output_dir) + assert content is not None + output_path.write_text(content, encoding="utf-8") + + # Update html_cache for combined transcript (HTML only) + if format == "html" and cache_manager is not None: + cache_manager.update_html_cache( + output_path.name, None, total_message_count + ) + elif not silent: + print( + f"{format.upper()} file {output_path.name} is current, skipping regeneration" + ) # Generate individual session files if requested and in directory mode if generate_individual_sessions and input_path.is_dir(): @@ -545,11 +925,45 @@ def convert_jsonl_to( cache_manager, cache_was_updated, image_export_mode, + silent=silent, ) return output_path +def has_cache_changes( + project_dir: Path, + cache_manager: Optional[CacheManager], + from_date: Optional[str] = None, + to_date: Optional[str] = None, +) -> bool: + """Check if cache needs updating (fast mtime comparison only). + + Returns True if there are modified files or cache is stale. + Does NOT load any messages - that's deferred to ensure_fresh_cache. 
+ """ + if cache_manager is None: + return True # No cache means we need to process + + jsonl_files = list(project_dir.glob("*.jsonl")) + if not jsonl_files: + return False + + # Get cached project data + cached_project_data = cache_manager.get_cached_project_data() + + # Check various invalidation conditions + modified_files = cache_manager.get_modified_files(jsonl_files) + + return ( + cached_project_data is None + or from_date is not None + or to_date is not None + or bool(modified_files) + or (cached_project_data.total_message_count == 0 and bool(jsonl_files)) + ) + + def ensure_fresh_cache( project_dir: Path, cache_manager: Optional[CacheManager], @@ -557,7 +971,11 @@ def ensure_fresh_cache( to_date: Optional[str] = None, silent: bool = False, ) -> bool: - """Ensure cache is fresh and populated. Returns True if cache was updated.""" + """Ensure cache is fresh and populated. Returns True if cache was updated. + + This does the heavy lifting of loading and parsing files. + Call has_cache_changes() first for a fast check. + """ if cache_manager is None: return False @@ -744,11 +1162,6 @@ def _update_cache_with_session_data( # Update cache with filtered session data cache_manager.update_session_cache(sessions_cache_data) - # Update cache with working directories (from filtered sessions) - cache_manager.update_working_directories( - extract_working_directories(list(sessions_cache_data.values())) - ) - # Update cache with project aggregates cache_manager.update_project_aggregates( total_message_count=total_message_count, @@ -874,8 +1287,13 @@ def _generate_individual_session_files( cache_manager: Optional["CacheManager"] = None, cache_was_updated: bool = False, image_export_mode: Optional[str] = None, -) -> None: - """Generate individual files for each session in the specified format.""" + silent: bool = False, +) -> int: + """Generate individual files for each session in the specified format. 
+ + Returns: + Number of sessions regenerated + """ ext = get_file_extension(format) # Pre-compute warmup sessions to exclude them warmup_session_ids = get_warmup_session_ids(messages) @@ -890,19 +1308,23 @@ def _generate_individual_session_files( # Get session data from cache for better titles session_data: dict[str, Any] = {} - working_directories = None + working_directories: list[str] = [] if cache_manager is not None: project_cache = cache_manager.get_cached_project_data() if project_cache: session_data = {s.session_id: s for s in project_cache.sessions.values()} - # Get working directories for project title - if project_cache.working_directories: - working_directories = project_cache.working_directories + # Get working directories for project title + working_directories = cache_manager.get_working_directories() + + # Only generate HTML for sessions that are tracked in the sessions table + # (filters out warmup-only and sessions without user messages) + session_ids = session_ids & set(session_data.keys()) project_title = get_project_display_name(output_dir.name, working_directories) # Get renderer once outside the loop renderer = get_renderer(format, image_export_mode) + regenerated_count = 0 # Generate HTML file for each session for session_id in session_ids: @@ -937,15 +1359,29 @@ def _generate_individual_session_files( # Check if session file needs regeneration session_file_path = output_dir / f"session-{session_id}.{ext}" + session_file_name = f"session-{session_id}.{ext}" - # Only regenerate if outdated, doesn't exist, or date filtering is active - should_regenerate_session = ( - renderer.is_outdated(session_file_path) - or from_date is not None - or to_date is not None - or not session_file_path.exists() - or cache_was_updated # Regenerate if JSONL files changed - ) + # Use incremental regeneration: check per-session staleness via html_cache + if cache_manager is not None and format == "html": + is_stale, _reason = cache_manager.is_html_stale( + 
session_file_name, session_id + ) + should_regenerate_session = ( + is_stale + or renderer.is_outdated(session_file_path) + or from_date is not None + or to_date is not None + or not session_file_path.exists() + ) + else: + # Fallback without cache or non-HTML formats + should_regenerate_session = ( + renderer.is_outdated(session_file_path) + or from_date is not None + or to_date is not None + or not session_file_path.exists() + or cache_was_updated + ) if should_regenerate_session: # Generate session content @@ -955,11 +1391,32 @@ def _generate_individual_session_files( assert session_content is not None # Write session file session_file_path.write_text(session_content, encoding="utf-8") - else: + regenerated_count += 1 + + # Update html_cache to track this generation (HTML only) + if cache_manager is not None and format == "html": + # Use message count from cache (pre-deduplication) to match + # the count used in is_html_stale() + if session_id in session_data: + session_message_count = session_data[session_id].message_count + else: + # Fallback: count from messages list (less accurate due to dedup) + session_message_count = sum( + 1 + for m in messages + if hasattr(m, "sessionId") + and getattr(m, "sessionId") == session_id + ) + cache_manager.update_html_cache( + session_file_name, session_id, session_message_count + ) + elif not silent: print( f"Session file {session_file_path.name} is current, skipping regeneration" ) + return regenerated_count + def process_projects_hierarchy( projects_path: Path, @@ -969,8 +1426,26 @@ def process_projects_hierarchy( generate_individual_sessions: bool = True, output_format: str = "html", image_export_mode: Optional[str] = None, + silent: bool = True, + page_size: int = 2000, ) -> Path: - """Process the entire ~/.claude/projects/ hierarchy and create linked output files.""" + """Process the entire ~/.claude/projects/ hierarchy and create linked output files. 
+ + Args: + projects_path: Path to the projects directory + from_date: Optional date filter start + to_date: Optional date filter end + use_cache: Whether to use SQLite cache + generate_individual_sessions: Whether to generate per-session HTML files + output_format: Output format (html, md, markdown) + image_export_mode: Image export mode for markdown + silent: If True, suppress verbose per-file logging (show summary only) + page_size: Maximum messages per page for combined transcript pagination + """ + import time + + start_time = time.time() + if not projects_path.exists(): raise FileNotFoundError(f"Projects path not found: {projects_path}") @@ -991,7 +1466,19 @@ def process_projects_hierarchy( # Process each project directory project_summaries: list[dict[str, Any]] = [] any_cache_updated = False # Track if any project had cache updates + + # Aggregated stats + total_projects = len(project_dirs) + projects_with_updates = 0 + total_sessions = 0 + + # Per-project stats for summary output + project_stats: List[tuple[str, GenerationStats]] = [] + for project_dir in sorted(project_dirs): + project_start_time = time.time() + stats = GenerationStats() + try: # Initialize cache manager for this project cache_manager = None @@ -999,27 +1486,81 @@ def process_projects_hierarchy( try: cache_manager = CacheManager(project_dir, library_version) except Exception as e: - print(f"Warning: Failed to initialize cache for {project_dir}: {e}") + stats.add_warning(f"Failed to initialize cache: {e}") - # Phase 1: Ensure cache is fresh and populated - cache_was_updated = ensure_fresh_cache( - project_dir, cache_manager, from_date, to_date + # Phase 1: Fast check if anything needs updating (mtime comparison only) + jsonl_files = list(project_dir.glob("*.jsonl")) + modified_files = ( + cache_manager.get_modified_files(jsonl_files) if cache_manager else [] ) - if cache_was_updated: - any_cache_updated = True - - # Phase 2: Generate output for this project (optionally individual session 
files) - output_path = convert_jsonl_to( - output_format, - project_dir, - None, - from_date, - to_date, - generate_individual_sessions, - use_cache, - image_export_mode=image_export_mode, + stale_sessions = cache_manager.get_stale_sessions() if cache_manager else [] + output_path = project_dir / "combined_transcripts.html" + # Check combined_stale using the appropriate cache: + # - Paginated projects store data in html_pages table (via save_page_cache) + # - Non-paginated projects store data in html_cache table (via update_html_cache) + if cache_manager is not None: + existing_page_count = cache_manager.get_page_count() + if existing_page_count > 0: + # Paginated project: check page 1 staleness + combined_stale = cache_manager.is_page_stale(1, page_size)[0] + else: + # Non-paginated project: check html_cache + combined_stale = cache_manager.is_html_stale( + output_path.name, None + )[0] + else: + combined_stale = True + + # Determine if we need to do any work + needs_work = ( + bool(modified_files) + or bool(stale_sessions) + or combined_stale + or not output_path.exists() ) + if not needs_work: + # Fast path: nothing to do, just collect stats for index + stats.files_loaded_from_cache = len(jsonl_files) + stats.total_time = time.time() - project_start_time + # Show progress + print(f" {project_dir.name}: cached ({stats.total_time:.1f}s)") + else: + # Slow path: update cache and regenerate output + stats.files_updated = len(modified_files) if modified_files else 0 + stats.files_loaded_from_cache = len(jsonl_files) - stats.files_updated + stats.sessions_regenerated = len(stale_sessions) + + # Track if cache was updated (for index regeneration) + if modified_files: + any_cache_updated = True + projects_with_updates += 1 + + # Generate output for this project (handles cache updates internally) + output_path = convert_jsonl_to( + output_format, + project_dir, + None, + from_date, + to_date, + generate_individual_sessions, + use_cache, + silent=silent, + 
image_export_mode=image_export_mode, + page_size=page_size, + ) + + # Track timing + stats.total_time = time.time() - project_start_time + # Show progress + progress_parts: List[str] = [] + if stats.files_updated > 0: + progress_parts.append(f"{stats.files_updated} files updated") + if stats.sessions_regenerated > 0: + progress_parts.append(f"{stats.sessions_regenerated} sessions") + detail = ", ".join(progress_parts) if progress_parts else "regenerated" + print(f" {project_dir.name}: {detail} ({stats.total_time:.1f}s)") + # Get project info for index - use cached data if available # Exclude agent files (they are loaded via session references) jsonl_files = [ @@ -1036,6 +1577,8 @@ def process_projects_hierarchy( if cache_manager is not None: cached_project_data = cache_manager.get_cached_project_data() if cached_project_data is not None: + # Track total sessions for stats + stats.sessions_total = len(cached_project_data.sessions) # Use cached aggregation data project_summaries.append( { @@ -1051,7 +1594,7 @@ def process_projects_hierarchy( "total_cache_read_tokens": cached_project_data.total_cache_read_tokens, "latest_timestamp": cached_project_data.latest_timestamp, "earliest_timestamp": cached_project_data.earliest_timestamp, - "working_directories": cached_project_data.working_directories, + "working_directories": cache_manager.get_working_directories(), "sessions": [ { "id": session_data.session_id, @@ -1073,6 +1616,8 @@ def process_projects_hierarchy( ], } ) + # Add project stats + project_stats.append((project_dir.name, stats)) continue # Fallback for when cache is not available (should be rare) @@ -1080,8 +1625,11 @@ def process_projects_hierarchy( f"Warning: No cached data available for {project_dir.name}, using fallback processing" ) messages = load_directory_transcripts( - project_dir, cache_manager, from_date, to_date + project_dir, cache_manager, from_date, to_date, silent=silent ) + # Ensure cache is populated with session data (including working 
directories) + if cache_manager: + _update_cache_with_session_data(cache_manager, messages) if from_date or to_date: messages = filter_messages_by_date(messages, from_date, to_date) @@ -1153,12 +1701,20 @@ def process_projects_hierarchy( "total_cache_read_tokens": total_cache_read_tokens, "latest_timestamp": latest_timestamp, "earliest_timestamp": earliest_timestamp, - "working_directories": extract_working_directories(messages), + "working_directories": cache_manager.get_working_directories() + if cache_manager + else [], "sessions": sessions_data, } ) + # Track session count in stats for fallback path + stats.sessions_total = len(sessions_data) + project_stats.append((project_dir.name, stats)) + except Exception as e: prev_project = project_summaries[-1] if project_summaries else "(none)" + stats.add_error(str(e)) + project_stats.append((project_dir.name, stats)) print( f"Warning: Failed to process {project_dir}: {e}\n" f"Previous (in alphabetical order) project before error: {prev_project}" @@ -1170,13 +1726,38 @@ def process_projects_hierarchy( ext = get_file_extension(output_format) index_path = projects_path / f"index.{ext}" renderer = get_renderer(output_format, image_export_mode) + index_regenerated = False if renderer.is_outdated(index_path) or from_date or to_date or any_cache_updated: index_content = renderer.generate_projects_index( project_summaries, from_date, to_date ) assert index_content is not None index_path.write_text(index_content, encoding="utf-8") - else: + index_regenerated = True + elif not silent: print(f"Index {ext.upper()} is current, skipping regeneration") + # Count total sessions from project summaries + for summary in project_summaries: + total_sessions += len(summary.get("sessions", [])) + + # Print summary + elapsed = time.time() - start_time + + # Print any errors/warnings that occurred + for project_name, stats in project_stats: + for warning in stats.warnings: + print(f" Warning ({project_name}): {warning}") + for error in 
stats.errors: + print(f" Error ({project_name}): {error}") + + # Global summary + summary_parts: List[str] = [] + summary_parts.append(f"Processed {total_projects} projects in {elapsed:.1f}s") + if projects_with_updates > 0: + summary_parts.append(f" {projects_with_updates} projects updated") + if index_regenerated: + summary_parts.append(" Index regenerated") + print("\n".join(summary_parts)) + return index_path diff --git a/claude_code_log/html/templates/components/page_nav_styles.css b/claude_code_log/html/templates/components/page_nav_styles.css new file mode 100644 index 00000000..d0254878 --- /dev/null +++ b/claude_code_log/html/templates/components/page_nav_styles.css @@ -0,0 +1,74 @@ +/* Page navigation styles for paginated combined transcripts */ + +.page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; +} + +.page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; +} + +.page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); +} + +.page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; +} + +.page-stats .stat { + display: flex; + align-items: center; + gap: 5px; +} + +.page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); +} + +.page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; +} + +.page-nav-link:hover { + background-color: var(--session-bg-dimmed); + 
transform: translateY(-1px); +} + +.page-nav-link.prev::before { + content: ''; +} + +.page-nav-link.next::after { + content: ''; +} diff --git a/claude_code_log/html/templates/transcript.html b/claude_code_log/html/templates/transcript.html index ef27d002..fed0bb53 100644 --- a/claude_code_log/html/templates/transcript.html +++ b/claude_code_log/html/templates/transcript.html @@ -17,12 +17,39 @@ {% include 'components/search_styles.css' %} {% include 'components/edit_diff_styles.css' %} {% include 'components/pygments_styles.css' %} +{% include 'components/page_nav_styles.css' %}

{{ title }}

+ {% if page_info %} + + + {% endif %} + {% include 'components/timeline.html' %} diff --git a/claude_code_log/migrations/001_initial_schema.sql b/claude_code_log/migrations/001_initial_schema.sql new file mode 100644 index 00000000..f7c5946e --- /dev/null +++ b/claude_code_log/migrations/001_initial_schema.sql @@ -0,0 +1,114 @@ +-- Initial schema for SQLite cache +-- Migration: 001 +-- Description: Creates all tables and indexes for the cache system + +-- Project metadata +CREATE TABLE IF NOT EXISTS projects ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_path TEXT UNIQUE NOT NULL, + version TEXT NOT NULL, + cache_created TEXT NOT NULL, + last_updated TEXT NOT NULL, + total_message_count INTEGER DEFAULT 0, + total_input_tokens INTEGER DEFAULT 0, + total_output_tokens INTEGER DEFAULT 0, + total_cache_creation_tokens INTEGER DEFAULT 0, + total_cache_read_tokens INTEGER DEFAULT 0, + earliest_timestamp TEXT DEFAULT '', + latest_timestamp TEXT DEFAULT '' +); + +CREATE INDEX IF NOT EXISTS idx_projects_path ON projects(project_path); + +-- File tracking for invalidation +CREATE TABLE IF NOT EXISTS cached_files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + file_name TEXT NOT NULL, + file_path TEXT NOT NULL, + source_mtime REAL NOT NULL, + cached_mtime REAL NOT NULL, + message_count INTEGER DEFAULT 0, + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + UNIQUE(project_id, file_name) +); + +CREATE INDEX IF NOT EXISTS idx_cached_files_project ON cached_files(project_id); +CREATE INDEX IF NOT EXISTS idx_cached_files_name ON cached_files(file_name); + +-- Session aggregates +CREATE TABLE IF NOT EXISTS sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + session_id TEXT NOT NULL, + summary TEXT, + first_timestamp TEXT NOT NULL DEFAULT '', + last_timestamp TEXT NOT NULL DEFAULT '', + message_count INTEGER DEFAULT 0, + first_user_message TEXT DEFAULT '', + cwd TEXT, + total_input_tokens INTEGER 
DEFAULT 0, + total_output_tokens INTEGER DEFAULT 0, + total_cache_creation_tokens INTEGER DEFAULT 0, + total_cache_read_tokens INTEGER DEFAULT 0, + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + UNIQUE(project_id, session_id) +); + +CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_id); +CREATE INDEX IF NOT EXISTS idx_sessions_session_id ON sessions(session_id); +CREATE INDEX IF NOT EXISTS idx_sessions_first_timestamp ON sessions(first_timestamp); +CREATE INDEX IF NOT EXISTS idx_sessions_cwd ON sessions(cwd); + +-- Fully normalised messages +CREATE TABLE IF NOT EXISTS messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + file_id INTEGER NOT NULL, + + -- Core fields + type TEXT NOT NULL, + timestamp TEXT, + session_id TEXT, + + -- BaseTranscriptEntry fields (prefixed) + _uuid TEXT, + _parent_uuid TEXT, + _is_sidechain INTEGER DEFAULT 0, + _user_type TEXT, + _cwd TEXT, + _version TEXT, + _is_meta INTEGER, + _agent_id TEXT, + + -- AssistantTranscriptEntry + _request_id TEXT, + + -- Flattened usage tokens + input_tokens INTEGER, + output_tokens INTEGER, + cache_creation_tokens INTEGER, + cache_read_tokens INTEGER, + + -- SummaryTranscriptEntry + _leaf_uuid TEXT, + + -- SystemTranscriptEntry + _level TEXT, + + -- QueueOperationTranscriptEntry + _operation TEXT, + + -- Message content as JSON + content JSON NOT NULL, + + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + FOREIGN KEY (file_id) REFERENCES cached_files(id) ON DELETE CASCADE +); + +-- Indexes for performance +CREATE INDEX IF NOT EXISTS idx_messages_timestamp ON messages(timestamp); +CREATE INDEX IF NOT EXISTS idx_messages_project_timestamp ON messages(project_id, timestamp); +CREATE INDEX IF NOT EXISTS idx_messages_file ON messages(file_id); +CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id); +CREATE INDEX IF NOT EXISTS idx_messages_uuid ON messages(_uuid); diff --git 
a/claude_code_log/migrations/002_html_cache.sql b/claude_code_log/migrations/002_html_cache.sql new file mode 100644 index 00000000..00db64ec --- /dev/null +++ b/claude_code_log/migrations/002_html_cache.sql @@ -0,0 +1,18 @@ +-- HTML cache for incremental regeneration +-- Migration: 002 +-- Description: Tracks when HTML files were generated to enable incremental regeneration + +CREATE TABLE IF NOT EXISTS html_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + html_path TEXT NOT NULL, -- e.g., "session-abc123.html" or "combined_transcripts.html" + generated_at TEXT NOT NULL, -- ISO timestamp when HTML was generated + source_session_id TEXT, -- session_id for individual files, NULL for combined + message_count INTEGER, -- for sanity checking + library_version TEXT NOT NULL, -- which version generated it + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + UNIQUE(project_id, html_path) +); + +CREATE INDEX IF NOT EXISTS idx_html_cache_project ON html_cache(project_id); +CREATE INDEX IF NOT EXISTS idx_html_cache_session ON html_cache(source_session_id); diff --git a/claude_code_log/migrations/003_html_pagination.sql b/claude_code_log/migrations/003_html_pagination.sql new file mode 100644 index 00000000..61bd6f9c --- /dev/null +++ b/claude_code_log/migrations/003_html_pagination.sql @@ -0,0 +1,39 @@ +-- HTML pagination for combined transcripts +-- Migration: 003 +-- Description: Tracks page assignments for paginated combined transcript HTML files + +-- Pages table: tracks each generated page file +CREATE TABLE IF NOT EXISTS html_pages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + page_number INTEGER NOT NULL, + html_path TEXT NOT NULL, -- e.g., "combined_transcripts.html" or "combined_transcripts_2.html" + page_size_config INTEGER NOT NULL, -- the --page-size value used + message_count INTEGER NOT NULL, -- total messages on this page + first_session_id TEXT NOT NULL, + last_session_id TEXT NOT 
NULL, + first_timestamp TEXT, + last_timestamp TEXT, + total_input_tokens INTEGER DEFAULT 0, + total_output_tokens INTEGER DEFAULT 0, + total_cache_creation_tokens INTEGER DEFAULT 0, + total_cache_read_tokens INTEGER DEFAULT 0, + generated_at TEXT NOT NULL, -- ISO timestamp when page was generated + library_version TEXT NOT NULL, + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + UNIQUE(project_id, page_number) +); + +-- Page-session mapping: tracks which sessions are on which page +CREATE TABLE IF NOT EXISTS page_sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + page_id INTEGER NOT NULL, + session_id TEXT NOT NULL, + session_order INTEGER NOT NULL, -- order of session within the page + FOREIGN KEY (page_id) REFERENCES html_pages(id) ON DELETE CASCADE, + UNIQUE(page_id, session_id) +); + +CREATE INDEX IF NOT EXISTS idx_html_pages_project ON html_pages(project_id); +CREATE INDEX IF NOT EXISTS idx_page_sessions_page ON page_sessions(page_id); +CREATE INDEX IF NOT EXISTS idx_page_sessions_session ON page_sessions(session_id); diff --git a/claude_code_log/migrations/__init__.py b/claude_code_log/migrations/__init__.py new file mode 100644 index 00000000..db9bb5bb --- /dev/null +++ b/claude_code_log/migrations/__init__.py @@ -0,0 +1,5 @@ +"""Database migrations for Claude Code Log cache.""" + +from .runner import run_migrations + +__all__ = ["run_migrations"] diff --git a/claude_code_log/migrations/runner.py b/claude_code_log/migrations/runner.py new file mode 100644 index 00000000..40e2b29d --- /dev/null +++ b/claude_code_log/migrations/runner.py @@ -0,0 +1,163 @@ +"""Migration runner for SQLite cache database.""" + +import hashlib +import re +import sqlite3 +from datetime import datetime +from pathlib import Path +from typing import List, Tuple + + +def _get_migrations_dir() -> Path: + """Get the migrations directory path.""" + return Path(__file__).parent + + +def _compute_checksum(content: str) -> str: + """Compute SHA256 checksum of 
migration content."""
+    return hashlib.sha256(content.encode("utf-8")).hexdigest()
+
+
+def _parse_migration_number(filename: str) -> int:
+    """Extract migration number from filename (e.g., '001_initial.sql' -> 1)."""
+    match = re.match(r"^(\d+)_", filename)
+    if match:
+        return int(match.group(1))
+    raise ValueError(f"Invalid migration filename: {filename}")
+
+
+def _ensure_schema_version_table(conn: sqlite3.Connection) -> None:
+    """Create _schema_version table if it doesn't exist or upgrade from old format."""
+    # Check if table exists
+    row = conn.execute(
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='_schema_version'"
+    ).fetchone()
+
+    if row:
+        # Check if it has the new schema (with checksum column)
+        columns = conn.execute("PRAGMA table_info(_schema_version)").fetchall()
+        column_names = {col[1] for col in columns}
+
+        if "checksum" not in column_names:
+            # Old format table - drop it and recreate
+            # This triggers a fresh start as per migration plan
+            conn.execute("DROP TABLE _schema_version")
+            conn.commit()
+
+    # Create table with new schema
+    conn.execute("""
+        CREATE TABLE IF NOT EXISTS _schema_version (
+            version INTEGER PRIMARY KEY,
+            filename TEXT NOT NULL,
+            applied_at TEXT NOT NULL,
+            checksum TEXT NOT NULL
+        )
+    """)
+    conn.commit()
+
+
+def get_applied_migrations(conn: sqlite3.Connection) -> List[Tuple[int, str]]:
+    """Get list of applied migrations as (version, checksum) tuples."""
+    _ensure_schema_version_table(conn)
+    rows = conn.execute(
+        "SELECT version, checksum FROM _schema_version ORDER BY version"
+    ).fetchall()
+    return [(row[0], row[1]) for row in rows]
+
+
+def get_available_migrations() -> List[Tuple[int, Path]]:
+    """Get list of available migration files as (version, path) tuples."""
+    migrations_dir = _get_migrations_dir()
+    sql_files = sorted(migrations_dir.glob("*.sql"))
+
+    migrations: List[Tuple[int, Path]] = []
+    for sql_file in sql_files:
+        try:
+            version = _parse_migration_number(sql_file.name)
+
migrations.append((version, sql_file)) + except ValueError: + # Skip files that don't match the naming convention + continue + + return migrations + + +def get_pending_migrations(conn: sqlite3.Connection) -> List[Tuple[int, Path]]: + """Get list of migrations that haven't been applied yet.""" + applied = {v for v, _ in get_applied_migrations(conn)} + available = get_available_migrations() + return [(v, p) for v, p in available if v not in applied] + + +def apply_migration( + conn: sqlite3.Connection, version: int, migration_path: Path +) -> None: + """Apply a single migration and record it in _schema_version.""" + content = migration_path.read_text(encoding="utf-8") + checksum = _compute_checksum(content) + + # Execute the migration SQL + conn.executescript(content) + + # Record the migration + conn.execute( + """ + INSERT INTO _schema_version (version, filename, applied_at, checksum) + VALUES (?, ?, ?, ?) + """, + (version, migration_path.name, datetime.now().isoformat(), checksum), + ) + conn.commit() + + +def verify_migrations(conn: sqlite3.Connection) -> List[str]: + """Verify applied migrations match their checksums. + + Returns list of warnings for any mismatches. + """ + warnings: List[str] = [] + applied = get_applied_migrations(conn) + available = {v: p for v, p in get_available_migrations()} + + for version, stored_checksum in applied: + if version in available: + current_content = available[version].read_text(encoding="utf-8") + current_checksum = _compute_checksum(current_content) + if current_checksum != stored_checksum: + warnings.append( + f"Migration {version} ({available[version].name}) has been modified " + f"since it was applied. This may indicate database inconsistency." + ) + + return warnings + + +def run_migrations(db_path: Path) -> int: + """Apply all pending migrations to the database. 
+ + Args: + db_path: Path to the SQLite database file + + Returns: + Number of migrations applied + """ + conn = sqlite3.connect(db_path, timeout=30.0) + conn.execute("PRAGMA foreign_keys = ON") + + try: + _ensure_schema_version_table(conn) + pending = get_pending_migrations(conn) + + for version, migration_path in sorted(pending): + apply_migration(conn, version, migration_path) + + return len(pending) + finally: + conn.close() + + +def get_current_version(conn: sqlite3.Connection) -> int: + """Get the current schema version (highest applied migration number).""" + _ensure_schema_version_table(conn) + row = conn.execute("SELECT MAX(version) FROM _schema_version").fetchone() + return row[0] if row[0] is not None else 0 diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 78e9c5f7..6eb2f1af 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -2303,3 +2303,18 @@ def get_renderer(format: str, image_export_mode: Optional[str] = None) -> Render mode = image_export_mode or "referenced" return MarkdownRenderer(image_export_mode=mode) raise ValueError(f"Unsupported format: {format}") + + +def is_html_outdated(html_file_path: Path) -> bool: + """Check if an HTML file is outdated based on its version comment. + + This is a convenience function that uses the HtmlRenderer's is_outdated method. + + Returns: + True if the file should be regenerated (missing version, different version, or file doesn't exist). + False if the file is current. 
+ """ + from .html.renderer import HtmlRenderer + + renderer = HtmlRenderer() + return renderer.is_outdated(html_file_path) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 760dd3d3..146a709c 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -521,11 +521,9 @@ def update_stats(self) -> None: ) # Get project name using shared logic - working_directories = None + working_directories: List[str] = [] try: - project_cache = self.cache_manager.get_cached_project_data() - if project_cache and project_cache.working_directories: - working_directories = project_cache.working_directories + working_directories = self.cache_manager.get_working_directories() except Exception: # Fall back to directory name if cache fails pass diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index 9fe494c3..0456c868 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -6,7 +6,6 @@ from pathlib import Path from typing import Optional -from claude_code_log.cache import SessionCacheData from .models import ContentItem, TextContent, TranscriptEntry, UserTranscriptEntry from .factories import ( IDE_DIAGNOSTICS_PATTERN, @@ -201,6 +200,17 @@ def extract_working_directories( return [path for path, _ in sorted_dirs] +# IDE tag patterns for compact preview rendering (same as renderer.py) +IDE_OPENED_FILE_PATTERN = re.compile( + r"(.*?)", re.DOTALL +) +IDE_SELECTION_PATTERN = re.compile(r"(.*?)", re.DOTALL) +IDE_DIAGNOSTICS_PATTERN = re.compile( + r"\s*(.*?)\s*", + re.DOTALL, +) + + def _compact_ide_tags_for_preview(text_content: str) -> str: """Replace verbose IDE/system tags with compact emoji indicators for previews. 
diff --git a/test/__snapshots__/test_snapshot_html.ambr b/test/__snapshots__/test_snapshot_html.ambr index bb6c3651..863b29ec 100644 --- a/test/__snapshots__/test_snapshot_html.ambr +++ b/test/__snapshots__/test_snapshot_html.ambr @@ -4287,12 +4287,88 @@ .highlight .vi { color: #19177C } /* Name.Variable.Instance */ .highlight .vm { color: #19177C } /* Name.Variable.Magic */ .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ + /* Page navigation styles for paginated combined transcripts */ + + .page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; + } + + .page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; + } + + .page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); + } + + .page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; + } + + .page-stats .stat { + display: flex; + align-items: center; + gap: 5px; + } + + .page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); + } + + .page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; + } + + .page-nav-link:hover { + background-color: var(--session-bg-dimmed); + transform: translateY(-1px); + } + + .page-nav-link.prev::before { + content: ''; + } + + .page-nav-link.next::after { + content: ''; + }

Test Session

+ + @@ -9088,12 +9164,88 @@ .highlight .vi { color: #19177C } /* Name.Variable.Instance */ .highlight .vm { color: #19177C } /* Name.Variable.Magic */ .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ + /* Page navigation styles for paginated combined transcripts */ + + .page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; + } + + .page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; + } + + .page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); + } + + .page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; + } + + .page-stats .stat { + display: flex; + align-items: center; + gap: 5px; + } + + .page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); + } + + .page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; + } + + .page-nav-link:hover { + background-color: var(--session-bg-dimmed); + transform: translateY(-1px); + } + + .page-nav-link.prev::before { + content: ''; + } + + .page-nav-link.next::after { + content: ''; + }

Edge Cases

+ + @@ -11556,7 +11708,7 @@ - Claude Transcripts - tmp + Claude Transcripts - test_multi_session_html0 -

Claude Transcripts - tmp

+

Claude Transcripts - test_multi_session_html0

+ + @@ -18923,12 +19151,88 @@ .highlight .vi { color: #19177C } /* Name.Variable.Instance */ .highlight .vm { color: #19177C } /* Name.Variable.Magic */ .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ + /* Page navigation styles for paginated combined transcripts */ + + .page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; + } + + .page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; + } + + .page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); + } + + .page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; + } + + .page-stats .stat { + display: flex; + align-items: center; + gap: 5px; + } + + .page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); + } + + .page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; + } + + .page-nav-link:hover { + background-color: var(--session-bg-dimmed); + transform: translateY(-1px); + } + + .page-nav-link.prev::before { + content: ''; + } + + .page-nav-link.next::after { + content: ''; + }

Test Transcript

+ + diff --git a/test/test_cache.py b/test/test_cache.py index 4cb4a23f..8bb4302d 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -1,10 +1,8 @@ #!/usr/bin/env python3 """Tests for caching functionality.""" -import json import tempfile from pathlib import Path -from datetime import datetime from unittest.mock import patch import pytest @@ -12,7 +10,6 @@ from claude_code_log.cache import ( CacheManager, get_library_version, - ProjectCache, SessionCacheData, ) from claude_code_log.models import ( @@ -30,7 +27,10 @@ def temp_project_dir(): """Create a temporary project directory for testing.""" with tempfile.TemporaryDirectory() as temp_dir: - yield Path(temp_dir) + # Create project subdirectory so db_path (parent/cache.db) is unique per test + project_dir = Path(temp_dir) / "project" + project_dir.mkdir() + yield project_dir @pytest.fixture @@ -101,16 +101,16 @@ def test_initialization(self, temp_project_dir, mock_version): assert cache_manager.project_path == temp_project_dir assert cache_manager.library_version == mock_version - assert cache_manager.cache_dir == temp_project_dir / "cache" - assert cache_manager.cache_dir.exists() + # SQLite database should be created at parent level + assert cache_manager.db_path == temp_project_dir.parent / "cache.db" + assert cache_manager.db_path.exists() - def test_cache_file_path(self, cache_manager, temp_project_dir): - """Test cache file path generation.""" - jsonl_path = temp_project_dir / "test.jsonl" - cache_path = cache_manager._get_cache_file_path(jsonl_path) - - expected = temp_project_dir / "cache" / "test.json" - assert cache_path == expected + def test_database_path(self, cache_manager, temp_project_dir): + """Test that SQLite database is created at the correct location.""" + # Database should be at parent level (projects_dir/cache.db) + expected_db = temp_project_dir.parent / "cache.db" + assert cache_manager.db_path == expected_db + assert expected_db.exists() def test_save_and_load_entries( self, 
cache_manager, temp_project_dir, sample_entries @@ -122,9 +122,8 @@ def test_save_and_load_entries( # Save entries to cache cache_manager.save_cached_entries(jsonl_path, sample_entries) - # Verify cache file exists - cache_file = cache_manager._get_cache_file_path(jsonl_path) - assert cache_file.exists() + # Verify file is cached + assert cache_manager.is_file_cached(jsonl_path) # Load entries from cache loaded_entries = cache_manager.load_cached_entries(jsonl_path) @@ -136,30 +135,36 @@ def test_save_and_load_entries( assert loaded_entries[1].type == "assistant" assert loaded_entries[2].type == "summary" - def test_timestamp_based_cache_structure( + def test_message_storage_with_timestamps( self, cache_manager, temp_project_dir, sample_entries ): - """Test that cache uses timestamp-based structure.""" + """Test that messages are stored with correct timestamps in SQLite.""" + import sqlite3 + jsonl_path = temp_project_dir / "test.jsonl" jsonl_path.write_text("dummy content", encoding="utf-8") cache_manager.save_cached_entries(jsonl_path, sample_entries) - # Read raw cache file - cache_file = cache_manager._get_cache_file_path(jsonl_path) - with open(cache_file, "r") as f: - cache_data = json.load(f) - - # Verify timestamp-based structure - assert isinstance(cache_data, dict) - assert "2023-01-01T10:00:00Z" in cache_data - assert "2023-01-01T10:01:00Z" in cache_data - assert "_no_timestamp" in cache_data # Summary entry - - # Verify entry grouping - assert len(cache_data["2023-01-01T10:00:00Z"]) == 1 - assert len(cache_data["2023-01-01T10:01:00Z"]) == 1 - assert len(cache_data["_no_timestamp"]) == 1 + # Query the SQLite database directly to verify structure + # Filter by project_id since database is shared between tests + conn = sqlite3.connect(cache_manager.db_path) + conn.row_factory = sqlite3.Row + cursor = conn.execute( + "SELECT timestamp, type FROM messages WHERE project_id = ? 
ORDER BY timestamp NULLS LAST", + (cache_manager._project_id,), + ) + rows = cursor.fetchall() + conn.close() + + # Verify entries are stored with timestamps + assert len(rows) == 3 + assert rows[0]["timestamp"] == "2023-01-01T10:00:00Z" + assert rows[0]["type"] == "user" + assert rows[1]["timestamp"] == "2023-01-01T10:01:00Z" + assert rows[1]["type"] == "assistant" + assert rows[2]["timestamp"] is None # Summary has no timestamp + assert rows[2]["type"] == "summary" def test_cache_invalidation_file_modification( self, cache_manager, temp_project_dir, sample_entries @@ -186,17 +191,10 @@ def test_cache_invalidation_version_mismatch(self, temp_project_dir): # Create cache with version 1.0.0 with patch("claude_code_log.cache.get_library_version", return_value="1.0.0"): cache_manager_v1 = CacheManager(temp_project_dir, "1.0.0") - # Create some cache data - index_data = ProjectCache( - version="1.0.0", - cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(temp_project_dir), - cached_files={}, - sessions={}, - ) - with open(cache_manager_v1.index_file, "w") as f: - json.dump(index_data.model_dump(), f) + # Verify project was created with version 1.0.0 + cached_data = cache_manager_v1.get_cached_project_data() + assert cached_data is not None + assert cached_data.version == "1.0.0" # Create new cache manager with different version with patch("claude_code_log.cache.get_library_version", return_value="2.0.0"): @@ -269,16 +267,22 @@ def test_clear_cache(self, cache_manager, temp_project_dir, sample_entries): # Create cache cache_manager.save_cached_entries(jsonl_path, sample_entries) - cache_file = cache_manager._get_cache_file_path(jsonl_path) - assert cache_file.exists() - assert cache_manager.index_file.exists() + assert cache_manager.is_file_cached(jsonl_path) + + # Verify data exists before clearing + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert 
len(cached_data.cached_files) > 0 # Clear cache cache_manager.clear_cache() - # Verify files are deleted - assert not cache_file.exists() - assert not cache_manager.index_file.exists() + # Verify cache is cleared (no more files or sessions) + assert not cache_manager.is_file_cached(jsonl_path) + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) == 0 + assert len(cached_data.sessions) == 0 def test_session_cache_updates(self, cache_manager): """Test updating session cache data.""" @@ -586,17 +590,15 @@ def test_breaking_changes_0_8_0(self, temp_project_dir): class TestCacheErrorHandling: """Test cache error handling and edge cases.""" - def test_corrupted_cache_file(self, cache_manager, temp_project_dir): - """Test handling of corrupted cache files.""" + def test_missing_cache_entry(self, cache_manager, temp_project_dir): + """Test handling when cache entry doesn't exist.""" jsonl_path = temp_project_dir / "test.jsonl" jsonl_path.write_text("dummy content", encoding="utf-8") - # Create corrupted cache file - cache_file = cache_manager._get_cache_file_path(jsonl_path) - cache_file.parent.mkdir(exist_ok=True) - cache_file.write_text("invalid json content", encoding="utf-8") + # File exists but not cached + assert not cache_manager.is_file_cached(jsonl_path) - # Should handle gracefully + # Should return None when not cached result = cache_manager.load_cached_entries(jsonl_path) assert result is None diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index 25ffcc7b..2bce66df 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -5,7 +5,6 @@ import tempfile from pathlib import Path from unittest.mock import patch -from datetime import datetime import pytest from click.testing import CliRunner @@ -96,9 +95,9 @@ def test_cli_no_cache_flag(self, setup_test_project): result1 = runner.invoke(main, [str(project_dir)]) assert result1.exit_code == 
0 - # Check if cache was created - cache_dir = project_dir / "cache" - assert cache_dir.exists() + # Check if SQLite cache was created at parent level + cache_db = project_dir.parent / "cache.db" + assert cache_db.exists() # Clear the cache runner.invoke(main, [str(project_dir), "--clear-cache"]) @@ -107,12 +106,14 @@ def test_cli_no_cache_flag(self, setup_test_project): result2 = runner.invoke(main, [str(project_dir), "--no-cache"]) assert result2.exit_code == 0 - # Cache should not be created - cache_files = list(cache_dir.glob("*.json")) if cache_dir.exists() else [] - assert len(cache_files) == 0 + # Cache should be empty (project should not be populated) + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert cached_data.total_message_count == 0 def test_cli_clear_cache_flag(self, setup_test_project): - """Test --clear-cache flag removes cache files.""" + """Test --clear-cache flag clears cache data.""" project_dir = setup_test_project runner = CliRunner() @@ -121,19 +122,21 @@ def test_cli_clear_cache_flag(self, setup_test_project): result1 = runner.invoke(main, [str(project_dir)]) assert result1.exit_code == 0 - # Verify cache exists - cache_dir = project_dir / "cache" - assert cache_dir.exists() - cache_files = list(cache_dir.glob("*.json")) - assert len(cache_files) > 0 + # Verify cache exists with data + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert cached_data.total_message_count > 0 # Clear cache result2 = runner.invoke(main, [str(project_dir), "--clear-cache"]) assert result2.exit_code == 0 - # Verify cache is cleared - cache_files = list(cache_dir.glob("*.json")) if cache_dir.exists() else [] - assert len(cache_files) == 0 + # Verify cache is cleared (no files or sessions) + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = 
cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) == 0 def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): """Test caching with --all-projects flag.""" @@ -143,7 +146,7 @@ def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): project_dir.mkdir() jsonl_file = project_dir / f"session-{i}.jsonl" - with open(jsonl_file, "w") as f: + with open(jsonl_file, "w", encoding="utf-8") as f: for entry in sample_jsonl_data: # Modify session ID for each project entry_copy = entry.copy() @@ -157,14 +160,17 @@ def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): result = runner.invoke(main, [str(temp_projects_dir), "--all-projects"]) assert result.exit_code == 0 - # Verify cache created for each project + # Verify SQLite cache database created at projects level + cache_db = temp_projects_dir / "cache.db" + assert cache_db.exists() + + # Verify cache data exists for each project for i in range(3): project_dir = temp_projects_dir / f"project-{i}" - cache_dir = project_dir / "cache" - assert cache_dir.exists() - - cache_files = list(cache_dir.glob("*.json")) - assert len(cache_files) >= 1 # At least index.json + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) >= 1 def test_cli_date_filtering_with_cache(self, setup_test_project): """Test date filtering works correctly with caching.""" @@ -195,11 +201,15 @@ def test_convert_jsonl_to_html_with_cache(self, setup_test_project): output1 = convert_jsonl_to_html(input_path=project_dir, use_cache=True) assert output1.exists() - # Verify cache was created - cache_dir = project_dir / "cache" - assert cache_dir.exists() - cache_files = list(cache_dir.glob("*.json")) - assert len(cache_files) >= 1 + # Verify SQLite cache was created + cache_db = project_dir.parent / "cache.db" + 
assert cache_db.exists() + + # Verify cache has data + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) >= 1 # Second conversion (should use cache) output2 = convert_jsonl_to_html(input_path=project_dir, use_cache=True) @@ -213,11 +223,11 @@ def test_convert_jsonl_to_html_no_cache(self, setup_test_project): output = convert_jsonl_to_html(input_path=project_dir, use_cache=False) assert output.exists() - # Cache should not be created - cache_dir = project_dir / "cache" - if cache_dir.exists(): - cache_files = list(cache_dir.glob("*.json")) - assert len(cache_files) == 0 + # SQLite db may still exist from fixture setup, but project data should be empty + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert cached_data.total_message_count == 0 def test_process_projects_hierarchy_with_cache( self, temp_projects_dir, sample_jsonl_data @@ -229,7 +239,7 @@ def test_process_projects_hierarchy_with_cache( project_dir.mkdir() jsonl_file = project_dir / f"session-{i}.jsonl" - with open(jsonl_file, "w") as f: + with open(jsonl_file, "w", encoding="utf-8") as f: for entry in sample_jsonl_data: entry_copy = entry.copy() if "sessionId" in entry_copy: @@ -242,11 +252,17 @@ def test_process_projects_hierarchy_with_cache( ) assert output1.exists() - # Verify caches were created + # Verify SQLite cache database was created + cache_db = temp_projects_dir / "cache.db" + assert cache_db.exists() + + # Verify cache data exists for each project for i in range(2): project_dir = temp_projects_dir / f"project-{i}" - cache_dir = project_dir / "cache" - assert cache_dir.exists() + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) >= 1 # Second 
processing (should use cache) output2 = process_projects_hierarchy( @@ -306,7 +322,7 @@ def test_cache_performance_with_large_project(self, temp_projects_dir): ) jsonl_file = project_dir / "large-session.jsonl" - with open(jsonl_file, "w") as f: + with open(jsonl_file, "w", encoding="utf-8") as f: for entry in large_jsonl_data: f.write(json.dumps(entry) + "\n") @@ -415,19 +431,10 @@ def test_cache_version_upgrade_scenario(self, setup_test_project): # Create cache with old version with patch("claude_code_log.cache.get_library_version", return_value="1.0.0"): cache_manager_old = CacheManager(project_dir, "1.0.0") - # Create some dummy cache data - from claude_code_log.cache import ProjectCache - - old_cache = ProjectCache( - version="1.0.0", - cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(project_dir), - cached_files={}, - sessions={}, - ) - with open(cache_manager_old.index_file, "w") as f: - json.dump(old_cache.model_dump(), f) + # Verify project was created in SQLite database + cached_data = cache_manager_old.get_cached_project_data() + assert cached_data is not None + assert cached_data.version == "1.0.0" # Process with new version (should handle version mismatch) with patch("claude_code_log.cache.get_library_version", return_value="2.0.0"): diff --git a/test/test_cache_sqlite_integrity.py b/test/test_cache_sqlite_integrity.py new file mode 100644 index 00000000..eaa14a9b --- /dev/null +++ b/test/test_cache_sqlite_integrity.py @@ -0,0 +1,908 @@ +#!/usr/bin/env python3 +"""Comprehensive SQL-level integrity tests for SQLite cache.""" + +import json +import sqlite3 +import tempfile +import threading +import time +from pathlib import Path + +import pytest + +from claude_code_log.cache import CacheManager, SessionCacheData +from claude_code_log.models import ( + AssistantMessage, + AssistantTranscriptEntry, + TextContent, + ThinkingContent, + ToolResultContent, + ToolUseContent, + UsageInfo, + UserMessage, + 
UserTranscriptEntry, +) + + +@pytest.fixture +def temp_project_dir(): + """Create a temporary project directory.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_dir = Path(temp_dir) / "test-project" + project_dir.mkdir() + yield project_dir + + +@pytest.fixture +def cache_manager(temp_project_dir): + """Create a cache manager for testing.""" + return CacheManager(temp_project_dir, "1.0.0") + + +@pytest.fixture +def sample_user_entry(): + """Create a sample user transcript entry.""" + return UserTranscriptEntry( + type="user", + uuid="user-123", + timestamp="2024-01-01T10:00:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test/path", + message=UserMessage(role="user", content="Hello, world!"), + ) + + +@pytest.fixture +def sample_assistant_entry(): + """Create a sample assistant transcript entry with token usage.""" + return AssistantTranscriptEntry( + type="assistant", + uuid="assistant-123", + timestamp="2024-01-01T10:01:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid="user-123", + isSidechain=False, + userType="assistant", + cwd="/test/path", + requestId="req-123", + message=AssistantMessage( + id="msg-123", + type="message", + role="assistant", + model="claude-3", + content=[TextContent(type="text", text="Hi there!")], + usage=UsageInfo( + input_tokens=100, + output_tokens=50, + cache_creation_input_tokens=10, + cache_read_input_tokens=5, + ), + ), + ) + + +class TestCascadeDelete: + """Tests for cascade delete behaviour.""" + + def test_cascade_delete_project_removes_all_nested_records( + self, temp_project_dir, sample_user_entry, sample_assistant_entry + ): + """Deleting project cascades to files, messages, sessions.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create a JSONL file with entries + jsonl_file = temp_project_dir / "test.jsonl" + jsonl_file.write_text( + json.dumps(sample_user_entry.model_dump()) + + "\n" + + 
json.dumps(sample_assistant_entry.model_dump()) + + "\n", + encoding="utf-8", + ) + + # Save entries to cache + cache_manager.save_cached_entries( + jsonl_file, [sample_user_entry, sample_assistant_entry] + ) + + # Update session cache + cache_manager.update_session_cache( + { + "session-1": SessionCacheData( + session_id="session-1", + summary="Test session", + first_timestamp="2024-01-01T10:00:00Z", + last_timestamp="2024-01-01T10:01:00Z", + message_count=2, + first_user_message="Hello, world!", + cwd="/test/path", + total_input_tokens=100, + total_output_tokens=50, + ) + } + ) + + # Get project ID + project_id = cache_manager._project_id + + # Verify data exists + with cache_manager._get_connection() as conn: + files = conn.execute( + "SELECT COUNT(*) FROM cached_files WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + messages = conn.execute( + "SELECT COUNT(*) FROM messages WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + sessions = conn.execute( + "SELECT COUNT(*) FROM sessions WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + + assert files > 0 + assert messages > 0 + assert sessions > 0 + + # Delete the project + with cache_manager._get_connection() as conn: + conn.execute("DELETE FROM projects WHERE id = ?", (project_id,)) + conn.commit() + + # Verify cascade delete removed all nested records + with cache_manager._get_connection() as conn: + files = conn.execute( + "SELECT COUNT(*) FROM cached_files WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + messages = conn.execute( + "SELECT COUNT(*) FROM messages WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + sessions = conn.execute( + "SELECT COUNT(*) FROM sessions WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + + assert files == 0 + assert messages == 0 + assert sessions == 0 + + +class TestTokenSumVerification: + """Tests for token sum calculations.""" + + def test_session_token_totals_match_message_sums( + self, temp_project_dir, 
sample_assistant_entry + ): + """Session token totals equal sum of message tokens.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create multiple assistant entries with known token values + entries = [] + total_input = 0 + total_output = 0 + + for i in range(5): + entry = AssistantTranscriptEntry( + type="assistant", + uuid=f"assistant-{i}", + timestamp=f"2024-01-01T10:{i:02d}:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="assistant", + cwd="/test/path", + requestId=f"req-{i}", + message=AssistantMessage( + id=f"msg-{i}", + type="message", + role="assistant", + model="claude-3", + content=[TextContent(type="text", text=f"Response {i}")], + usage=UsageInfo( + input_tokens=100 + i * 10, + output_tokens=50 + i * 5, + ), + ), + ) + entries.append(entry) + total_input += 100 + i * 10 + total_output += 50 + i * 5 + + # Save entries + jsonl_file = temp_project_dir / "test.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Query actual sums from database + with cache_manager._get_connection() as conn: + row = conn.execute( + """ + SELECT + COALESCE(SUM(input_tokens), 0) as total_input, + COALESCE(SUM(output_tokens), 0) as total_output + FROM messages + WHERE project_id = ? 
AND session_id = 'session-1' + """, + (cache_manager._project_id,), + ).fetchone() + + assert row["total_input"] == total_input + assert row["total_output"] == total_output + + +class TestForeignKeyConstraints: + """Tests for foreign key constraint enforcement.""" + + def test_cannot_insert_message_without_valid_file_id(self, cache_manager): + """Foreign key prevents orphaned messages.""" + with cache_manager._get_connection() as conn: + # Attempt to insert message with non-existent file_id + with pytest.raises(sqlite3.IntegrityError): + conn.execute( + """ + INSERT INTO messages (project_id, file_id, type, content) + VALUES (?, 99999, 'user', '{}') + """, + (cache_manager._project_id,), + ) + + def test_cannot_insert_message_without_valid_project_id(self, cache_manager): + """Foreign key prevents messages with invalid project.""" + with cache_manager._get_connection() as conn: + # First create a valid file + conn.execute( + """ + INSERT INTO cached_files (project_id, file_name, file_path, source_mtime, cached_mtime) + VALUES (?, 'test.jsonl', '/test/test.jsonl', 0, 0) + """, + (cache_manager._project_id,), + ) + file_id = conn.execute( + "SELECT id FROM cached_files WHERE file_name = 'test.jsonl'" + ).fetchone()[0] + + # Attempt to insert message with non-existent project_id + with pytest.raises(sqlite3.IntegrityError): + conn.execute( + """ + INSERT INTO messages (project_id, file_id, type, content) + VALUES (99999, ?, 'user', '{}') + """, + (file_id,), + ) + + +class TestSerializationRoundTrip: + """Tests for message serialization/deserialization.""" + + def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): + """Tool use, images, thinking content survive JSON serialization.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create entries with complex content types + entries = [ + # Tool use + AssistantTranscriptEntry( + type="assistant", + uuid="tool-use-msg", + timestamp="2024-01-01T10:00:00Z", + sessionId="session-1", + 
version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="assistant", + cwd="/test", + requestId="req-1", + message=AssistantMessage( + id="msg-tool", + type="message", + role="assistant", + model="claude-3", + content=[ + ToolUseContent( + type="tool_use", + id="tool-123", + name="read_file", + input={"path": "/test/file.txt"}, + ) + ], + ), + ), + # Tool result + UserTranscriptEntry( + type="user", + uuid="tool-result-msg", + timestamp="2024-01-01T10:01:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid="tool-use-msg", + isSidechain=False, + userType="tool_result", + cwd="/test", + message=UserMessage( + role="user", + content=[ + ToolResultContent( + type="tool_result", + tool_use_id="tool-123", + content="File contents here", + ) + ], + ), + ), + # Thinking content + AssistantTranscriptEntry( + type="assistant", + uuid="thinking-msg", + timestamp="2024-01-01T10:02:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="assistant", + cwd="/test", + requestId="req-2", + message=AssistantMessage( + id="msg-thinking", + type="message", + role="assistant", + model="claude-3", + content=[ + ThinkingContent( + type="thinking", + thinking="Let me think about this...", + ), + TextContent(type="text", text="Here's my answer"), + ], + ), + ), + ] + + # Save entries + jsonl_file = temp_project_dir / "complex.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Load and compare + loaded = cache_manager.load_cached_entries(jsonl_file) + assert loaded is not None + assert len(loaded) == len(entries) + + for original, loaded_entry in zip(entries, loaded): + # Compare key fields - exact serialization may differ due to default values + assert original.type == loaded_entry.type + assert original.uuid == loaded_entry.uuid + assert original.timestamp == loaded_entry.timestamp + assert 
original.sessionId == loaded_entry.sessionId + + # For assistant entries, verify message content types are preserved + if hasattr(original, "message") and hasattr(original.message, "content"): + orig_content = original.message.content + loaded_content = loaded_entry.message.content + assert len(orig_content) == len(loaded_content) + for orig_item, loaded_item in zip(orig_content, loaded_content): + assert orig_item.type == loaded_item.type + + +class TestIndexUniquenessConstraints: + """Tests for UNIQUE constraints on indexes.""" + + def test_duplicate_file_name_in_project_fails(self, cache_manager): + """UNIQUE(project_id, file_name) enforced.""" + with cache_manager._get_connection() as conn: + # Insert first file + conn.execute( + """ + INSERT INTO cached_files (project_id, file_name, file_path, source_mtime, cached_mtime) + VALUES (?, 'duplicate.jsonl', '/path1', 0, 0) + """, + (cache_manager._project_id,), + ) + conn.commit() + + # Attempt to insert duplicate file name + with pytest.raises(sqlite3.IntegrityError): + conn.execute( + """ + INSERT INTO cached_files (project_id, file_name, file_path, source_mtime, cached_mtime) + VALUES (?, 'duplicate.jsonl', '/path2', 0, 0) + """, + (cache_manager._project_id,), + ) + + def test_duplicate_session_id_in_project_fails(self, cache_manager): + """UNIQUE(project_id, session_id) enforced.""" + with cache_manager._get_connection() as conn: + # Insert first session + conn.execute( + """ + INSERT INTO sessions (project_id, session_id, first_timestamp, last_timestamp) + VALUES (?, 'dup-session', '2024-01-01', '2024-01-01') + """, + (cache_manager._project_id,), + ) + conn.commit() + + # Attempt to insert duplicate session_id + with pytest.raises(sqlite3.IntegrityError): + conn.execute( + """ + INSERT INTO sessions (project_id, session_id, first_timestamp, last_timestamp) + VALUES (?, 'dup-session', '2024-01-02', '2024-01-02') + """, + (cache_manager._project_id,), + ) + + +class TestTimestampOrdering: + """Tests for 
message timestamp ordering.""" + + def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry): + """Messages retrieved in timestamp order.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create entries with out-of-order timestamps + entries = [] + timestamps = [ + "2024-01-01T10:05:00Z", + "2024-01-01T10:01:00Z", + "2024-01-01T10:03:00Z", + "2024-01-01T10:02:00Z", + "2024-01-01T10:04:00Z", + ] + + for i, ts in enumerate(timestamps): + entry = UserTranscriptEntry( + type="user", + uuid=f"user-{i}", + timestamp=ts, + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage(role="user", content=f"Message {i}"), + ) + entries.append(entry) + + jsonl_file = temp_project_dir / "order.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Load and verify order + loaded = cache_manager.load_cached_entries(jsonl_file) + assert loaded is not None + + loaded_timestamps = [ + ts for e in loaded if (ts := getattr(e, "timestamp", None)) is not None + ] + assert loaded_timestamps == sorted(loaded_timestamps) + + +class TestNullTokenHandling: + """Tests for NULL token value handling.""" + + def test_null_tokens_handled_in_aggregates(self, temp_project_dir): + """NULL token values don't corrupt sums.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create mix of entries with and without tokens + entries = [ + # Entry with tokens + AssistantTranscriptEntry( + type="assistant", + uuid="with-tokens", + timestamp="2024-01-01T10:00:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="assistant", + cwd="/test", + requestId="req-1", + message=AssistantMessage( + id="msg-1", + type="message", + role="assistant", + model="claude-3", + content=[TextContent(type="text", text="With 
tokens")], + usage=UsageInfo(input_tokens=100, output_tokens=50), + ), + ), + # Entry without usage (NULL tokens) + UserTranscriptEntry( + type="user", + uuid="without-tokens", + timestamp="2024-01-01T10:01:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage(role="user", content="No tokens"), + ), + ] + + jsonl_file = temp_project_dir / "mixed.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Query sums - COALESCE should handle NULLs + with cache_manager._get_connection() as conn: + row = conn.execute( + """ + SELECT + COALESCE(SUM(input_tokens), 0) as total_input, + COALESCE(SUM(output_tokens), 0) as total_output + FROM messages + WHERE project_id = ? + """, + (cache_manager._project_id,), + ).fetchone() + + # Should only count the entry with tokens + assert row["total_input"] == 100 + assert row["total_output"] == 50 + + +class TestMessageFileRelationship: + """Tests for message-file relationships.""" + + def test_cached_file_message_count_matches_actual( + self, temp_project_dir, sample_user_entry, sample_assistant_entry + ): + """message_count column matches COUNT(*) FROM messages.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + entries = [sample_user_entry, sample_assistant_entry] + jsonl_file = temp_project_dir / "count.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + with cache_manager._get_connection() as conn: + # Get stored message count + file_row = conn.execute( + "SELECT id, message_count FROM cached_files WHERE file_name = ?", + ("count.jsonl",), + ).fetchone() + + # Get actual count + actual_count = conn.execute( + "SELECT COUNT(*) FROM messages WHERE file_id = ?", + (file_row["id"],), + 
).fetchone()[0] + + assert file_row["message_count"] == actual_count + assert file_row["message_count"] == len(entries) + + +class TestWALMode: + """Tests for WAL journal mode.""" + + def test_wal_journal_mode_enabled(self, cache_manager): + """Verify WAL mode is active.""" + with cache_manager._get_connection() as conn: + row = conn.execute("PRAGMA journal_mode").fetchone() + assert row[0] == "wal" + + +class TestConcurrentAccess: + """Tests for concurrent database access.""" + + def test_concurrent_readers_dont_block(self, temp_project_dir): + """Multiple readers can access simultaneously.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Add some data + entry = UserTranscriptEntry( + type="user", + uuid="user-1", + timestamp="2024-01-01T10:00:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage(role="user", content="Test"), + ) + + jsonl_file = temp_project_dir / "concurrent.jsonl" + jsonl_file.write_text(json.dumps(entry.model_dump()), encoding="utf-8") + cache_manager.save_cached_entries(jsonl_file, [entry]) + + results = [] + errors = [] + + def read_data(): + try: + cm = CacheManager(temp_project_dir, "1.0.0") + data = cm.get_cached_project_data() + results.append(data is not None) + except Exception as e: + errors.append(str(e)) + + # Start multiple reader threads + threads = [threading.Thread(target=read_data) for _ in range(5)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + + assert len(errors) == 0, f"Errors occurred: {errors}" + assert all(results), "Not all reads succeeded" + + +class TestLargeDatasetPerformance: + """Tests for performance with large datasets.""" + + def test_query_performance_with_large_dataset(self, temp_project_dir): + """Queries complete in reasonable time with large datasets.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create 1000 entries (reduced from 10k for test 
speed) + entries = [] + for i in range(1000): + entry = UserTranscriptEntry( + type="user", + uuid=f"user-{i}", + timestamp=f"2024-01-{(i % 30) + 1:02d}T{i % 24:02d}:00:00Z", + sessionId=f"session-{i % 10}", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage(role="user", content=f"Message {i}"), + ) + entries.append(entry) + + jsonl_file = temp_project_dir / "large.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Time filtered loading + start = time.time() + loaded = cache_manager.load_cached_entries_filtered( + jsonl_file, "2024-01-15", "2024-01-20" + ) + elapsed = time.time() - start + + assert loaded is not None + assert elapsed < 2.0, f"Query took too long: {elapsed:.2f}s" + + +class TestSessionBoundaryDetection: + """Tests for session boundary correctness.""" + + def test_sessions_contain_correct_messages(self, temp_project_dir): + """Each session contains only its messages.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create entries for multiple sessions + entries = [] + for session_num in range(3): + for msg_num in range(5): + entry = UserTranscriptEntry( + type="user", + uuid=f"user-s{session_num}-m{msg_num}", + timestamp=f"2024-01-01T{10 + session_num}:{msg_num * 10:02d}:00Z", + sessionId=f"session-{session_num}", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage( + role="user", + content=f"Session {session_num} message {msg_num}", + ), + ) + entries.append(entry) + + jsonl_file = temp_project_dir / "sessions.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Verify each session has exactly 5 messages + with cache_manager._get_connection() as conn: + 
for session_num in range(3): + count = conn.execute( + "SELECT COUNT(*) FROM messages WHERE project_id = ? AND session_id = ?", + (cache_manager._project_id, f"session-{session_num}"), + ).fetchone()[0] + assert count == 5, ( + f"Session {session_num} has {count} messages, expected 5" + ) + + +class TestCacheStatsAccuracy: + """Tests for cache statistics accuracy.""" + + def test_cache_stats_match_actual_counts( + self, temp_project_dir, sample_user_entry, sample_assistant_entry + ): + """get_cache_stats() returns accurate data.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + entries = [sample_user_entry, sample_assistant_entry] + jsonl_file = temp_project_dir / "stats.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Update aggregates + cache_manager.update_project_aggregates( + total_message_count=2, + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=10, + total_cache_read_tokens=5, + earliest_timestamp="2024-01-01T10:00:00Z", + latest_timestamp="2024-01-01T10:01:00Z", + ) + + cache_manager.update_session_cache( + { + "session-1": SessionCacheData( + session_id="session-1", + summary=None, + first_timestamp="2024-01-01T10:00:00Z", + last_timestamp="2024-01-01T10:01:00Z", + message_count=2, + first_user_message="Hello, world!", + ) + } + ) + + stats = cache_manager.get_cache_stats() + + assert stats["cache_enabled"] is True + assert stats["cached_files_count"] == 1 + assert stats["total_cached_messages"] == 2 + assert stats["total_sessions"] == 1 + + +class TestWorkingDirectoryQuery: + """Tests for working directory queries.""" + + def test_get_working_directories_returns_distinct_cwds(self, temp_project_dir): + """get_working_directories() returns unique values.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create sessions with duplicate cwds + 
cache_manager.update_session_cache( + { + "session-1": SessionCacheData( + session_id="session-1", + summary=None, + first_timestamp="2024-01-01T10:00:00Z", + last_timestamp="2024-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + cwd="/path/to/project", + ), + "session-2": SessionCacheData( + session_id="session-2", + summary=None, + first_timestamp="2024-01-02T10:00:00Z", + last_timestamp="2024-01-02T10:01:00Z", + message_count=1, + first_user_message="Test", + cwd="/path/to/project", # Same cwd + ), + "session-3": SessionCacheData( + session_id="session-3", + summary=None, + first_timestamp="2024-01-03T10:00:00Z", + last_timestamp="2024-01-03T10:01:00Z", + message_count=1, + first_user_message="Test", + cwd="/different/path", + ), + } + ) + + cwds = cache_manager.get_working_directories() + + # Should be deduplicated + assert len(cwds) == 2 + assert set(cwds) == {"/path/to/project", "/different/path"} + + +class TestFileModificationDetection: + """Tests for file modification time detection.""" + + def test_mtime_change_invalidates_cache(self, temp_project_dir, sample_user_entry): + """Changing file mtime marks cache as stale.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + jsonl_file = temp_project_dir / "mtime.jsonl" + jsonl_file.write_text( + json.dumps(sample_user_entry.model_dump()), encoding="utf-8" + ) + cache_manager.save_cached_entries(jsonl_file, [sample_user_entry]) + + # Verify cache is valid + assert cache_manager.is_file_cached(jsonl_file) is True + + # Wait and touch file to change mtime + time.sleep(1.1) + jsonl_file.write_text( + json.dumps(sample_user_entry.model_dump()) + "\n", encoding="utf-8" + ) + + # Cache should be invalidated + assert cache_manager.is_file_cached(jsonl_file) is False + + +class TestMigrationIntegrity: + """Tests for migration system integrity.""" + + def test_migration_checksum_stored(self, temp_project_dir): + """Migration checksums are stored in _schema_version.""" + cache_manager = 
CacheManager(temp_project_dir, "1.0.0") + + with cache_manager._get_connection() as conn: + rows = conn.execute( + "SELECT version, filename, checksum FROM _schema_version" + ).fetchall() + + assert len(rows) >= 1 + for row in rows: + assert row["version"] > 0 + assert row["filename"].endswith(".sql") + assert len(row["checksum"]) == 64 # SHA256 hex length + + def test_migration_applied_only_once(self, temp_project_dir): + """Migrations are not re-applied on subsequent runs.""" + # First run + cm1 = CacheManager(temp_project_dir, "1.0.0") + + with cm1._get_connection() as conn: + initial_count = conn.execute( + "SELECT COUNT(*) FROM _schema_version" + ).fetchone()[0] + + # Second run + cm2 = CacheManager(temp_project_dir, "1.0.0") + + with cm2._get_connection() as conn: + final_count = conn.execute( + "SELECT COUNT(*) FROM _schema_version" + ).fetchone()[0] + + assert initial_count == final_count diff --git a/test/test_html_regeneration.py b/test/test_html_regeneration.py index 26da5d32..a0ade6c3 100644 --- a/test/test_html_regeneration.py +++ b/test/test_html_regeneration.py @@ -48,9 +48,13 @@ def test_combined_transcript_regeneration_on_jsonl_change(self, tmp_path): # Second run: No changes, should skip regeneration with patch("builtins.print") as mock_print: convert_jsonl_to_html(project_dir) - mock_print.assert_any_call( - "HTML file combined_transcripts.html is current, skipping regeneration" + # Check for either the per-file skip message (legacy) or the early-exit message (new) + printed_calls = [str(call) for call in mock_print.call_args_list] + skip_found = any( + "skipping regeneration" in call or "All HTML files are current" in call + for call in printed_calls ) + assert skip_found, f"Expected skip message, got: {printed_calls}" # Verify file wasn't regenerated assert output_file.stat().st_mtime == original_mtime @@ -101,15 +105,13 @@ def test_individual_session_regeneration_on_jsonl_change(self, tmp_path): # Second run: No changes, should skip 
regeneration with patch("builtins.print") as mock_print: convert_jsonl_to_html(project_dir, generate_individual_sessions=True) - # Check that session file regeneration was skipped + # Check for either per-file skip message (legacy) or early-exit message (new) printed_calls = [str(call) for call in mock_print.call_args_list] - session_skip_found = any( - "Session file" in call and "skipping regeneration" in call + skip_found = any( + "skipping regeneration" in call or "All HTML files are current" in call for call in printed_calls ) - assert session_skip_found, ( - f"Expected session skip message, got: {printed_calls}" - ) + assert skip_found, f"Expected skip message, got: {printed_calls}" # Verify file wasn't regenerated assert session_file.stat().st_mtime == original_mtime @@ -170,7 +172,7 @@ def test_projects_index_regeneration_on_jsonl_change(self, tmp_path): # Second run: No changes, should skip regeneration with patch("builtins.print") as mock_print: - process_projects_hierarchy(projects_dir) + process_projects_hierarchy(projects_dir, silent=False) mock_print.assert_any_call("Index HTML is current, skipping regeneration") # Verify file wasn't regenerated @@ -314,3 +316,177 @@ def test_single_file_mode_regeneration_behavior(self, tmp_path): # Verify file wasn't regenerated (this is expected behavior for single file mode) assert output_file.stat().st_mtime == original_mtime + + +class TestIncrementalHtmlCache: + """Test incremental HTML regeneration using html_cache table.""" + + def test_html_cache_tracks_generation(self, tmp_path): + """Test that html_cache table correctly tracks when HTML was generated.""" + # Setup project with JSONL + project_dir = tmp_path / "test_project" + project_dir.mkdir() + + test_data_dir = Path(__file__).parent / "test_data" + jsonl_file = project_dir / "test.jsonl" + jsonl_file.write_text( + (test_data_dir / "representative_messages.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", + ) + + # Generate HTML + 
convert_jsonl_to_html(project_dir, generate_individual_sessions=True) + + # Check that html_cache has entries + cache_manager = CacheManager(project_dir, get_library_version()) + combined_cache = cache_manager.get_html_cache("combined_transcripts.html") + + assert combined_cache is not None + assert combined_cache.html_path == "combined_transcripts.html" + assert combined_cache.source_session_id is None # Combined has no session + assert combined_cache.message_count > 0 + + # Check session HTML cache + session_files = list(project_dir.glob("session-*.html")) + assert len(session_files) >= 1 + + session_file = session_files[0] + session_id = session_file.stem.replace("session-", "") + session_cache = cache_manager.get_html_cache(session_file.name) + + assert session_cache is not None + assert session_cache.source_session_id == session_id + assert session_cache.message_count > 0 + + def test_incremental_regeneration_only_updates_changed_sessions(self, tmp_path): + """Test that only sessions with changes are regenerated.""" + # Setup project with two JSONL files (different sessions) + project_dir = tmp_path / "test_project" + project_dir.mkdir() + + # Create two sessions in separate files + session1 = ( + '{"type":"user","timestamp":"2025-01-01T10:00:00Z","parentUuid":null,' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"session1",' + '"version":"1.0.0","uuid":"msg1","message":{"role":"user",' + '"content":[{"type":"text","text":"Session 1 message"}]}}\n' + ) + session2 = ( + '{"type":"user","timestamp":"2025-01-01T11:00:00Z","parentUuid":null,' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"session2",' + '"version":"1.0.0","uuid":"msg2","message":{"role":"user",' + '"content":[{"type":"text","text":"Session 2 message"}]}}\n' + ) + + (project_dir / "session1.jsonl").write_text(session1, encoding="utf-8") + (project_dir / "session2.jsonl").write_text(session2, encoding="utf-8") + + # First run: Generate all HTML + 
convert_jsonl_to_html(project_dir, generate_individual_sessions=True) + + session1_html = project_dir / "session-session1.html" + session2_html = project_dir / "session-session2.html" + assert session1_html.exists() + assert session2_html.exists() + + session1_mtime = session1_html.stat().st_mtime + session2_mtime = session2_html.stat().st_mtime + + # Wait and modify only session1 + time.sleep(1.1) # Ensure > 1.0 second difference + new_msg = ( + '{"type":"user","timestamp":"2025-01-01T10:05:00Z","parentUuid":"msg1",' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"session1",' + '"version":"1.0.0","uuid":"msg1b","message":{"role":"user",' + '"content":[{"type":"text","text":"New message in session 1"}]}}\n' + ) + with open(project_dir / "session1.jsonl", "a", encoding="utf-8") as f: + f.write(new_msg) + + # Second run: Should only regenerate session1 + convert_jsonl_to_html(project_dir, generate_individual_sessions=True) + + # Session 1 should be regenerated (newer mtime) + assert session1_html.stat().st_mtime > session1_mtime + + # Session 2 should NOT be regenerated (same mtime) + assert session2_html.stat().st_mtime == session2_mtime + + def test_html_cache_detects_library_version_change(self, tmp_path): + """Test that HTML is regenerated when library version changes.""" + # Setup project + project_dir = tmp_path / "test_project" + project_dir.mkdir() + + test_data_dir = Path(__file__).parent / "test_data" + jsonl_file = project_dir / "test.jsonl" + jsonl_file.write_text( + (test_data_dir / "representative_messages.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", + ) + + # Generate HTML with current version + convert_jsonl_to_html(project_dir) + + cache_manager = CacheManager(project_dir, get_library_version()) + + # Check staleness with same version + is_stale, reason = cache_manager.is_html_stale("combined_transcripts.html") + assert not is_stale + assert reason == "up_to_date" + + # Create new cache manager with different 
version + cache_manager_new = CacheManager(project_dir, "999.999.999") + is_stale, reason = cache_manager_new.is_html_stale("combined_transcripts.html") + assert is_stale + assert reason == "version_mismatch" + + def test_html_cache_message_count_validation(self, tmp_path): + """Test that session HTML regenerates when message count changes.""" + # Setup project + project_dir = tmp_path / "test_project" + project_dir.mkdir() + + session_msg = ( + '{"type":"user","timestamp":"2025-01-01T10:00:00Z","parentUuid":null,' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"test_session",' + '"version":"1.0.0","uuid":"msg1","message":{"role":"user",' + '"content":[{"type":"text","text":"Test message"}]}}\n' + ) + (project_dir / "test.jsonl").write_text(session_msg, encoding="utf-8") + + # Generate HTML + convert_jsonl_to_html(project_dir, generate_individual_sessions=True) + + cache_manager = CacheManager(project_dir, get_library_version()) + + # Check session is not stale + is_stale, reason = cache_manager.is_html_stale( + "session-test_session.html", "test_session" + ) + assert not is_stale + + # Add a new message (increases count) + time.sleep(1.1) + new_msg = ( + '{"type":"user","timestamp":"2025-01-01T10:01:00Z","parentUuid":"msg1",' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"test_session",' + '"version":"1.0.0","uuid":"msg2","message":{"role":"user",' + '"content":[{"type":"text","text":"Second message"}]}}\n' + ) + with open(project_dir / "test.jsonl", "a", encoding="utf-8") as f: + f.write(new_msg) + + # Update cache to reflect new message + ensure_fresh_cache(project_dir, cache_manager) + + # Now session should be stale (message count changed) + is_stale, reason = cache_manager.is_html_stale( + "session-test_session.html", "test_session" + ) + assert is_stale + assert reason == "session_updated" diff --git a/test/test_integration_realistic.py b/test/test_integration_realistic.py index 4431024c..1c051290 100644 --- 
a/test/test_integration_realistic.py +++ b/test/test_integration_realistic.py @@ -183,7 +183,9 @@ def test_projects_dont_merge_by_prefix(self, temp_projects_copy: Path) -> None: content=f"test message {suffix}", session_id=f"session{suffix}", ) - (project_dir / "test.jsonl").write_text(json.dumps(entry) + "\n") + (project_dir / "test.jsonl").write_text( + json.dumps(entry) + "\n", encoding="utf-8" + ) # Process all projects process_projects_hierarchy(temp_projects_copy) @@ -243,13 +245,9 @@ def test_clear_cache_with_projects_dir(self, temp_projects_copy: Path) -> None: ) assert result.exit_code == 0 - # Verify caches were created - cache_exists = False - for project_dir in temp_projects_copy.iterdir(): - if project_dir.is_dir() and (project_dir / "cache").exists(): - cache_exists = True - break - assert cache_exists, "Cache should exist after processing" + # Verify SQLite cache was created + cache_db = temp_projects_copy / "cache.db" + assert cache_db.exists(), "SQLite cache should exist after processing" # Clear caches result = runner.invoke( @@ -265,18 +263,8 @@ def test_clear_cache_with_projects_dir(self, temp_projects_copy: Path) -> None: assert result.exit_code == 0 assert "clear" in result.output.lower() - # Verify all cache files were actually deleted - remaining_cache_files: list[Path] = [] - for project_dir in temp_projects_copy.iterdir(): - if not project_dir.is_dir(): - continue - cache_dir = project_dir / "cache" - if cache_dir.exists(): - remaining_cache_files.extend(cache_dir.glob("*.json")) - - assert not remaining_cache_files, ( - f"Cache files should be deleted but found: {remaining_cache_files}" - ) + # Verify SQLite database was deleted + assert not cache_db.exists(), "SQLite cache database should be deleted" def test_clear_html_with_projects_dir(self, temp_projects_copy: Path) -> None: """Test HTML clearing with custom projects directory.""" @@ -430,17 +418,19 @@ def test_cache_creation_all_projects(self, temp_projects_copy: Path) -> None: 
"""Test cache is created correctly for all projects.""" process_projects_hierarchy(temp_projects_copy) + # Verify SQLite cache database was created + cache_db = temp_projects_copy / "cache.db" + assert cache_db.exists(), "SQLite cache database should exist" + for project_dir in temp_projects_copy.iterdir(): if not project_dir.is_dir() or not list(project_dir.glob("*.jsonl")): continue - cache_file = project_dir / "cache" / "index.json" - assert cache_file.exists(), f"Cache index missing for {project_dir.name}" - - # Verify cache structure - cache_data = json.loads(cache_file.read_text()) - assert "version" in cache_data - assert "sessions" in cache_data + cache_manager = CacheManager(project_dir, get_library_version()) + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None, f"Cache missing for {project_dir.name}" + assert cached_data.version is not None + assert isinstance(cached_data.sessions, dict) def test_cache_invalidation_on_modification(self, temp_projects_copy: Path) -> None: """Test cache detects file modifications.""" @@ -459,12 +449,14 @@ def test_cache_invalidation_on_modification(self, temp_projects_copy: Path) -> N # Modify a file test_file = jsonl_files[0] - original_content = test_file.read_text() + original_content = test_file.read_text(encoding="utf-8") entry = make_valid_user_entry( content="test modification", session_id="test-modification", ) - test_file.write_text(original_content + "\n" + json.dumps(entry) + "\n") + test_file.write_text( + original_content + "\n" + json.dumps(entry) + "\n", encoding="utf-8" + ) # Check if modification is detected modified = cache_manager.get_modified_files(list(project_dir.glob("*.jsonl"))) @@ -478,11 +470,11 @@ def test_cache_version_stored(self, temp_projects_copy: Path) -> None: convert_jsonl_to_html(project_dir) - cache_file = project_dir / "cache" / "index.json" - cache_data = json.loads(cache_file.read_text()) + cache_manager = CacheManager(project_dir, 
get_library_version()) + cached_data = cache_manager.get_cached_project_data() - assert "version" in cache_data - assert cache_data["version"] == get_library_version() + assert cached_data is not None + assert cached_data.version == get_library_version() @pytest.mark.integration @@ -506,7 +498,9 @@ def test_worktree_projects_stay_separate(self, temp_projects_copy: Path) -> None content=f"worktree test {suffix}", session_id=f"session{suffix}", ) - (project_dir / "test.jsonl").write_text(json.dumps(entry) + "\n") + (project_dir / "test.jsonl").write_text( + json.dumps(entry) + "\n", encoding="utf-8" + ) # Process all process_projects_hierarchy(temp_projects_copy) @@ -671,15 +665,15 @@ def test_cache_version_mismatch_triggers_rebuild( pytest.skip("Cache not generated by fixture") # Corrupt version in cache - cache_data = json.loads(cache_index.read_text()) + cache_data = json.loads(cache_index.read_text(encoding="utf-8")) cache_data["version"] = "0.0.0-fake" - cache_index.write_text(json.dumps(cache_data)) + cache_index.write_text(json.dumps(cache_data), encoding="utf-8") # Process should rebuild cache convert_jsonl_to_html(project) # Cache should have correct version now - new_cache_data = json.loads(cache_index.read_text()) + new_cache_data = json.loads(cache_index.read_text(encoding="utf-8")) assert new_cache_data["version"] == get_library_version() def test_missing_cache_files_regenerated(self, projects_with_cache: Path) -> None: @@ -761,7 +755,7 @@ def test_adding_lines_triggers_cache_update( content="New message added", session_id="test-incremental", ) - with open(jsonl_file, "a") as f: + with open(jsonl_file, "a", encoding="utf-8") as f: f.write("\n" + json.dumps(entry) + "\n") time.sleep(0.01) @@ -796,7 +790,7 @@ def test_adding_lines_triggers_html_regeneration( content="Another new message", session_id="test-incremental", ) - with open(jsonl_file, "a") as f: + with open(jsonl_file, "a", encoding="utf-8") as f: f.write("\n" + json.dumps(entry) + "\n") 
time.sleep(0.01) @@ -825,7 +819,7 @@ def test_new_content_appears_in_html(self, projects_with_cache: Path) -> None: content=unique_content, session_id="test-content-check", ) - with open(jsonl_file, "a") as f: + with open(jsonl_file, "a", encoding="utf-8") as f: f.write("\n" + json.dumps(entry) + "\n") convert_jsonl_to_html(project) @@ -848,7 +842,7 @@ def test_new_file_detected_and_processed(self, projects_with_cache: Path) -> Non if not cache_index.exists(): pytest.skip("Cache not generated by fixture") - original_cache = json.loads(cache_index.read_text()) + original_cache = json.loads(cache_index.read_text(encoding="utf-8")) original_session_count = len(original_cache.get("sessions", {})) # Add new JSONL file @@ -857,12 +851,12 @@ def test_new_file_detected_and_processed(self, projects_with_cache: Path) -> Non content="First message in new file", session_id="brand-new-session", ) - new_file.write_text(json.dumps(entry) + "\n") + new_file.write_text(json.dumps(entry) + "\n", encoding="utf-8") convert_jsonl_to_html(project) # Cache should include new session - new_cache = json.loads(cache_index.read_text()) + new_cache = json.loads(cache_index.read_text(encoding="utf-8")) assert len(new_cache.get("sessions", {})) > original_session_count def test_new_session_html_generated(self, projects_with_cache: Path) -> None: @@ -877,7 +871,7 @@ def test_new_session_html_generated(self, projects_with_cache: Path) -> None: content="Message for new session", session_id=new_session_id, ) - new_file.write_text(json.dumps(entry) + "\n") + new_file.write_text(json.dumps(entry) + "\n", encoding="utf-8") convert_jsonl_to_html(project) @@ -905,7 +899,7 @@ def test_index_html_updated_with_new_project_stats( content="Extra session message", session_id="extra-session", ) - new_file.write_text(json.dumps(entry) + "\n") + new_file.write_text(json.dumps(entry) + "\n", encoding="utf-8") time.sleep(0.01) @@ -963,7 +957,7 @@ def test_output_overwrites_existing(self, temp_projects_copy: Path) 
-> None: pytest.skip("JSSoundRecorder test data not available") custom_output = temp_projects_copy / "overwrite_test.html" - custom_output.write_text("original content") + custom_output.write_text("original content", encoding="utf-8") result = runner.invoke(main, [str(project), "-o", str(custom_output)]) @@ -1127,7 +1121,7 @@ def test_index_regenerated_when_project_cache_updates( content="Trigger index update", session_id="index-test", ) - with open(jsonl_file, "a") as f: + with open(jsonl_file, "a", encoding="utf-8") as f: f.write(json.dumps(entry) + "\n") time.sleep(0.01) @@ -1198,13 +1192,13 @@ def test_corrupted_cache_index_handled(self, projects_with_cache: Path) -> None: pytest.skip("Cache not generated by fixture") # Corrupt the cache index - cache_index.write_text("{invalid json") + cache_index.write_text("{invalid json", encoding="utf-8") # Should recover and reprocess convert_jsonl_to_html(project) # Cache should be valid again - cache_data = json.loads(cache_index.read_text()) + cache_data = json.loads(cache_index.read_text(encoding="utf-8")) assert "version" in cache_data def test_missing_cache_directory_handled(self, projects_with_cache: Path) -> None: @@ -1510,7 +1504,7 @@ def test_project_with_many_sessions(self, temp_projects_copy: Path) -> None: session_id=f"stress-{i}", timestamp=f"2024-12-{10 + i % 20:02d}T10:00:00Z", ) - session_file.write_text(json.dumps(entry) + "\n") + session_file.write_text(json.dumps(entry) + "\n", encoding="utf-8") # Should handle many files without error convert_jsonl_to_html(project) @@ -1526,7 +1520,7 @@ def test_large_single_session(self, temp_projects_copy: Path) -> None: large_file = project / "large-session.jsonl" # Create file with 500 messages - with open(large_file, "w") as f: + with open(large_file, "w", encoding="utf-8") as f: for i in range(500): timestamp = f"2024-12-15T{10 + (i // 60):02d}:{i % 60:02d}:00Z" content = f"Message number {i} with some content" diff --git a/test/test_pagination.py 
b/test/test_pagination.py new file mode 100644 index 00000000..44db8b0c --- /dev/null +++ b/test/test_pagination.py @@ -0,0 +1,550 @@ +#!/usr/bin/env python3 +"""Tests for pagination functionality.""" + +import json +import tempfile +from pathlib import Path +from unittest.mock import patch + +import pytest + +from claude_code_log.cache import ( + CacheManager, + SessionCacheData, +) +from claude_code_log.converter import ( + _get_page_html_path, + _assign_sessions_to_pages, +) + + +class TestPageHtmlPath: + """Tests for _get_page_html_path function.""" + + def test_page_1_returns_base_filename(self): + """Page 1 should return combined_transcripts.html.""" + assert _get_page_html_path(1) == "combined_transcripts.html" + + def test_page_2_returns_numbered_filename(self): + """Page 2 should return combined_transcripts_2.html.""" + assert _get_page_html_path(2) == "combined_transcripts_2.html" + + def test_page_10_returns_numbered_filename(self): + """Page 10 should return combined_transcripts_10.html.""" + assert _get_page_html_path(10) == "combined_transcripts_10.html" + + +class TestAssignSessionsToPages: + """Tests for _assign_sessions_to_pages function.""" + + def _make_session( + self, session_id: str, message_count: int, timestamp: str + ) -> SessionCacheData: + """Helper to create a SessionCacheData instance.""" + return SessionCacheData( + session_id=session_id, + message_count=message_count, + first_timestamp=timestamp, + last_timestamp=timestamp, + first_user_message="Test message", + total_input_tokens=0, + total_output_tokens=0, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + def test_single_session_below_threshold(self): + """Single session below page_size should result in one page.""" + sessions = { + "s1": self._make_session("s1", 100, "2023-01-01T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + assert len(pages) == 1 + assert pages[0] == ["s1"] + + def 
test_multiple_sessions_below_threshold(self): + """Multiple sessions below page_size should be on one page.""" + sessions = { + "s1": self._make_session("s1", 1000, "2023-01-01T10:00:00Z"), + "s2": self._make_session("s2", 2000, "2023-01-02T10:00:00Z"), + "s3": self._make_session("s3", 1500, "2023-01-03T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + assert len(pages) == 1 + assert sorted(pages[0]) == ["s1", "s2", "s3"] + + def test_session_exceeds_threshold_creates_new_page(self): + """When adding a session exceeds threshold, it becomes last on current page.""" + sessions = { + "s1": self._make_session("s1", 3000, "2023-01-01T10:00:00Z"), + "s2": self._make_session("s2", 3000, "2023-01-02T10:00:00Z"), + "s3": self._make_session("s3", 2000, "2023-01-03T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + # s1 (3000) + s2 (3000) > 5000, so s2 becomes last on page 1 + # s3 (2000) goes to page 2 + assert len(pages) == 2 + assert pages[0] == ["s1", "s2"] + assert pages[1] == ["s3"] + + def test_large_session_allows_overflow(self): + """A single large session is allowed to exceed page_size (no splitting).""" + sessions = { + "s1": self._make_session("s1", 10000, "2023-01-01T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + # Single session, even if large, stays on one page + assert len(pages) == 1 + assert pages[0] == ["s1"] + + def test_sessions_sorted_chronologically(self): + """Sessions should be assigned to pages in chronological order.""" + sessions = { + "s3": self._make_session("s3", 1000, "2023-01-03T10:00:00Z"), + "s1": self._make_session("s1", 1000, "2023-01-01T10:00:00Z"), + "s2": self._make_session("s2", 1000, "2023-01-02T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + assert len(pages) == 1 + # Should be in chronological order + assert pages[0] == ["s1", "s2", "s3"] + + def test_multiple_pages_with_overflow(self): + """Test 
complex pagination with multiple pages.""" + sessions = { + "s1": self._make_session("s1", 2000, "2023-01-01T10:00:00Z"), + "s2": self._make_session("s2", 4000, "2023-01-02T10:00:00Z"), # exceeds + "s3": self._make_session("s3", 3000, "2023-01-03T10:00:00Z"), + "s4": self._make_session("s4", 3000, "2023-01-04T10:00:00Z"), # exceeds + "s5": self._make_session("s5", 1000, "2023-01-05T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + # s1 (2000) + s2 (4000) > 5000, s2 last on page 1 + # s3 (3000) + s4 (3000) > 5000, s4 last on page 2 + # s5 (1000) on page 3 + assert len(pages) == 3 + assert pages[0] == ["s1", "s2"] + assert pages[1] == ["s3", "s4"] + assert pages[2] == ["s5"] + + def test_empty_sessions(self): + """Empty sessions dict should return empty list.""" + pages = _assign_sessions_to_pages({}, page_size=5000) + assert pages == [] + + +@pytest.fixture +def temp_project_dir(): + """Create a temporary project directory for testing.""" + with tempfile.TemporaryDirectory() as temp_dir: + yield Path(temp_dir) + + +@pytest.fixture +def mock_version(): + """Mock library version for consistent testing.""" + return "1.0.0-test" + + +@pytest.fixture +def cache_manager(temp_project_dir, mock_version): + """Create a cache manager for testing.""" + with patch("claude_code_log.cache.get_library_version", return_value=mock_version): + return CacheManager(temp_project_dir, mock_version) + + +class TestPageCacheMethods: + """Tests for page cache methods in CacheManager.""" + + def test_get_page_count_empty(self, cache_manager): + """get_page_count should return 0 when no pages exist.""" + assert cache_manager.get_page_count() == 0 + + def test_get_page_size_config_empty(self, cache_manager): + """get_page_size_config should return None when no pages exist.""" + assert cache_manager.get_page_size_config() is None + + def test_update_and_get_page_cache(self, cache_manager): + """Test updating and retrieving page cache data.""" + 
cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1", "s2"], + message_count=3000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-02T10:00:00Z", + total_input_tokens=1000, + total_output_tokens=500, + total_cache_creation_tokens=200, + total_cache_read_tokens=100, + ) + + page_data = cache_manager.get_page_data(1) + assert page_data is not None + assert page_data.page_number == 1 + assert page_data.html_path == "combined_transcripts.html" + assert page_data.page_size_config == 5000 + assert page_data.session_ids == ["s1", "s2"] + assert page_data.message_count == 3000 + assert page_data.first_timestamp == "2023-01-01T10:00:00Z" + assert page_data.last_timestamp == "2023-01-02T10:00:00Z" + assert page_data.total_input_tokens == 1000 + assert page_data.total_output_tokens == 500 + + def test_get_page_count_after_adding_pages(self, cache_manager): + """get_page_count should return correct count after adding pages.""" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + cache_manager.update_page_cache( + page_number=2, + html_path="combined_transcripts_2.html", + page_size_config=5000, + session_ids=["s2"], + message_count=2000, + first_timestamp="2023-01-02T10:00:00Z", + last_timestamp="2023-01-02T11:00:00Z", + total_input_tokens=200, + total_output_tokens=100, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + assert cache_manager.get_page_count() == 2 + + def test_get_page_size_config_after_adding_page(self, cache_manager): + """get_page_size_config should return the configured page size.""" + cache_manager.update_page_cache( + 
page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + assert cache_manager.get_page_size_config() == 5000 + + def test_is_page_stale_no_cache(self, cache_manager): + """is_page_stale should return True when page not in cache.""" + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is True + assert "not_cached" in reason or "not in cache" in reason.lower() + + def test_is_page_stale_page_size_changed(self, cache_manager): + """is_page_stale should return True when page_size changed.""" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + is_stale, reason = cache_manager.is_page_stale(1, 10000) # Different page_size + assert is_stale is True + assert "page_size" in reason.lower() or "size" in reason.lower() + + def test_invalidate_all_pages(self, cache_manager): + """invalidate_all_pages should remove all page cache entries.""" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + cache_manager.update_page_cache( + page_number=2, + html_path="combined_transcripts_2.html", + page_size_config=5000, + session_ids=["s2"], + message_count=2000, + 
first_timestamp="2023-01-02T10:00:00Z", + last_timestamp="2023-01-02T11:00:00Z", + total_input_tokens=200, + total_output_tokens=100, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + old_paths = cache_manager.invalidate_all_pages() + + assert len(old_paths) == 2 + assert cache_manager.get_page_count() == 0 + assert cache_manager.get_page_data(1) is None + assert cache_manager.get_page_data(2) is None + + def test_get_all_pages(self, cache_manager): + """get_all_pages should return all page cache entries.""" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + cache_manager.update_page_cache( + page_number=2, + html_path="combined_transcripts_2.html", + page_size_config=5000, + session_ids=["s2"], + message_count=2000, + first_timestamp="2023-01-02T10:00:00Z", + last_timestamp="2023-01-02T11:00:00Z", + total_input_tokens=200, + total_output_tokens=100, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + all_pages = cache_manager.get_all_pages() + + assert len(all_pages) == 2 + assert all_pages[0].page_number == 1 + assert all_pages[1].page_number == 2 + + +# Integration tests for pagination with converter + + +def _create_session_messages(session_id: str, num_messages: int, base_timestamp: str): + """Helper to create messages for a session.""" + messages = [] + for i in range(num_messages): + # Alternate between user and assistant messages + if i % 2 == 0: + messages.append( + { + "type": "user", + "uuid": f"{session_id}-user-{i}", + "timestamp": f"{base_timestamp}T{10 + i // 60:02d}:{i % 60:02d}:00Z", + "sessionId": session_id, + "version": "1.0.0", + "parentUuid": None, + "isSidechain": False, + "userType": "user", + 
"cwd": "/test", + "message": {"role": "user", "content": f"Message {i} from user"}, + } + ) + else: + messages.append( + { + "type": "assistant", + "uuid": f"{session_id}-assistant-{i}", + "timestamp": f"{base_timestamp}T{10 + i // 60:02d}:{i % 60:02d}:00Z", + "sessionId": session_id, + "version": "1.0.0", + "parentUuid": None, + "isSidechain": False, + "userType": "assistant", + "cwd": "/test", + "requestId": f"req-{session_id}-{i}", + "message": { + "id": f"msg-{session_id}-{i}", + "type": "message", + "role": "assistant", + "model": "claude-3", + "content": [{"type": "text", "text": f"Response {i}"}], + "usage": {"input_tokens": 10, "output_tokens": 15}, + }, + } + ) + return messages + + +class TestPaginationIntegration: + """Integration tests for pagination with the converter.""" + + def test_small_project_no_pagination(self, temp_project_dir): + """Projects below page_size should create single combined file.""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create a project with 50 messages (below default 5000) + jsonl_file = temp_project_dir / "session1.jsonl" + messages = _create_session_messages("session1", 50, "2023-01-01") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # Convert with default page_size + output = convert_jsonl_to_html(temp_project_dir, page_size=5000, silent=True) + + # Should create single combined file + assert output.name == "combined_transcripts.html" + assert (temp_project_dir / "combined_transcripts.html").exists() + assert not (temp_project_dir / "combined_transcripts_2.html").exists() + + def test_large_project_creates_multiple_pages(self, temp_project_dir): + """Projects above page_size should create multiple page files.""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create 4 sessions x 15 messages = 60 messages; page_size=20 forces pagination + for i, session_id in enumerate( + ["session1", "session2", "session3", "session4"] + ):
+ jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 15, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # Convert with small page_size to force pagination + output = convert_jsonl_to_html(temp_project_dir, page_size=20, silent=True) + + # Should create multiple page files + assert output.name == "combined_transcripts.html" + assert (temp_project_dir / "combined_transcripts.html").exists() + # With 4 sessions x 15 messages = 60 messages, page_size=20 + # Should create at least 2 pages + assert (temp_project_dir / "combined_transcripts_2.html").exists() + + def test_page_size_change_regenerates_all(self, temp_project_dir): + """Changing page_size should regenerate all pages.""" + from claude_code_log.converter import convert_jsonl_to_html + from claude_code_log.cache import CacheManager, get_library_version + + # Create sessions + for i, session_id in enumerate(["session1", "session2", "session3"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # First conversion with page_size=30 + convert_jsonl_to_html(temp_project_dir, page_size=30, silent=True) + + # Check cache has page_size=30 + cache_manager = CacheManager(temp_project_dir, get_library_version()) + assert cache_manager.get_page_size_config() == 30 + + # Second conversion with different page_size=25 + convert_jsonl_to_html(temp_project_dir, page_size=25, silent=True) + + # Cache should now have page_size=25 + cache_manager2 = CacheManager(temp_project_dir, get_library_version()) + assert cache_manager2.get_page_size_config() == 25 + + def test_pagination_with_very_small_page_size(self, temp_project_dir): + """Test pagination with very small page size respects session 
boundaries.""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create 4 sessions with 10 messages each + for i, session_id in enumerate(["s1", "s2", "s3", "s4"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 10, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # Convert with tiny page_size=5 (each session has 10 messages) + # New simpler pagination logic: + # - Add session, then check if page > limit + # - If over, close page immediately + # s1: add, count=10 > 5 -> page 1 = [s1] + # s2: add, count=10 > 5 -> page 2 = [s2] + # s3: add, count=10 > 5 -> page 3 = [s3] + # s4: add, count=10 > 5 -> page 4 = [s4] + convert_jsonl_to_html(temp_project_dir, page_size=5, silent=True) + + # Should create 4 pages (one per session, each exceeds threshold) + assert (temp_project_dir / "combined_transcripts.html").exists() + assert (temp_project_dir / "combined_transcripts_2.html").exists() + assert (temp_project_dir / "combined_transcripts_3.html").exists() + assert (temp_project_dir / "combined_transcripts_4.html").exists() + + def test_pagination_html_contains_navigation(self, temp_project_dir): + """Paginated pages should contain navigation links.""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create 4 sessions that will span multiple pages + # With page_size=15 and sessions of 10 messages: + # s1 (10): page empty, add s1 (count=10) + # s2 (10): 10+10 > 15 and page not empty -> s2 becomes last, page 1 = [s1, s2] + # s3 (10): page empty, add s3 (count=10) + # s4 (10): 10+10 > 15 and page not empty -> s4 becomes last, page 2 = [s3, s4] + for i, session_id in enumerate(["s1", "s2", "s3", "s4"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 10, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in 
messages: + f.write(json.dumps(msg) + "\n") + + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Check page 1 has Next link (pre-enabled when page exceeds threshold) + page1_content = (temp_project_dir / "combined_transcripts.html").read_text( + encoding="utf-8" + ) + assert "Next" in page1_content or "combined_transcripts_2.html" in page1_content + + # Check page 2 has Previous link + page2_content = (temp_project_dir / "combined_transcripts_2.html").read_text( + encoding="utf-8" + ) + assert ( + "Previous" in page2_content or "combined_transcripts.html" in page2_content + ) + + def test_page_contains_stats(self, temp_project_dir): + """Paginated pages should contain stats (message count, date range).""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create sessions + for i, session_id in enumerate(["s1", "s2"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Check page contains stats + page1_content = (temp_project_dir / "combined_transcripts.html").read_text( + encoding="utf-8" + ) + assert "messages" in page1_content.lower() + assert "Page 1" in page1_content or "page-navigation" in page1_content diff --git a/test/test_performance.py b/test/test_performance.py index 8f9541b3..f813e9a3 100644 --- a/test/test_performance.py +++ b/test/test_performance.py @@ -206,7 +206,7 @@ def test_benchmark_all_projects(self, real_projects_path: Path) -> None: def _write_github_summary(self, summary_path: str, results: List[dict]) -> None: """Write benchmark results to GitHub Actions Job Summary.""" - with open(summary_path, "a") as f: + with open(summary_path, "a", encoding="utf-8") as f: f.write("\n## 📊 Performance Benchmark Results\n\n") f.write( "| Project | 
Messages | Input (MB) | Output (MB) | " diff --git a/test/test_project_display_name.py b/test/test_project_display_name.py index e4e60560..ae092294 100644 --- a/test/test_project_display_name.py +++ b/test/test_project_display_name.py @@ -73,15 +73,6 @@ def test_empty_working_directories(self): # Should fall back to path conversion assert result == "Users/dain/workspace/platform/frontend/next" - def test_none_working_directories(self): - """Test fallback when working directories is None.""" - project_dir_name = "-Users-dain-workspace-platform-frontend-next" - working_directories = None - - result = get_project_display_name(project_dir_name, working_directories) - # Should fall back to path conversion - assert result == "Users/dain/workspace/platform/frontend/next" - def test_single_working_directory(self): """Test with a single working directory.""" project_dir_name = "-Users-dain-workspace-simple-project" @@ -123,3 +114,27 @@ def test_working_directories_same_name_different_paths(self): result = get_project_display_name(project_dir_name, working_directories) # Should pick the root directory assert result == "shared-names" + + def test_tmp_paths_filtered_out(self): + """Test that temporary paths (pytest, macOS temp) are filtered out.""" + project_dir_name = "-tmp-pytest-123-test_foo0" + working_directories = [ + "/private/var/folders/4n/2f7pppjd2_n0fftzg8vrlg040000gn/T/pytest-91/test_foo0", + "/Users/dain/workspace/real-project", + ] + + result = get_project_display_name(project_dir_name, working_directories) + # Should use the real project, not the pytest temp dir + assert result == "real-project" + + def test_only_tmp_paths_falls_back(self): + """Test fallback when all working directories are tmp paths.""" + project_dir_name = "-tmp-pytest-123-test_foo0" + working_directories = [ + "/private/var/folders/4n/test", + "/tmp/pytest-91/test_foo0", + ] + + result = get_project_display_name(project_dir_name, working_directories) + # Should fall back to converting 
project directory name + assert result == "tmp/pytest/123/test_foo0" diff --git a/test/test_project_matching.py b/test/test_project_matching.py index c0283e70..86a73548 100644 --- a/test/test_project_matching.py +++ b/test/test_project_matching.py @@ -26,27 +26,20 @@ def test_find_projects_by_cwd_with_cache(self): (project1 / "test1.jsonl").touch() (project2 / "test2.jsonl").touch() - # Mock cache data for projects - mock_cache1 = Mock() - mock_cache1.working_directories = ["/Users/test/workspace/myproject"] - - mock_cache2 = Mock() - mock_cache2.working_directories = ["/Users/test/other/project"] - with patch("claude_code_log.cli.CacheManager") as mock_cache_manager: def cache_side_effect(project_dir, version): cache_instance = Mock() if project_dir == project1: - cache_instance.get_cached_project_data.return_value = ( - mock_cache1 - ) + cache_instance.get_working_directories.return_value = [ + "/Users/test/workspace/myproject" + ] elif project_dir == project2: - cache_instance.get_cached_project_data.return_value = ( - mock_cache2 - ) + cache_instance.get_working_directories.return_value = [ + "/Users/test/other/project" + ] else: - cache_instance.get_cached_project_data.return_value = None + cache_instance.get_working_directories.return_value = [] return cache_instance mock_cache_manager.side_effect = cache_side_effect @@ -74,20 +67,16 @@ def test_find_projects_by_cwd_subdirectory_matching(self): project1.mkdir() (project1 / "test1.jsonl").touch() - # Mock cache data with parent directory - mock_cache1 = Mock() - mock_cache1.working_directories = ["/Users/test/workspace/myproject"] - with patch("claude_code_log.cli.CacheManager") as mock_cache_manager: def cache_side_effect(project_dir, version): cache_instance = Mock() if project_dir == project1: - cache_instance.get_cached_project_data.return_value = ( - mock_cache1 - ) + cache_instance.get_working_directories.return_value = [ + "/Users/test/workspace/myproject" + ] else: - 
cache_instance.get_cached_project_data.return_value = None + cache_instance.get_working_directories.return_value = [] return cache_instance mock_cache_manager.side_effect = cache_side_effect diff --git a/test/test_sidechain_agents.py b/test/test_sidechain_agents.py index d0423b6d..195608ef 100644 --- a/test/test_sidechain_agents.py +++ b/test/test_sidechain_agents.py @@ -18,13 +18,19 @@ def test_agent_insertion(): # Write main transcript main_file = tmpdir_path / "main.jsonl" main_file.write_text( - (Path(__file__).parent / "test_data" / "sidechain_main.jsonl").read_text() + (Path(__file__).parent / "test_data" / "sidechain_main.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) # Write agent transcript (must match agentId in main file) agent_file = tmpdir_path / "agent-e1c84ba5.jsonl" agent_file.write_text( - (Path(__file__).parent / "test_data" / "sidechain_agent.jsonl").read_text() + (Path(__file__).parent / "test_data" / "sidechain_agent.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) # Load transcript with agent insertion (agent files discovered automatically) @@ -58,12 +64,18 @@ def test_deduplication_task_result_vs_sidechain(): # Write deduplication test data main_file = tmpdir_path / "main.jsonl" main_file.write_text( - (Path(__file__).parent / "test_data" / "dedup_main.jsonl").read_text() + (Path(__file__).parent / "test_data" / "dedup_main.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) agent_file = tmpdir_path / "agent-e1c84ba5.jsonl" agent_file.write_text( - (Path(__file__).parent / "test_data" / "dedup_agent.jsonl").read_text() + (Path(__file__).parent / "test_data" / "dedup_agent.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) # Load and render (agent files discovered automatically) @@ -91,13 +103,15 @@ def test_no_deduplication_when_content_different(): main_file.write_text( 
'{"parentUuid":null,"isSidechain":false,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"text","text":"Do something"}]},"uuid":"d-0","timestamp":"2025-01-15T12:00:00.000Z"}\n' '{"parentUuid":"d-0","isSidechain":false,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01test1","type":"message","role":"assistant","content":[{"type":"tool_use","id":"task-3","name":"Task","input":{"prompt":"Do it"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":20}},"requestId":"req_01test1","type":"assistant","uuid":"d-1","timestamp":"2025-01-15T12:00:05.000Z"}\n' - '{"parentUuid":"d-1","isSidechain":false,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-3","content":"Done A"}]},"uuid":"d-2","timestamp":"2025-01-15T12:00:15.000Z","toolUseResult":{"agentId":"ghi789","content":"Done A"},"agentId":"ghi789"}\n' + '{"parentUuid":"d-1","isSidechain":false,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-3","content":"Done A"}]},"uuid":"d-2","timestamp":"2025-01-15T12:00:15.000Z","toolUseResult":{"agentId":"ghi789","content":"Done A"},"agentId":"ghi789"}\n', + encoding="utf-8", ) agent_file = tmpdir_path / "agent-ghi789.jsonl" agent_file.write_text( '{"parentUuid":null,"isSidechain":true,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","agentId":"ghi789","type":"user","message":{"role":"user","content":[{"type":"text","text":"Do it"}]},"uuid":"agent-d-0","timestamp":"2025-01-15T12:00:06.000Z"}\n' - 
'{"parentUuid":"agent-d-0","isSidechain":true,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","agentId":"ghi789","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01testagent1","type":"message","role":"assistant","content":[{"type":"text","text":"Done B"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":5,"output_tokens":10}},"requestId":"req_01testagent1","type":"assistant","uuid":"agent-d-1","timestamp":"2025-01-15T12:00:14.000Z"}\n' + '{"parentUuid":"agent-d-0","isSidechain":true,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","agentId":"ghi789","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01testagent1","type":"message","role":"assistant","content":[{"type":"text","text":"Done B"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":5,"output_tokens":10}},"requestId":"req_01testagent1","type":"assistant","uuid":"agent-d-1","timestamp":"2025-01-15T12:00:14.000Z"}\n', + encoding="utf-8", ) messages = load_transcript(main_file) @@ -116,12 +130,18 @@ def test_agent_messages_marked_as_sidechain(): main_file = tmpdir_path / "main.jsonl" main_file.write_text( - (Path(__file__).parent / "test_data" / "sidechain_main.jsonl").read_text() + (Path(__file__).parent / "test_data" / "sidechain_main.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) agent_file = tmpdir_path / "agent-e1c84ba5.jsonl" agent_file.write_text( - (Path(__file__).parent / "test_data" / "sidechain_agent.jsonl").read_text() + (Path(__file__).parent / "test_data" / "sidechain_agent.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) messages = load_transcript(main_file) @@ -153,7 +173,8 @@ def test_sidechain_tool_results_rendered(): main_file.write_text( 
'{"parentUuid":null,"isSidechain":false,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"text","text":"Search for files"}]},"uuid":"u-0","timestamp":"2025-01-15T14:00:00.000Z"}\n' '{"parentUuid":"u-0","isSidechain":false,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_main","type":"message","role":"assistant","content":[{"type":"tool_use","id":"task-glob","name":"Task","input":{"prompt":"Find test files"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":100,"output_tokens":50}},"requestId":"req_main","type":"assistant","uuid":"a-0","timestamp":"2025-01-15T14:00:05.000Z"}\n' - '{"parentUuid":"a-0","isSidechain":false,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-glob","content":"Found files: test.py"}]},"uuid":"u-1","timestamp":"2025-01-15T14:00:20.000Z","toolUseResult":{"agentId":"glob-agent","content":"Found files: test.py"},"agentId":"glob-agent"}\n' + '{"parentUuid":"a-0","isSidechain":false,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-glob","content":"Found files: test.py"}]},"uuid":"u-1","timestamp":"2025-01-15T14:00:20.000Z","toolUseResult":{"agentId":"glob-agent","content":"Found files: test.py"},"agentId":"glob-agent"}\n', + encoding="utf-8", ) # Create agent file with tool use (Glob) and its result @@ -166,7 +187,8 @@ def test_sidechain_tool_results_rendered(): # Tool result comes in a sidechain user message - THIS SHOULD BE RENDERED 
'{"parentUuid":"agent-a-0","isSidechain":true,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","agentId":"glob-agent","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"glob-123","content":"/workspace/test.py"}]},"uuid":"agent-u-1","timestamp":"2025-01-15T14:00:10.000Z"}\n' # Final assistant message - '{"parentUuid":"agent-u-1","isSidechain":true,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","agentId":"glob-agent","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_final","type":"message","role":"assistant","content":[{"type":"text","text":"Found files: test.py"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":60,"output_tokens":15}},"requestId":"req_agent_final","type":"assistant","uuid":"agent-a-1","timestamp":"2025-01-15T14:00:15.000Z"}\n' + '{"parentUuid":"agent-u-1","isSidechain":true,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","agentId":"glob-agent","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_final","type":"message","role":"assistant","content":[{"type":"text","text":"Found files: test.py"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":60,"output_tokens":15}},"requestId":"req_agent_final","type":"assistant","uuid":"agent-a-1","timestamp":"2025-01-15T14:00:15.000Z"}\n', + encoding="utf-8", ) messages = load_transcript(main_file) @@ -260,17 +282,20 @@ def test_multiple_agent_invocations(): '{"parentUuid":"d-0","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01","type":"message","role":"assistant","content":[{"type":"tool_use","id":"task-4a","name":"Task","input":{"prompt":"First 
task"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":100,"output_tokens":50}},"requestId":"req_01","type":"assistant","uuid":"d-1","timestamp":"2025-01-15T13:00:05.000Z"}\n' '{"parentUuid":"d-1","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-4a","content":"First done"}]},"uuid":"d-2","timestamp":"2025-01-15T13:00:15.000Z","toolUseResult":{"status":"completed","agentId":"first","content":[{"type":"text","text":"First done"}]},"agentId":"first"}\n' '{"parentUuid":"d-2","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_02","type":"message","role":"assistant","content":[{"type":"tool_use","id":"task-4b","name":"Task","input":{"prompt":"Second task"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":150,"output_tokens":60}},"requestId":"req_02","type":"assistant","uuid":"d-3","timestamp":"2025-01-15T13:00:20.000Z"}\n' - '{"parentUuid":"d-3","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-4b","content":"Second done"}]},"uuid":"d-4","timestamp":"2025-01-15T13:00:30.000Z","toolUseResult":{"status":"completed","agentId":"second","content":[{"type":"text","text":"Second done"}]},"agentId":"second"}\n' + '{"parentUuid":"d-3","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-4b","content":"Second 
done"}]},"uuid":"d-4","timestamp":"2025-01-15T13:00:30.000Z","toolUseResult":{"status":"completed","agentId":"second","content":[{"type":"text","text":"Second done"}]},"agentId":"second"}\n', + encoding="utf-8", ) (tmpdir_path / "agent-first.jsonl").write_text( '{"parentUuid":null,"isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"first","type":"user","message":{"role":"user","content":[{"type":"text","text":"First task"}]},"uuid":"agent-d-0","timestamp":"2025-01-15T13:00:06.000Z"}\n' - '{"parentUuid":"agent-d-0","isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"first","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_01","type":"message","role":"assistant","content":[{"type":"text","text":"First done"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":50,"output_tokens":25}},"requestId":"req_agent_01","type":"assistant","uuid":"agent-d-1","timestamp":"2025-01-15T13:00:14.000Z"}\n' + '{"parentUuid":"agent-d-0","isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"first","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_01","type":"message","role":"assistant","content":[{"type":"text","text":"First done"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":50,"output_tokens":25}},"requestId":"req_agent_01","type":"assistant","uuid":"agent-d-1","timestamp":"2025-01-15T13:00:14.000Z"}\n', + encoding="utf-8", ) (tmpdir_path / "agent-second.jsonl").write_text( '{"parentUuid":null,"isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"second","type":"user","message":{"role":"user","content":[{"type":"text","text":"Second 
task"}]},"uuid":"agent2-d-0","timestamp":"2025-01-15T13:00:21.000Z"}\n' - '{"parentUuid":"agent2-d-0","isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"second","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_02","type":"message","role":"assistant","content":[{"type":"text","text":"Second done"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":55,"output_tokens":30}},"requestId":"req_agent_02","type":"assistant","uuid":"agent2-d-1","timestamp":"2025-01-15T13:00:29.000Z"}\n' + '{"parentUuid":"agent2-d-0","isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"second","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_02","type":"message","role":"assistant","content":[{"type":"text","text":"Second done"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":55,"output_tokens":30}},"requestId":"req_agent_02","type":"assistant","uuid":"agent2-d-1","timestamp":"2025-01-15T13:00:29.000Z"}\n', + encoding="utf-8", ) messages = load_transcript(main_file) diff --git a/test/test_tui.py b/test/test_tui.py index 1cdd007c..30856317 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -89,7 +89,7 @@ def temp_project_dir(): # Write test data to JSONL file jsonl_file = project_path / "test-transcript.jsonl" - with open(jsonl_file, "w") as f: + with open(jsonl_file, "w", encoding="utf-8") as f: for entry in test_data: f.write(json.dumps(entry) + "\n") @@ -797,7 +797,7 @@ def test_run_session_browser_not_directory(self, capsys, temp_project_dir): """Test running session browser with a file instead of directory.""" # Create a file test_file = temp_project_dir / "test.txt" - test_file.write_text("test") + test_file.write_text("test", encoding="utf-8") run_session_browser(test_file) From a534b79a5840e149250710c8ed2c5fdc95036c74 Mon Sep 17 00:00:00 2001 
From: Daniel Demmel Date: Sat, 3 Jan 2026 22:57:28 +0000 Subject: [PATCH 02/23] Fixes after rebase --- claude_code_log/cache.py | 8 ++-- claude_code_log/converter.py | 21 +++++++-- claude_code_log/html/renderer.py | 32 ++++++++++++- claude_code_log/tui.py | 2 +- claude_code_log/utils.py | 44 ++++++++++++------ .../__snapshots__/test_snapshot_markdown.ambr | 2 +- test/test_cache_sqlite_integrity.py | 45 ++++++++++++------- test/test_message_types.py | 39 ++++++++++++++++ 8 files changed, 154 insertions(+), 39 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 81522d80..d7546b52 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -11,6 +11,7 @@ from packaging import version from pydantic import BaseModel +from .factories import create_transcript_entry from .migrations.runner import run_migrations from .models import ( AssistantTranscriptEntry, @@ -19,7 +20,6 @@ SystemTranscriptEntry, TranscriptEntry, UserTranscriptEntry, - parse_transcript_entry, ) @@ -322,7 +322,7 @@ def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, An def _deserialize_entry(self, row: sqlite3.Row) -> TranscriptEntry: """Convert SQLite row back to TranscriptEntry.""" content_dict = json.loads(row["content"]) - return parse_transcript_entry(content_dict) + return create_transcript_entry(content_dict) def _get_file_id(self, jsonl_path: Path) -> Optional[int]: """Get the file ID for a JSONL file.""" @@ -692,7 +692,9 @@ def _is_cache_version_compatible(self, cache_version: str) -> bool: # Define compatibility rules breaking_changes: dict[str, str] = { - # Example: "0.3.3": "0.3.4" means cache from 0.3.3 needs invalidation if lib is >= 0.3.4 + # 0.9.0 introduced _compact_ide_tags_for_preview() which transforms + # first_user_message to use emoji indicators instead of raw IDE tags + "0.8.0": "0.9.0", } cache_ver = version.parse(cache_version) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 
b56db8ed..52e5b4b6 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from pathlib import Path import traceback -from typing import Optional, Any, TYPE_CHECKING +from typing import Any, Dict, List, Optional, TYPE_CHECKING import dateparser @@ -31,7 +31,7 @@ UserTranscriptEntry, ToolResultContent, ) -from .renderer import get_renderer +from .renderer import get_renderer, is_html_outdated def get_file_extension(format: str) -> str: @@ -149,7 +149,16 @@ def load_transcript( messages: list[TranscriptEntry] = [] agent_ids: set[str] = set() # Collect agentId references while parsing - with open(jsonl_path, "r", encoding="utf-8", errors="replace") as f: + try: + f = open(jsonl_path, "r", encoding="utf-8", errors="replace") + except FileNotFoundError: + # Handle race condition: file may have been deleted between glob and open + # (e.g., Claude Code session cleanup) + if not silent: + print(f"Warning: File not found (may have been deleted): {jsonl_path}") + return [] + + with f: if not silent: print(f"Processing {jsonl_path}...") for line_no, line in enumerate(f, 1): # Start counting from 1 @@ -543,7 +552,8 @@ def _generate_paginated_html( Returns: Path to the first page (combined_transcripts.html) """ - from .renderer import generate_html, format_timestamp + from .html.renderer import generate_html + from .utils import format_timestamp # Check if page size changed - if so, invalidate all pages cached_page_size = cache_manager.get_page_size_config() @@ -726,6 +736,7 @@ def convert_jsonl_to_html( generate_individual_sessions, use_cache, silent, + page_size=page_size, ) @@ -739,6 +750,7 @@ def convert_jsonl_to( use_cache: bool = True, silent: bool = False, image_export_mode: Optional[str] = None, + page_size: int = 2000, ) -> Path: """Convert JSONL transcript(s) to the specified format. @@ -752,6 +764,7 @@ def convert_jsonl_to( use_cache: Whether to use caching. 
silent: Whether to suppress output. image_export_mode: Image export mode ("placeholder", "embedded", "referenced"). + page_size: Maximum messages per page for combined transcript pagination. If None, uses format default (embedded for HTML, referenced for Markdown). """ if not input_path.exists(): diff --git a/claude_code_log/html/renderer.py b/claude_code_log/html/renderer.py index 747671b0..0a47375f 100644 --- a/claude_code_log/html/renderer.py +++ b/claude_code_log/html/renderer.py @@ -478,8 +478,19 @@ def generate( title: Optional[str] = None, combined_transcript_link: Optional[str] = None, output_dir: Optional[Path] = None, + page_info: Optional[dict[str, Any]] = None, + page_stats: Optional[dict[str, Any]] = None, ) -> str: - """Generate HTML from transcript messages.""" + """Generate HTML from transcript messages. + + Args: + messages: List of transcript entries to render. + title: Optional title for the output. + combined_transcript_link: Optional link to combined transcript. + output_dir: Optional output directory for referenced images. + page_info: Optional pagination info (page_number, prev_link, next_link). + page_stats: Optional page statistics (message_count, date_range, token_summary). + """ import time t_start = time.time() @@ -516,6 +527,8 @@ def generate( css_class_from_message=css_class_from_message, get_message_emoji=get_message_emoji, is_session_header=is_session_header, + page_info=page_info, + page_stats=page_stats, ) ) @@ -592,12 +605,27 @@ def generate_html( messages: list[TranscriptEntry], title: Optional[str] = None, combined_transcript_link: Optional[str] = None, + page_info: Optional[dict[str, Any]] = None, + page_stats: Optional[dict[str, Any]] = None, ) -> str: """Generate HTML from transcript messages using Jinja2 templates. This is a convenience function that delegates to HtmlRenderer.generate. + + Args: + messages: List of transcript entries to render. + title: Optional title for the output. 
+ combined_transcript_link: Optional link to combined transcript. + page_info: Optional pagination info (page_number, prev_link, next_link). + page_stats: Optional page statistics (message_count, date_range, token_summary). """ - return HtmlRenderer().generate(messages, title, combined_transcript_link) + return HtmlRenderer().generate( + messages, + title, + combined_transcript_link, + page_info=page_info, + page_stats=page_stats, + ) def generate_session_html( diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 146a709c..7f1b398a 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -5,7 +5,7 @@ import webbrowser from datetime import datetime from pathlib import Path -from typing import Any, ClassVar, Optional, cast +from typing import Any, ClassVar, List, Optional, cast from textual.app import App, ComposeResult from textual.binding import Binding, BindingType diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index 0456c868..4a7241d0 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -4,7 +4,10 @@ import re from datetime import datetime, timezone from pathlib import Path -from typing import Optional +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: + from .cache import SessionCacheData from .models import ContentItem, TextContent, TranscriptEntry, UserTranscriptEntry from .factories import ( @@ -53,6 +56,16 @@ def format_timestamp_range(first_timestamp: str, last_timestamp: str) -> str: return "" +def _is_temp_path(path_str: str) -> bool: + """Check if a path is a temporary/test path that should be filtered out.""" + temp_patterns = [ + "/private/var/folders/", # macOS temp + "/tmp/", # Unix temp + "/var/folders/", # macOS temp (alternate) + ] + return any(pattern in path_str for pattern in temp_patterns) + + def get_project_display_name( project_dir_name: str, working_directories: Optional[list[str]] = None ) -> str: @@ -66,8 +79,18 @@ def get_project_display_name( The project 
display name (e.g., "claude-code-log") """ if working_directories: + # Filter out temporary paths (pytest, macOS temp dirs, etc.) + real_dirs = [wd for wd in working_directories if not _is_temp_path(wd)] + + # If all directories were filtered out, fall back to project_dir_name conversion + if not real_dirs: + display_name = project_dir_name + if display_name.startswith("-"): + display_name = display_name[1:].replace("-", "/") + return display_name + # Convert to Path objects with their original indices for tracking recency - paths_with_indices = [(Path(wd), i) for i, wd in enumerate(working_directories)] + paths_with_indices = [(Path(wd), i) for i, wd in enumerate(real_dirs)] # Sort by: 1) path depth (fewer parts = less nested), 2) recency (lower index = more recent) # This gives us the least nested path, with ties broken by recency @@ -166,18 +189,21 @@ def extract_text_content_length(content: list[ContentItem]) -> int: def extract_working_directories( - entries: list[TranscriptEntry] | list[SessionCacheData], + entries: "list[TranscriptEntry] | list[SessionCacheData] | list[Any]", ) -> list[str]: """Extract unique working directories from a list of entries. Ordered by timestamp (most recent first). 
Args: - entries: List of entries to extract working directories from + entries: List of TranscriptEntry or SessionCacheData to extract working directories from Returns: List of unique working directory paths found in the entries """ + # Import here to avoid circular dependency at runtime + from .cache import SessionCacheData + working_directories: dict[str, str] = {} for entry in entries: @@ -200,15 +226,7 @@ def extract_working_directories( return [path for path, _ in sorted_dirs] -# IDE tag patterns for compact preview rendering (same as renderer.py) -IDE_OPENED_FILE_PATTERN = re.compile( - r"(.*?)", re.DOTALL -) -IDE_SELECTION_PATTERN = re.compile(r"(.*?)", re.DOTALL) -IDE_DIAGNOSTICS_PATTERN = re.compile( - r"\s*(.*?)\s*", - re.DOTALL, -) +# IDE tag patterns imported from factories for compact preview rendering def _compact_ide_tags_for_preview(text_content: str) -> str: diff --git a/test/__snapshots__/test_snapshot_markdown.ambr b/test/__snapshots__/test_snapshot_markdown.ambr index fafc9b58..67a1d9ce 100644 --- a/test/__snapshots__/test_snapshot_markdown.ambr +++ b/test/__snapshots__/test_snapshot_markdown.ambr @@ -323,7 +323,7 @@ - # Claude Transcripts - tmp + # Claude Transcripts - test_multi_session_markdown0 ## Sessions diff --git a/test/test_cache_sqlite_integrity.py b/test/test_cache_sqlite_integrity.py index eaa14a9b..15d2ee3b 100644 --- a/test/test_cache_sqlite_integrity.py +++ b/test/test_cache_sqlite_integrity.py @@ -12,14 +12,14 @@ from claude_code_log.cache import CacheManager, SessionCacheData from claude_code_log.models import ( - AssistantMessage, + AssistantMessageModel, AssistantTranscriptEntry, TextContent, ThinkingContent, ToolResultContent, ToolUseContent, UsageInfo, - UserMessage, + UserMessageModel, UserTranscriptEntry, ) @@ -52,7 +52,9 @@ def sample_user_entry(): isSidechain=False, userType="external", cwd="/test/path", - message=UserMessage(role="user", content="Hello, world!"), + message=UserMessageModel( + role="user", 
content=[TextContent(type="text", text="Hello, world!")] + ), ) @@ -70,7 +72,7 @@ def sample_assistant_entry(): userType="assistant", cwd="/test/path", requestId="req-123", - message=AssistantMessage( + message=AssistantMessageModel( id="msg-123", type="message", role="assistant", @@ -200,7 +202,7 @@ def test_session_token_totals_match_message_sums( userType="assistant", cwd="/test/path", requestId=f"req-{i}", - message=AssistantMessage( + message=AssistantMessageModel( id=f"msg-{i}", type="message", role="assistant", @@ -304,7 +306,7 @@ def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): userType="assistant", cwd="/test", requestId="req-1", - message=AssistantMessage( + message=AssistantMessageModel( id="msg-tool", type="message", role="assistant", @@ -330,7 +332,7 @@ def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): isSidechain=False, userType="tool_result", cwd="/test", - message=UserMessage( + message=UserMessageModel( role="user", content=[ ToolResultContent( @@ -353,7 +355,7 @@ def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): userType="assistant", cwd="/test", requestId="req-2", - message=AssistantMessage( + message=AssistantMessageModel( id="msg-thinking", type="message", role="assistant", @@ -476,7 +478,9 @@ def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry isSidechain=False, userType="external", cwd="/test", - message=UserMessage(role="user", content=f"Message {i}"), + message=UserMessageModel( + role="user", content=[TextContent(type="text", text=f"Message {i}")] + ), ) entries.append(entry) @@ -518,7 +522,7 @@ def test_null_tokens_handled_in_aggregates(self, temp_project_dir): userType="assistant", cwd="/test", requestId="req-1", - message=AssistantMessage( + message=AssistantMessageModel( id="msg-1", type="message", role="assistant", @@ -538,7 +542,9 @@ def test_null_tokens_handled_in_aggregates(self, temp_project_dir): isSidechain=False, 
userType="external", cwd="/test", - message=UserMessage(role="user", content="No tokens"), + message=UserMessageModel( + role="user", content=[TextContent(type="text", text="No tokens")] + ), ), ] @@ -629,7 +635,9 @@ def test_concurrent_readers_dont_block(self, temp_project_dir): isSidechain=False, userType="external", cwd="/test", - message=UserMessage(role="user", content="Test"), + message=UserMessageModel( + role="user", content=[TextContent(type="text", text="Test")] + ), ) jsonl_file = temp_project_dir / "concurrent.jsonl" @@ -678,7 +686,9 @@ def test_query_performance_with_large_dataset(self, temp_project_dir): isSidechain=False, userType="external", cwd="/test", - message=UserMessage(role="user", content=f"Message {i}"), + message=UserMessageModel( + role="user", content=[TextContent(type="text", text=f"Message {i}")] + ), ) entries.append(entry) @@ -721,9 +731,14 @@ def test_sessions_contain_correct_messages(self, temp_project_dir): isSidechain=False, userType="external", cwd="/test", - message=UserMessage( + message=UserMessageModel( role="user", - content=f"Session {session_num} message {msg_num}", + content=[ + TextContent( + type="text", + text=f"Session {session_num} message {msg_num}", + ) + ], ), ) entries.append(entry) diff --git a/test/test_message_types.py b/test/test_message_types.py index 9403ba95..9be0e4c9 100644 --- a/test/test_message_types.py +++ b/test/test_message_types.py @@ -148,7 +148,46 @@ def test_queue_operation_type_support(): test_file_path.unlink() +def test_load_transcript_missing_file_returns_empty_list(capsys): + """Test that load_transcript handles missing files gracefully. + + This handles the race condition where a file exists when globbed but + is deleted before being read (e.g., Claude Code session cleanup). 
+ """ + nonexistent_file = Path("/tmp/nonexistent-session-abc123.jsonl") + # Ensure it doesn't exist + if nonexistent_file.exists(): + nonexistent_file.unlink() + + # Should return empty list, not raise FileNotFoundError + messages = load_transcript(nonexistent_file) + assert messages == [], f"Expected empty list, got {messages}" + + # Should print a warning + captured = capsys.readouterr() + assert "Warning: File not found" in captured.out + assert str(nonexistent_file) in captured.out + + print("✓ Test passed: Missing file returns empty list with warning") + + +def test_load_transcript_missing_file_silent_mode(): + """Test that load_transcript handles missing files in silent mode.""" + nonexistent_file = Path("/tmp/nonexistent-session-xyz789.jsonl") + # Ensure it doesn't exist + if nonexistent_file.exists(): + nonexistent_file.unlink() + + # Should return empty list without printing + messages = load_transcript(nonexistent_file, silent=True) + assert messages == [], f"Expected empty list, got {messages}" + + print("✓ Test passed: Missing file in silent mode returns empty list") + + if __name__ == "__main__": test_summary_type_support() test_queue_operation_type_support() + test_load_transcript_missing_file_returns_empty_list(None) # type: ignore + test_load_transcript_missing_file_silent_mode() print("\n✅ All message type tests passed!") From 46b976c30761df01b0e5e048c3d0e6d7470072d4 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sun, 4 Jan 2026 00:35:16 +0000 Subject: [PATCH 03/23] Add next page link hiding to prevent broken links before next session is processed --- claude_code_log/converter.py | 56 ++++++- .../templates/components/page_nav_styles.css | 5 + .../html/templates/transcript.html | 6 +- test/__snapshots__/test_snapshot_html.ambr | 20 +++ test/test_pagination.py | 137 ++++++++++++++++++ 5 files changed, 216 insertions(+), 8 deletions(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 52e5b4b6..151f47b0 100644 --- 
a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -490,6 +490,50 @@ def _get_page_html_path(page_number: int) -> str: return f"combined_transcripts_{page_number}.html" +# Regex pattern to match and update the next link marker block +_NEXT_LINK_PATTERN = re.compile( + r'(.*?class="page-nav-link next) last-page(".*?)', + re.DOTALL, +) + + +def _enable_next_link_on_previous_page(output_dir: Path, page_number: int) -> bool: + """Enable the next link on a previous page by removing the last-page class. + + When a new page is created, the previous page's "Next" link (which was hidden + with the last-page CSS class) needs to be revealed. This function performs + an in-place edit to remove that class. + + Args: + output_dir: Directory containing the HTML files + page_number: The page number whose next link should be enabled + + Returns: + True if the file was modified, False otherwise + """ + if page_number < 1: + return False + + page_path = output_dir / _get_page_html_path(page_number) + if not page_path.exists(): + return False + + content = page_path.read_text(encoding="utf-8") + + # Check if there's a last-page class to remove + if "last-page" not in content: + return False + + # Replace the pattern to remove last-page class + new_content, count = _NEXT_LINK_PATTERN.subn(r"\1\2", content) + + if count > 0: + page_path.write_text(new_content, encoding="utf-8") + return True + + return False + + def _assign_sessions_to_pages( sessions: Dict[str, SessionCacheData], page_size: int ) -> List[List[str]]: @@ -645,17 +689,19 @@ def _generate_paginated_html( # Build page_info for navigation has_prev = page_num > 1 - # Pre-enable next link if this page exceeds threshold (anticipating future pages) - # or if there are more pages - page_exceeds_threshold = page_message_count > page_size - has_next = page_num < len(pages) or page_exceeds_threshold + is_last_page = page_num == len(pages) page_info = { "page_number": page_num, "prev_link": 
_get_page_html_path(page_num - 1) if has_prev else None, - "next_link": _get_page_html_path(page_num + 1) if has_next else None, + "next_link": _get_page_html_path(page_num + 1), # Always provide + "is_last_page": is_last_page, } + # Enable previous page's next link when creating a new page + if page_num > 1: + _enable_next_link_on_previous_page(output_dir, page_num - 1) + # Build page_stats date_range = "" if first_timestamp and last_timestamp: diff --git a/claude_code_log/html/templates/components/page_nav_styles.css b/claude_code_log/html/templates/components/page_nav_styles.css index d0254878..dfa81398 100644 --- a/claude_code_log/html/templates/components/page_nav_styles.css +++ b/claude_code_log/html/templates/components/page_nav_styles.css @@ -72,3 +72,8 @@ .page-nav-link.next::after { content: ''; } + +/* Hide next link on last page (will be revealed via in-place editing when new page is created) */ +.page-nav-link.next.last-page { + display: none; +} diff --git a/claude_code_log/html/templates/transcript.html b/claude_code_log/html/templates/transcript.html index fed0bb53..5863b50c 100644 --- a/claude_code_log/html/templates/transcript.html +++ b/claude_code_log/html/templates/transcript.html @@ -43,9 +43,9 @@

{{ title }}

{% if page_info.prev_link %} {% endif %} - {% if page_info.next_link %} - - {% endif %} + + Next → + {% endif %} diff --git a/test/__snapshots__/test_snapshot_html.ambr b/test/__snapshots__/test_snapshot_html.ambr index 863b29ec..65bf5df3 100644 --- a/test/__snapshots__/test_snapshot_html.ambr +++ b/test/__snapshots__/test_snapshot_html.ambr @@ -4361,6 +4361,11 @@ .page-nav-link.next::after { content: ''; } + + /* Hide next link on last page (will be revealed via in-place editing when new page is created) */ + .page-nav-link.next.last-page { + display: none; + } @@ -9238,6 +9243,11 @@ .page-nav-link.next::after { content: ''; } + + /* Hide next link on last page (will be revealed via in-place editing when new page is created) */ + .page-nav-link.next.last-page { + display: none; + } @@ -14211,6 +14221,11 @@ .page-nav-link.next::after { content: ''; } + + /* Hide next link on last page (will be revealed via in-place editing when new page is created) */ + .page-nav-link.next.last-page { + display: none; + } @@ -19225,6 +19240,11 @@ .page-nav-link.next::after { content: ''; } + + /* Hide next link on last page (will be revealed via in-place editing when new page is created) */ + .page-nav-link.next.last-page { + display: none; + } diff --git a/test/test_pagination.py b/test/test_pagination.py index 44db8b0c..fc9b0fd6 100644 --- a/test/test_pagination.py +++ b/test/test_pagination.py @@ -548,3 +548,140 @@ def test_page_contains_stats(self, temp_project_dir): ) assert "messages" in page1_content.lower() assert "Page 1" in page1_content or "page-navigation" in page1_content + + +class TestNextLinkInPlaceUpdate: + """Tests for in-place next link updates.""" + + def test_enable_next_link_removes_last_page_class(self, temp_project_dir): + """_enable_next_link_on_previous_page should remove last-page class.""" + from claude_code_log.converter import ( + _enable_next_link_on_previous_page, + _get_page_html_path, + ) + + # Create a page with hidden next link + page_path = 
temp_project_dir / _get_page_html_path(1) + page_path.write_text( + """ + + + + """, + encoding="utf-8", + ) + + result = _enable_next_link_on_previous_page(temp_project_dir, 1) + + assert result is True + content = page_path.read_text(encoding="utf-8") + assert "last-page" not in content + assert 'class="page-nav-link next"' in content + + def test_enable_next_link_no_op_if_already_visible(self, temp_project_dir): + """_enable_next_link_on_previous_page should not modify if already visible.""" + from claude_code_log.converter import ( + _enable_next_link_on_previous_page, + _get_page_html_path, + ) + + page_path = temp_project_dir / _get_page_html_path(1) + original_content = """ + + + + """ + page_path.write_text(original_content, encoding="utf-8") + + result = _enable_next_link_on_previous_page(temp_project_dir, 1) + + assert result is False + assert page_path.read_text(encoding="utf-8") == original_content + + def test_enable_next_link_handles_missing_file(self, temp_project_dir): + """_enable_next_link_on_previous_page should handle missing files gracefully.""" + from claude_code_log.converter import _enable_next_link_on_previous_page + + result = _enable_next_link_on_previous_page(temp_project_dir, 99) + + assert result is False + + def test_enable_next_link_handles_invalid_page_number(self, temp_project_dir): + """_enable_next_link_on_previous_page should handle invalid page numbers.""" + from claude_code_log.converter import _enable_next_link_on_previous_page + + result = _enable_next_link_on_previous_page(temp_project_dir, 0) + assert result is False + + result = _enable_next_link_on_previous_page(temp_project_dir, -1) + assert result is False + + +class TestPaginationNextLinkVisibility: + """Integration tests for next link visibility across pages.""" + + def test_single_page_has_hidden_next_link(self, temp_project_dir): + """Single page should have next link with last-page class when pagination is enabled.""" + from claude_code_log.converter import 
convert_jsonl_to_html + + # Create a session with enough messages to trigger pagination + # but only enough to fit on one page + jsonl_file = temp_project_dir / "session1.jsonl" + messages = _create_session_messages("session1", 15, "2023-01-01") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # Use page_size=10 to trigger pagination (15 messages > 10) + # This will result in a single page since session can't be split + convert_jsonl_to_html(temp_project_dir, page_size=10, silent=True) + + content = (temp_project_dir / "combined_transcripts.html").read_text( + encoding="utf-8" + ) + assert "last-page" in content + assert "PAGINATION_NEXT_LINK_START" in content + + def test_multi_page_first_has_visible_next_link(self, temp_project_dir): + """First page of multi-page should have visible next link (no last-page class).""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create sessions that will span 2 pages + for i, session_id in enumerate(["s1", "s2"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Page 1 should have visible next link (not last page) + page1 = (temp_project_dir / "combined_transcripts.html").read_text( + encoding="utf-8" + ) + assert "PAGINATION_NEXT_LINK_START" in page1 + # Should NOT have last-page class on its next link + # The pattern should be: class="page-nav-link next" without last-page + assert 'class="page-nav-link next"' in page1 or 'next "' not in page1 + + def test_multi_page_last_has_hidden_next_link(self, temp_project_dir): + """Last page of multi-page should have hidden next link (with last-page class).""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create 
sessions that will span 2 pages + for i, session_id in enumerate(["s1", "s2"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Page 2 should have hidden next link (is last page) + page2 = (temp_project_dir / "combined_transcripts_2.html").read_text( + encoding="utf-8" + ) + assert "PAGINATION_NEXT_LINK_START" in page2 + assert "last-page" in page2 From a50bfb9acd4ef665b8058e5ab085bcc071796380 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sun, 4 Jan 2026 00:38:21 +0000 Subject: [PATCH 04/23] Possible fix for Windows test fail --- test/test_timeline_browser.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/test/test_timeline_browser.py b/test/test_timeline_browser.py index 8c2260a8..7373b9c5 100644 --- a/test/test_timeline_browser.py +++ b/test/test_timeline_browser.py @@ -1042,9 +1042,21 @@ def test_timezone_conversion_functionality(self, page: Page): page.goto(f"file://{temp_file}") - # Wait for page to load and timestamp conversion to occur + # Wait for page to load page.wait_for_load_state("networkidle") - page.wait_for_timeout(500) # Give time for JavaScript to run + + # Wait for JavaScript timestamp conversion to complete + # The conversion adds timezone info in parentheses, e.g., "(UTC)" or "(PST)" + # Using wait_for_function instead of fixed timeout for deterministic behaviour + page.wait_for_function( + """ + () => { + const ts = document.querySelector('.timestamp[data-timestamp]'); + return ts && ts.textContent.includes('('); + } + """, + timeout=5000, + ) # Check that timestamp elements have data-timestamp attributes timestamp_elements = page.locator(".timestamp[data-timestamp]") From 43f07afa6a9b0b6ce0769748479b63368a530047 Mon Sep 17 00:00:00 2001 
From: Daniel Demmel Date: Sun, 4 Jan 2026 01:31:53 +0000 Subject: [PATCH 05/23] Fix false cache invalidation and preserve archived sessions Two fixes for cache invalidation issues: 1. Filter agent files in modification check - get_modified_files() checked all .jsonl including agent-*.jsonl - But load_directory_transcripts() excludes agent files - Result: unreferenced agent files always marked as "modified" 2. Skip archived sessions in staleness check (preserve data) - When JSONL files are deleted, cached sessions remain - Instead of pruning, skip them in get_stale_sessions() - Show "N archived" count in output for visibility - Preserves data for potential future archive/restore features Output now shows: project-name: cached, 3 archived (0.0s) --- claude_code_log/cache.py | 41 +++++++++++++++++++++++++++++++++++- claude_code_log/converter.py | 35 ++++++++++++++++++++++++++---- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index d7546b52..97aa7fdf 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -868,9 +868,16 @@ def is_html_stale( return False, "up_to_date" - def get_stale_sessions(self) -> List[tuple[str, str]]: + def get_stale_sessions( + self, valid_session_ids: Optional[set[str]] = None + ) -> List[tuple[str, str]]: """Get list of sessions that need HTML regeneration. + Args: + valid_session_ids: If provided, only check sessions in this set. + Sessions not in this set are considered "archived" (JSONL deleted) + and are skipped to avoid perpetual staleness. 
+ Returns: List of (session_id, reason) tuples for sessions needing regeneration """ @@ -889,6 +896,14 @@ def get_stale_sessions(self) -> List[tuple[str, str]]: for row in session_rows: session_id = row["session_id"] + + # Skip archived sessions (JSONL deleted but cache remains) + if ( + valid_session_ids is not None + and session_id not in valid_session_ids + ): + continue + html_path = f"session-{session_id}.html" is_stale, reason = self.is_html_stale(html_path, session_id) @@ -897,6 +912,30 @@ def get_stale_sessions(self) -> List[tuple[str, str]]: return stale_sessions + def get_archived_session_count(self, valid_session_ids: set[str]) -> int: + """Count sessions in cache whose JSONL files have been deleted. + + These are preserved for potential future archiving/restore features. + + Args: + valid_session_ids: Set of session IDs that currently exist in source data + + Returns: + Number of archived (orphan) sessions + """ + if self._project_id is None: + return 0 + + with self._get_connection() as conn: + cached_rows = conn.execute( + "SELECT session_id FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchall() + + return sum( + 1 for row in cached_rows if row["session_id"] not in valid_session_ids + ) + # ========== Page Cache Methods (Pagination) ========== def get_page_size_config(self) -> Optional[int]: diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 151f47b0..07e31e00 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1548,11 +1548,29 @@ def process_projects_hierarchy( stats.add_warning(f"Failed to initialize cache: {e}") # Phase 1: Fast check if anything needs updating (mtime comparison only) - jsonl_files = list(project_dir.glob("*.jsonl")) + # Exclude agent files - they are loaded via session references, not directly + jsonl_files = [ + f + for f in project_dir.glob("*.jsonl") + if not f.name.startswith("agent-") + ] + # Valid session IDs are from existing JSONL files (file stem 
= session ID) + valid_session_ids = {f.stem for f in jsonl_files} modified_files = ( cache_manager.get_modified_files(jsonl_files) if cache_manager else [] ) - stale_sessions = cache_manager.get_stale_sessions() if cache_manager else [] + # Pass valid_session_ids to skip archived sessions (JSONL deleted) + stale_sessions = ( + cache_manager.get_stale_sessions(valid_session_ids) + if cache_manager + else [] + ) + # Count archived sessions (cached but JSONL deleted) + archived_count = ( + cache_manager.get_archived_session_count(valid_session_ids) + if cache_manager + else 0 + ) output_path = project_dir / "combined_transcripts.html" # Check combined_stale using the appropriate cache: # - Paginated projects store data in html_pages table (via save_page_cache) @@ -1578,12 +1596,19 @@ def process_projects_hierarchy( or not output_path.exists() ) + # Build archived suffix for output (shown on both cached and work paths) + archived_suffix = ( + f", {archived_count} archived" if archived_count > 0 else "" + ) + if not needs_work: # Fast path: nothing to do, just collect stats for index stats.files_loaded_from_cache = len(jsonl_files) stats.total_time = time.time() - project_start_time # Show progress - print(f" {project_dir.name}: cached ({stats.total_time:.1f}s)") + print( + f" {project_dir.name}: cached{archived_suffix} ({stats.total_time:.1f}s)" + ) else: # Slow path: update cache and regenerate output stats.files_updated = len(modified_files) if modified_files else 0 @@ -1618,7 +1643,9 @@ def process_projects_hierarchy( if stats.sessions_regenerated > 0: progress_parts.append(f"{stats.sessions_regenerated} sessions") detail = ", ".join(progress_parts) if progress_parts else "regenerated" - print(f" {project_dir.name}: {detail} ({stats.total_time:.1f}s)") + print( + f" {project_dir.name}: {detail}{archived_suffix} ({stats.total_time:.1f}s)" + ) # Get project info for index - use cached data if available # Exclude agent files (they are loaded via session references) 
From 4a7ce7ae5d9f3c6f78c83e8e77a471642a0d1750 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sat, 10 Jan 2026 00:40:13 +0000 Subject: [PATCH 06/23] Add archived project support and session deletion (#76) * Add archived project support and session deletion Features: - Show fully archived projects (cached but no JSONL files) in TUI and HTML index - Projects with [ARCHIVED] badge in project selector and greyed-out styling in index - Archived projects automatically open in archived view mode - Delete sessions from cache via 'd' key with confirmation dialog - Restore JSONL now produces compact JSON format matching original files - Project list refreshes after restore to update archived status Implementation: - Add get_all_cached_projects() to discover archived projects from cache.db - Add delete_session() and delete_project() methods to CacheManager - Update CLI to discover and track archived projects, refresh on return - Update SessionBrowser to handle is_archived_project flag - Add DeleteConfirmScreen modal for delete confirmation Tests: - Add tests for compact JSON export, session/project deletion - Add tests for get_all_cached_projects with active/archived detection - Add TUI test for archived project loading in archived mode * Simplify views to show both archived and current + implement delete as well as archive and restore --- claude_code_log/cache.py | 209 ++++ claude_code_log/cli.py | 119 +- claude_code_log/converter.py | 129 +- .../components/project_card_styles.css | 24 + claude_code_log/html/templates/index.html | 6 +- claude_code_log/tui.py | 987 +++++++++++++++- dev-docs/restoring-archived-sessions.md | 100 ++ test/__snapshots__/test_snapshot_html.ambr | 28 + test/test_cache_integration.py | 421 +++++++ test/test_tui.py | 1036 ++++++++++++++++- 10 files changed, 2973 insertions(+), 86 deletions(-) create mode 100644 dev-docs/restoring-archived-sessions.md diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 97aa7fdf..8e37d477 100644 
--- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -936,6 +936,105 @@ def get_archived_session_count(self, valid_session_ids: set[str]) -> int: 1 for row in cached_rows if row["session_id"] not in valid_session_ids ) + def get_archived_sessions( + self, valid_session_ids: set[str] + ) -> Dict[str, SessionCacheData]: + """Get session data for archived sessions (cached but JSONL deleted). + + Args: + valid_session_ids: Set of session IDs that currently exist in source data + + Returns: + Dict mapping session_id to SessionCacheData for archived sessions + """ + if self._project_id is None: + return {} + + archived_sessions: Dict[str, SessionCacheData] = {} + + with self._get_connection() as conn: + session_rows = conn.execute( + "SELECT * FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchall() + + for row in session_rows: + session_id = row["session_id"] + if session_id not in valid_session_ids: + archived_sessions[session_id] = SessionCacheData( + session_id=session_id, + summary=row["summary"], + first_timestamp=row["first_timestamp"], + last_timestamp=row["last_timestamp"], + message_count=row["message_count"], + first_user_message=row["first_user_message"], + cwd=row["cwd"], + total_input_tokens=row["total_input_tokens"], + total_output_tokens=row["total_output_tokens"], + total_cache_creation_tokens=row["total_cache_creation_tokens"], + total_cache_read_tokens=row["total_cache_read_tokens"], + ) + + return archived_sessions + + def export_session_to_jsonl(self, session_id: str) -> List[str]: + """Export all message content JSONs for a session, for JSONL restoration. + + Args: + session_id: The session ID to export + + Returns: + List of JSON strings (one per line for JSONL file), compact format + """ + if self._project_id is None: + return [] + + with self._get_connection() as conn: + rows = conn.execute( + """SELECT content FROM messages + WHERE project_id = ? AND session_id = ? 
+ ORDER BY timestamp NULLS LAST""", + (self._project_id, session_id), + ).fetchall() + + # Re-serialize to compact JSON format (no spaces after separators) + # to match original JSONL file format + result: List[str] = [] + for row in rows: + try: + parsed = json.loads(row["content"]) + compact = json.dumps(parsed, separators=(",", ":")) + result.append(compact) + except json.JSONDecodeError: + # If parsing fails, use original content + result.append(row["content"]) + return result + + def load_session_entries(self, session_id: str) -> List[TranscriptEntry]: + """Load transcript entries for a session from cache. + + Used for rendering archived sessions to HTML/Markdown when + the original JSONL file no longer exists. + + Args: + session_id: The session ID to load + + Returns: + List of TranscriptEntry objects for the session + """ + if self._project_id is None: + return [] + + with self._get_connection() as conn: + rows = conn.execute( + """SELECT content FROM messages + WHERE project_id = ? AND session_id = ? + ORDER BY timestamp NULLS LAST""", + (self._project_id, session_id), + ).fetchall() + + return [self._deserialize_entry(row) for row in rows] + # ========== Page Cache Methods (Pagination) ========== def get_page_size_config(self) -> Optional[int]: @@ -1224,6 +1323,115 @@ def get_page_count(self) -> int: return row["cnt"] if row else 0 + def delete_session(self, session_id: str) -> bool: + """Delete a session and its messages from cache. + + Args: + session_id: The session ID to delete + + Returns: + True if session was deleted, False if not found + """ + if self._project_id is None: + return False + + with self._get_connection() as conn: + # Check if session exists + row = conn.execute( + "SELECT id FROM sessions WHERE project_id = ? AND session_id = ?", + (self._project_id, session_id), + ).fetchone() + + if not row: + return False + + # Delete messages for this session + conn.execute( + "DELETE FROM messages WHERE project_id = ? 
AND session_id = ?", + (self._project_id, session_id), + ) + + # Delete HTML cache entries for this session + conn.execute( + "DELETE FROM html_cache WHERE project_id = ? AND source_session_id = ?", + (self._project_id, session_id), + ) + + # Delete the session record + conn.execute( + "DELETE FROM sessions WHERE project_id = ? AND session_id = ?", + (self._project_id, session_id), + ) + + self._update_last_updated(conn) + conn.commit() + + return True + + def delete_project(self) -> bool: + """Delete this project and all its data from cache. + + Returns: + True if project was deleted, False if not found + """ + if self._project_id is None: + return False + + with self._get_connection() as conn: + # Cascade delete handles messages, sessions, cached_files, html_cache, html_pages + conn.execute("DELETE FROM projects WHERE id = ?", (self._project_id,)) + conn.commit() + + self._project_id = None + return True + + +def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: + """Get all projects from cache, indicating which are archived. + + This is a standalone function that queries the cache.db directly + to find all project paths, without needing to instantiate CacheManager + for each project. + + Args: + projects_dir: Path to the projects directory (e.g., ~/.claude/projects) + + Returns: + List of (project_path, is_archived) tuples. + is_archived is True if the project has no JSONL files but exists in cache. 
+ """ + db_path = projects_dir / "cache.db" + if not db_path.exists(): + return [] + + result: List[tuple[str, bool]] = [] + + try: + conn = sqlite3.connect(db_path, timeout=30.0) + conn.row_factory = sqlite3.Row + try: + rows = conn.execute( + "SELECT project_path FROM projects ORDER BY project_path" + ).fetchall() + + for row in rows: + project_path = Path(row["project_path"]) + # Check if project has JSONL files (non-archived) + has_jsonl = ( + bool(list(project_path.glob("*.jsonl"))) + if project_path.exists() + else False + ) + # is_archived = project exists in cache but has no JSONL files + is_archived = not has_jsonl + result.append((row["project_path"], is_archived)) + finally: + conn.close() + except Exception: + pass + + return result + __all__ = [ "CacheManager", @@ -1232,5 +1440,6 @@ def get_page_count(self) -> int: "PageCacheData", "ProjectCache", "SessionCacheData", + "get_all_cached_projects", "get_library_version", ] diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 90f0e4b3..19b4bb9d 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -17,7 +17,7 @@ get_file_extension, process_projects_hierarchy, ) -from .cache import CacheManager, get_library_version +from .cache import CacheManager, get_all_cached_projects, get_library_version def get_default_projects_dir() -> Path: @@ -25,36 +25,75 @@ def get_default_projects_dir() -> Path: return Path.home() / ".claude" / "projects" -def _launch_tui_with_cache_check(project_path: Path) -> Optional[str]: +def _discover_projects( + projects_dir: Path, +) -> tuple[list[Path], set[Path]]: + """Discover active and archived projects in the projects directory. 
+ + Returns: + Tuple of (all_project_dirs, archived_projects_set) + """ + # Find active projects (directories with JSONL files) + project_dirs = [ + d for d in projects_dir.iterdir() if d.is_dir() and list(d.glob("*.jsonl")) + ] + + # Find archived projects (in cache but without JSONL files) + archived_projects: set[Path] = set() + cached_projects = get_all_cached_projects(projects_dir) + active_project_paths = {str(p) for p in project_dirs} + for project_path_str, is_archived in cached_projects: + if is_archived and project_path_str not in active_project_paths: + archived_path = Path(project_path_str) + archived_projects.add(archived_path) + project_dirs.append(archived_path) + + return project_dirs, archived_projects + + +def _launch_tui_with_cache_check( + project_path: Path, is_archived: bool = False +) -> Optional[str]: """Launch TUI with proper cache checking and user feedback.""" click.echo("Checking cache and loading session data...") # Check if we need to rebuild cache cache_manager = CacheManager(project_path, get_library_version()) - jsonl_files = list(project_path.glob("*.jsonl")) - modified_files = cache_manager.get_modified_files(jsonl_files) project_cache = cache_manager.get_cached_project_data() - if not (project_cache and project_cache.sessions and not modified_files): - # Need to rebuild cache - if modified_files: + if is_archived: + # Archived projects have no JSONL files, just load from cache + if project_cache and project_cache.sessions: click.echo( - f"Found {len(modified_files)} modified files, rebuilding cache..." + f"[ARCHIVED] Found {len(project_cache.sessions)} sessions in cache. Launching TUI..." ) else: - click.echo("Building session cache...") - - # Pre-build the cache before launching TUI (no HTML generation) - try: - ensure_fresh_cache(project_path, cache_manager, silent=True) - click.echo("Cache ready! 
Launching TUI...") - except Exception as e: - click.echo(f"Error building cache: {e}", err=True) + click.echo("Error: No cached sessions found for archived project", err=True) return None else: - click.echo( - f"Cache up to date. Found {len(project_cache.sessions)} sessions. Launching TUI..." - ) + jsonl_files = list(project_path.glob("*.jsonl")) + modified_files = cache_manager.get_modified_files(jsonl_files) + + if not (project_cache and project_cache.sessions and not modified_files): + # Need to rebuild cache + if modified_files: + click.echo( + f"Found {len(modified_files)} modified files, rebuilding cache..." + ) + else: + click.echo("Building session cache...") + + # Pre-build the cache before launching TUI (no HTML generation) + try: + ensure_fresh_cache(project_path, cache_manager, silent=True) + click.echo("Cache ready! Launching TUI...") + except Exception as e: + click.echo(f"Error building cache: {e}", err=True) + return None + else: + click.echo( + f"Cache up to date. Found {len(project_cache.sessions)} sessions. Launching TUI..." 
+ ) # Small delay to let user see the message before TUI clears screen import time @@ -63,7 +102,7 @@ def _launch_tui_with_cache_check(project_path: Path) -> Optional[str]: from .tui import run_session_browser - result = run_session_browser(project_path) + result = run_session_browser(project_path, is_archived=is_archived) return result @@ -511,11 +550,8 @@ def main( click.echo(f"Error: Projects directory not found: {input_path}") return - project_dirs = [ - d - for d in input_path.iterdir() - if d.is_dir() and list(d.glob("*.jsonl")) - ] + # Initial project discovery + project_dirs, archived_projects = _discover_projects(input_path) if not project_dirs: click.echo(f"No projects with JSONL files found in {input_path}") @@ -524,7 +560,7 @@ def main( # Try to find projects that match current working directory matching_projects = find_projects_by_cwd(input_path) - if len(project_dirs) == 1: + if len(project_dirs) == 1 and not archived_projects: # Only one project, open it directly result = _launch_tui_with_cache_check(project_dirs[0]) if result == "back_to_projects": @@ -532,14 +568,21 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects (may have changed after restore) + project_dirs, archived_projects = _discover_projects( + input_path + ) selected_project = run_project_selector( - project_dirs, matching_projects + project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return @@ -555,14 +598,21 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects (may have changed after restore) + project_dirs, archived_projects = _discover_projects( + input_path + ) selected_project = run_project_selector( - 
project_dirs, matching_projects + project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return @@ -572,14 +622,19 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects each iteration (may have changed after restore) + project_dirs, archived_projects = _discover_projects(input_path) selected_project = run_project_selector( - project_dirs, matching_projects + project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 07e31e00..6b5606f5 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -20,7 +20,12 @@ create_session_preview, get_warmup_session_ids, ) -from .cache import CacheManager, SessionCacheData, get_library_version +from .cache import ( + CacheManager, + SessionCacheData, + get_all_cached_projects, + get_library_version, +) from .parser import parse_timestamp from .factories import create_transcript_entry from .models import ( @@ -1477,6 +1482,49 @@ def _generate_individual_session_files( return regenerated_count +def _get_cleanup_period_days() -> Optional[int]: + """Read cleanupPeriodDays from Claude Code settings. + + Checks ~/.claude/settings.json for the cleanupPeriodDays setting. + + Returns: + The configured cleanup period in days, or None if not set/readable. 
+ """ + import json + + settings_path = Path.home() / ".claude" / "settings.json" + if not settings_path.exists(): + return None + + try: + with open(settings_path, "r", encoding="utf-8") as f: + settings = json.load(f) + return settings.get("cleanupPeriodDays") + except (json.JSONDecodeError, OSError): + return None + + +def _print_archived_sessions_note(total_archived: int) -> None: + """Print a note about archived sessions and how to restore them. + + Args: + total_archived: Total number of archived sessions across all projects. + """ + cleanup_days = _get_cleanup_period_days() + cleanup_info = ( + f" (cleanupPeriodDays: {cleanup_days})" + if cleanup_days is not None + else " (cleanupPeriodDays: 30 default)" + ) + + print( + f"\nNote: {total_archived} archived session(s) found{cleanup_info}.\n" + " These sessions were cached before their JSONL files were deleted.\n" + " To restore them or adjust cleanup settings, see:\n" + " https://github.com/daaain/claude-code-log/blob/main/dev-docs/restoring-archived-sessions.md" + ) + + def process_projects_hierarchy( projects_path: Path, from_date: Optional[str] = None, @@ -1514,7 +1562,16 @@ def process_projects_hierarchy( if child.is_dir() and list(child.glob("*.jsonl")): project_dirs.append(child) - if not project_dirs: + # Find archived projects (projects in cache but without JSONL files) + archived_project_dirs: list[Path] = [] + if use_cache: + cached_projects = get_all_cached_projects(projects_path) + active_project_paths = {str(p) for p in project_dirs} + for project_path_str, is_archived in cached_projects: + if is_archived and project_path_str not in active_project_paths: + archived_project_dirs.append(Path(project_path_str)) + + if not project_dirs and not archived_project_dirs: raise FileNotFoundError( f"No project directories with JSONL files found in {projects_path}" ) @@ -1530,6 +1587,7 @@ def process_projects_hierarchy( total_projects = len(project_dirs) projects_with_updates = 0 total_sessions = 0 + 
total_archived = 0 # Per-project stats for summary output project_stats: List[tuple[str, GenerationStats]] = [] @@ -1571,6 +1629,7 @@ def process_projects_hierarchy( if cache_manager else 0 ) + total_archived += archived_count output_path = project_dir / "combined_transcripts.html" # Check combined_stale using the appropriate cache: # - Paginated projects store data in html_pages table (via save_page_cache) @@ -1681,6 +1740,7 @@ def process_projects_hierarchy( "latest_timestamp": cached_project_data.latest_timestamp, "earliest_timestamp": cached_project_data.earliest_timestamp, "working_directories": cache_manager.get_working_directories(), + "is_archived": False, "sessions": [ { "id": session_data.session_id, @@ -1790,6 +1850,7 @@ def process_projects_hierarchy( "working_directories": cache_manager.get_working_directories() if cache_manager else [], + "is_archived": False, "sessions": sessions_data, } ) @@ -1808,6 +1869,66 @@ def process_projects_hierarchy( ) continue + # Process archived projects (projects in cache but without JSONL files) + archived_project_count = 0 + for archived_dir in sorted(archived_project_dirs): + try: + # Initialize cache manager for archived project + cache_manager = CacheManager(archived_dir, library_version) + cached_project_data = cache_manager.get_cached_project_data() + + if cached_project_data is None: + continue + + archived_project_count += 1 + print( + f" {archived_dir.name}: [ARCHIVED] ({len(cached_project_data.sessions)} sessions)" + ) + + # Add archived project to summaries + project_summaries.append( + { + "name": archived_dir.name, + "path": archived_dir, + "html_file": f"{archived_dir.name}/combined_transcripts.html", + "jsonl_count": 0, + "message_count": cached_project_data.total_message_count, + "last_modified": 0.0, + "total_input_tokens": cached_project_data.total_input_tokens, + "total_output_tokens": cached_project_data.total_output_tokens, + "total_cache_creation_tokens": 
cached_project_data.total_cache_creation_tokens, + "total_cache_read_tokens": cached_project_data.total_cache_read_tokens, + "latest_timestamp": cached_project_data.latest_timestamp, + "earliest_timestamp": cached_project_data.earliest_timestamp, + "working_directories": cache_manager.get_working_directories(), + "is_archived": True, + "sessions": [ + { + "id": session_data.session_id, + "summary": session_data.summary, + "timestamp_range": format_timestamp_range( + session_data.first_timestamp, + session_data.last_timestamp, + ), + "first_timestamp": session_data.first_timestamp, + "last_timestamp": session_data.last_timestamp, + "message_count": session_data.message_count, + "first_user_message": session_data.first_user_message + or "[No user message found in session.]", + } + for session_data in cached_project_data.sessions.values() + if session_data.first_user_message + and session_data.first_user_message != "Warmup" + ], + } + ) + except Exception as e: + print(f"Warning: Failed to process archived project {archived_dir}: {e}") + continue + + # Update total projects count to include archived + total_projects = len(project_dirs) + archived_project_count + # Generate index (always regenerate if outdated) ext = get_file_extension(output_format) index_path = projects_path / f"index.{ext}" @@ -1846,4 +1967,8 @@ def process_projects_hierarchy( summary_parts.append(" Index regenerated") print("\n".join(summary_parts)) + # Show archived sessions note if any exist + if total_archived > 0: + _print_archived_sessions_note(total_archived) + return index_path diff --git a/claude_code_log/html/templates/components/project_card_styles.css b/claude_code_log/html/templates/components/project_card_styles.css index ebae9931..c6d40f2f 100644 --- a/claude_code_log/html/templates/components/project_card_styles.css +++ b/claude_code_log/html/templates/components/project_card_styles.css @@ -111,4 +111,28 @@ .project-sessions details[open] summary { margin-bottom: 10px; +} + +/* 
Archived project styling */ +.project-card.archived { + opacity: 0.6; + background-color: #f5f5f522; +} + +.project-card.archived:hover { + opacity: 0.8; +} + +.archived-badge { + display: inline-block; + background-color: #888; + color: white; + font-size: 0.65em; + font-weight: 600; + padding: 2px 8px; + border-radius: 4px; + margin-left: 10px; + vertical-align: middle; + text-transform: uppercase; + letter-spacing: 0.5px; } \ No newline at end of file diff --git a/claude_code_log/html/templates/index.html b/claude_code_log/html/templates/index.html index a539386a..4b2bf430 100644 --- a/claude_code_log/html/templates/index.html +++ b/claude_code_log/html/templates/index.html @@ -59,10 +59,14 @@

{{ title }}

{% for project in projects %} -
+
{{ project.display_name }} + {% if project.is_archived %} + Archived + {% else %} (← open combined transcript) + {% endif %}
📁 {{ project.jsonl_count }} transcript files
diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 7f1b398a..63c1d5f3 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -41,7 +41,7 @@ class ProjectSelector(App[Path]): border: solid $primary; margin-bottom: 1; } - + DataTable { height: auto; } @@ -50,7 +50,11 @@ class ProjectSelector(App[Path]): TITLE = "Claude Code Log - Project Selector" BINDINGS: ClassVar[list[BindingType]] = [ Binding("q", "quit", "Quit"), + Binding("escape", "quit", "Quit", show=False), Binding("s", "select_project", "Select Project"), + Binding("a", "archive_project", "Archive Project"), + Binding("d", "delete_project", "Delete Project"), + Binding("r", "restore_project", "Restore Project"), ] selected_project_path: reactive[Optional[Path]] = reactive( @@ -58,13 +62,20 @@ class ProjectSelector(App[Path]): ) projects: list[Path] matching_projects: list[Path] - - def __init__(self, projects: list[Path], matching_projects: list[Path]): + archived_projects: set[Path] + + def __init__( + self, + projects: list[Path], + matching_projects: list[Path], + archived_projects: Optional[set[Path]] = None, + ): """Initialize the project selector.""" super().__init__() self.theme = "gruvbox" self.projects = projects self.matching_projects = matching_projects + self.archived_projects = archived_projects or set() def compose(self) -> ComposeResult: """Create the UI layout.""" @@ -105,18 +116,21 @@ def populate_table(self) -> None: # Add rows for project_path in self.projects: + is_archived = project_path in self.archived_projects try: cache_manager = CacheManager(project_path, get_library_version()) project_cache = cache_manager.get_cached_project_data() if not project_cache or not project_cache.sessions: - try: - ensure_fresh_cache(project_path, cache_manager, silent=True) - # Reload cache after ensuring it's fresh - project_cache = cache_manager.get_cached_project_data() - except Exception: - # If cache building fails, continue with empty cache - project_cache = None + if 
not is_archived: + # Only try to build cache for non-archived projects + try: + ensure_fresh_cache(project_path, cache_manager, silent=True) + # Reload cache after ensuring it's fresh + project_cache = cache_manager.get_cached_project_data() + except Exception: + # If cache building fails, continue with empty cache + project_cache = None # Get project info session_count = ( @@ -132,6 +146,10 @@ def populate_table(self) -> None: if project_path in self.matching_projects: project_display = f"→ {project_display[2:]}" + # Add archived indicator + if is_archived: + project_display = f"{project_display} [ARCHIVED]" + table.add_row( project_display, str(session_count), @@ -141,6 +159,8 @@ def populate_table(self) -> None: project_display = f" {project_path.name}" if project_path in self.matching_projects: project_display = f"→ {project_display[2:]}" + if is_archived: + project_display = f"{project_display} [ARCHIVED]" table.add_row( project_display, @@ -151,6 +171,10 @@ def on_data_table_row_highlighted(self, _event: DataTable.RowHighlighted) -> Non """Handle row highlighting (cursor movement) in the projects table.""" self._update_selected_project_from_cursor() + def on_data_table_row_selected(self, _event: DataTable.RowSelected) -> None: + """Handle row selection (Enter key) in the projects table.""" + self.action_select_project() + def _update_selected_project_from_cursor(self) -> None: """Update the selected project based on the current cursor position.""" try: @@ -164,6 +188,10 @@ def _update_selected_project_from_cursor(self) -> None: if project_display.startswith("→"): project_display = project_display[1:].strip() + # Remove the archived indicator if present + if project_display.endswith(" [ARCHIVED]"): + project_display = project_display[:-11].strip() + # Find the matching project path for project_path in self.projects: if project_path.name == project_display: @@ -186,6 +214,185 @@ async def action_quit(self) -> None: """Quit the application with proper 
cleanup.""" self.exit(None) + def _get_project_session_count(self, project_path: Path) -> int: + """Get the number of sessions in a project from cache.""" + try: + cache_manager = CacheManager(project_path, get_library_version()) + project_cache = cache_manager.get_cached_project_data() + if project_cache and project_cache.sessions: + return len(project_cache.sessions) + except Exception: + pass + return 0 + + def _is_project_archived(self, project_path: Path) -> bool: + """Check if a project is archived (no JSONL files exist).""" + return project_path in self.archived_projects + + def check_action( + self, + action: str, + parameters: tuple[object, ...], # noqa: ARG002 + ) -> bool | None: + """Control which actions are available based on context.""" + project_path = self.selected_project_path + is_archived = project_path in self.archived_projects if project_path else False + + if action == "archive_project": + # Can only archive non-archived projects + return project_path is not None and not is_archived + elif action == "restore_project": + # Can only restore archived projects + return project_path is not None and is_archived + elif action == "delete_project": + # Can delete any project + return project_path is not None + + # Allow all other actions (quit, select_project, etc.) 
+ return True + + def action_archive_project(self) -> None: + """Archive all sessions in the selected project.""" + if not self.selected_project_path: + self.notify("No project selected", severity="warning") + return + + if self._is_project_archived(self.selected_project_path): + self.notify("Project is already archived", severity="warning") + return + + session_count = self._get_project_session_count(self.selected_project_path) + self.push_screen( + ArchiveProjectConfirmScreen(self.selected_project_path.name, session_count), + self._handle_archive_project_confirm, + ) + + def _handle_archive_project_confirm(self, confirmed: bool | None) -> None: + """Handle the result of the archive project confirmation dialog.""" + if not confirmed or not self.selected_project_path: + return + + project_path = self.selected_project_path + archived_count = 0 + + # Delete all JSONL files in the project + for jsonl_file in project_path.glob("*.jsonl"): + try: + jsonl_file.unlink() + archived_count += 1 + except Exception as e: + self.notify( + f"Failed to delete {jsonl_file.name}: {e}", severity="error" + ) + + if archived_count > 0: + self.notify(f"Archived {archived_count} sessions") + # Add to archived projects set + self.archived_projects.add(project_path) + self.populate_table() + + def action_delete_project(self) -> None: + """Delete the selected project from cache (and optionally JSONL files).""" + if not self.selected_project_path: + self.notify("No project selected", severity="warning") + return + + is_archived = self._is_project_archived(self.selected_project_path) + session_count = self._get_project_session_count(self.selected_project_path) + self.push_screen( + DeleteProjectConfirmScreen( + self.selected_project_path.name, session_count, is_archived + ), + self._handle_delete_project_confirm, + ) + + def _handle_delete_project_confirm(self, result: Optional[str]) -> None: + """Handle the result of the delete project confirmation dialog.""" + if not result or not 
self.selected_project_path: + return + + project_path = self.selected_project_path + + # Delete cache + cache_manager = CacheManager(project_path, get_library_version()) + cache_manager.clear_cache() + + # If deleting both, also delete JSONL files + if result == "both": + for jsonl_file in project_path.glob("*.jsonl"): + try: + jsonl_file.unlink() + except Exception as e: + self.notify( + f"Failed to delete {jsonl_file.name}: {e}", severity="error" + ) + + # Remove from projects list + if project_path in self.projects: + self.projects.remove(project_path) + if project_path in self.matching_projects: + self.matching_projects.remove(project_path) + if project_path in self.archived_projects: + self.archived_projects.discard(project_path) + + self.notify(f"Deleted project: {project_path.name}") + self.selected_project_path = None + self.populate_table() + + def action_restore_project(self) -> None: + """Restore all archived sessions in the selected project.""" + if not self.selected_project_path: + self.notify("No project selected", severity="warning") + return + + if not self._is_project_archived(self.selected_project_path): + self.notify("Project is not archived", severity="warning") + return + + session_count = self._get_project_session_count(self.selected_project_path) + self.push_screen( + RestoreProjectConfirmScreen(self.selected_project_path.name, session_count), + self._handle_restore_project_confirm, + ) + + def _handle_restore_project_confirm(self, confirmed: bool | None) -> None: + """Handle the result of the restore project confirmation dialog.""" + if not confirmed or not self.selected_project_path: + return + + project_path = self.selected_project_path + cache_manager = CacheManager(project_path, get_library_version()) + project_cache = cache_manager.get_cached_project_data() + + if not project_cache or not project_cache.sessions: + self.notify("No sessions to restore", severity="warning") + return + + # Ensure project directory exists + 
project_path.mkdir(parents=True, exist_ok=True) + + restored_count = 0 + for session_id in project_cache.sessions: + jsonl_path = project_path / f"{session_id}.jsonl" + if not jsonl_path.exists(): + try: + messages = cache_manager.export_session_to_jsonl(session_id) + if messages: + with open(jsonl_path, "w", encoding="utf-8") as f: + for msg in messages: + f.write(msg + "\n") + restored_count += 1 + except Exception as e: + self.notify( + f"Failed to restore {session_id}: {e}", severity="error" + ) + + if restored_count > 0: + self.notify(f"Restored {restored_count} sessions") + # Remove from archived projects set + self.archived_projects.discard(project_path) + self.populate_table() + class MarkdownViewerScreen(ModalScreen[None]): """Modal screen for viewing Markdown content with table of contents.""" @@ -309,6 +516,406 @@ async def action_dismiss(self, result: None = None) -> None: self.dismiss(result) +class ArchiveConfirmScreen(ModalScreen[bool]): + """Modal screen for confirming session archiving (delete JSONL, keep cache).""" + + CSS = """ + ArchiveConfirmScreen { + align: center middle; + } + + #archive-container { + width: 65; + height: auto; + border: solid $warning; + background: $surface; + padding: 1 2; + } + + #archive-title { + text-align: center; + text-style: bold; + color: $warning; + margin-bottom: 1; + } + + #archive-message { + margin-bottom: 1; + } + + #archive-info { + color: $text-muted; + margin-bottom: 1; + } + + #archive-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("y", "confirm", "Yes"), + Binding("enter", "confirm", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, session_id: str) -> None: + super().__init__() + self.session_id = session_id + + def compose(self) -> ComposeResult: + with Container(id="archive-container"): + yield Static("Archive Session", id="archive-title") + yield 
Static( + f"Session: {self.session_id[:8]}...", + id="archive-message", + ) + yield Static( + "This will delete the JSONL file.\n" + "The session will be archived and can be restored from cache.", + id="archive-info", + ) + yield Static("\\[Enter/y] Yes \\[Esc/n] No", id="archive-buttons") + + def action_confirm(self) -> None: + self.dismiss(True) + + def action_cancel(self) -> None: + self.dismiss(False) + + +class DeleteConfirmScreen(ModalScreen[Optional[str]]): + """Modal screen for confirming session deletion with smart options.""" + + CSS = """ + DeleteConfirmScreen { + align: center middle; + } + + #delete-container { + width: 65; + height: auto; + border: solid $error; + background: $surface; + padding: 1 2; + } + + #delete-title { + text-align: center; + text-style: bold; + color: $error; + margin-bottom: 1; + } + + #delete-message { + margin-bottom: 1; + } + + #delete-warning { + color: $warning; + margin-bottom: 1; + } + + #delete-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("c", "delete_cache", "Cache only"), + Binding("b", "delete_both", "Both", show=False), + Binding("y", "delete_cache", "Yes", show=False), + Binding("enter", "delete_cache", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, session_id: str, is_archived: bool = False) -> None: + super().__init__() + self.session_id = session_id + self.is_archived = is_archived + + def compose(self) -> ComposeResult: + with Container(id="delete-container"): + yield Static("Delete Session", id="delete-title") + yield Static( + f"Session: {self.session_id[:8]}...", + id="delete-message", + ) + if self.is_archived: + yield Static( + "This is an archived session with no JSONL file.\n" + "Deletion is PERMANENT and cannot be undone!", + id="delete-warning", + ) + yield Static( + "\\[Enter/y/c] Delete from cache \\[Esc/n] Cancel", + id="delete-buttons", + ) + 
else: + yield Static( + "Choose what to delete:\n" + "• Cache only: JSONL file remains, session can be re-parsed\n" + "• Both: Delete JSONL file AND cache (permanent!)", + id="delete-warning", + ) + yield Static( + "\\[c] Cache only \\[b] Both (permanent) \\[Esc/n] Cancel", + id="delete-buttons", + ) + + def action_delete_cache(self) -> None: + self.dismiss("cache_only") + + def action_delete_both(self) -> None: + if not self.is_archived: + self.dismiss("both") + + def action_cancel(self) -> None: + self.dismiss(None) + + +class ArchiveProjectConfirmScreen(ModalScreen[bool]): + """Modal screen for confirming project archival.""" + + CSS = """ + ArchiveProjectConfirmScreen { + align: center middle; + } + + #archive-project-container { + width: 65; + height: auto; + border: solid $warning; + background: $surface; + padding: 1 2; + } + + #archive-project-title { + text-align: center; + text-style: bold; + color: $warning; + margin-bottom: 1; + } + + #archive-project-message { + margin-bottom: 1; + } + + #archive-project-info { + color: $text-muted; + margin-bottom: 1; + } + + #archive-project-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("y", "confirm", "Yes"), + Binding("enter", "confirm", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, project_name: str, session_count: int) -> None: + super().__init__() + self.project_name = project_name + self.session_count = session_count + + def compose(self) -> ComposeResult: + with Container(id="archive-project-container"): + yield Static("Archive Project", id="archive-project-title") + yield Static( + f"Project: {self.project_name}\nSessions: {self.session_count}", + id="archive-project-message", + ) + yield Static( + "This will delete ALL JSONL files in the project.\n" + "Sessions will be archived and can be restored from cache.", + id="archive-project-info", + ) + yield 
Static("\\[Enter/y] Yes \\[Esc/n] No", id="archive-project-buttons") + + def action_confirm(self) -> None: + self.dismiss(True) + + def action_cancel(self) -> None: + self.dismiss(False) + + +class DeleteProjectConfirmScreen(ModalScreen[Optional[str]]): + """Modal screen for confirming project deletion with smart options.""" + + CSS = """ + DeleteProjectConfirmScreen { + align: center middle; + } + + #delete-project-container { + width: 65; + height: auto; + border: solid $error; + background: $surface; + padding: 1 2; + } + + #delete-project-title { + text-align: center; + text-style: bold; + color: $error; + margin-bottom: 1; + } + + #delete-project-message { + margin-bottom: 1; + } + + #delete-project-warning { + color: $warning; + margin-bottom: 1; + } + + #delete-project-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("c", "delete_cache", "Cache only"), + Binding("b", "delete_both", "Both", show=False), + Binding("y", "delete_cache", "Yes", show=False), + Binding("enter", "delete_cache", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__( + self, project_name: str, session_count: int, is_archived: bool = False + ) -> None: + super().__init__() + self.project_name = project_name + self.session_count = session_count + self.is_archived = is_archived + + def compose(self) -> ComposeResult: + with Container(id="delete-project-container"): + yield Static("Delete Project", id="delete-project-title") + yield Static( + f"Project: {self.project_name}\nSessions: {self.session_count}", + id="delete-project-message", + ) + if self.is_archived: + yield Static( + "This is an archived project with no JSONL files.\n" + "Deletion is PERMANENT and cannot be undone!", + id="delete-project-warning", + ) + yield Static( + "\\[Enter/y/c] Delete from cache \\[Esc/n] Cancel", + id="delete-project-buttons", + ) + else: + yield Static( + 
"Choose what to delete:\n" + "• Cache only: JSONL files remain, sessions can be re-parsed\n" + "• Both: Delete ALL JSONL files AND cache (permanent!)", + id="delete-project-warning", + ) + yield Static( + "\\[c] Cache only \\[b] Both (permanent) \\[Esc/n] Cancel", + id="delete-project-buttons", + ) + + def action_delete_cache(self) -> None: + self.dismiss("cache_only") + + def action_delete_both(self) -> None: + if not self.is_archived: + self.dismiss("both") + + def action_cancel(self) -> None: + self.dismiss(None) + + +class RestoreProjectConfirmScreen(ModalScreen[bool]): + """Modal screen for confirming project restoration.""" + + CSS = """ + RestoreProjectConfirmScreen { + align: center middle; + } + + #restore-project-container { + width: 65; + height: auto; + border: solid $success; + background: $surface; + padding: 1 2; + } + + #restore-project-title { + text-align: center; + text-style: bold; + color: $success; + margin-bottom: 1; + } + + #restore-project-message { + margin-bottom: 1; + } + + #restore-project-info { + color: $text-muted; + margin-bottom: 1; + } + + #restore-project-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("y", "confirm", "Yes"), + Binding("enter", "confirm", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, project_name: str, session_count: int) -> None: + super().__init__() + self.project_name = project_name + self.session_count = session_count + + def compose(self) -> ComposeResult: + with Container(id="restore-project-container"): + yield Static("Restore Project", id="restore-project-title") + yield Static( + f"Project: {self.project_name}\n" + f"Archived sessions: {self.session_count}", + id="restore-project-message", + ) + yield Static( + "This will restore ALL archived sessions by writing JSONL files.\n" + "The project directory will be created if it doesn't exist.", + 
id="restore-project-info", + ) + yield Static("\\[Enter/y] Yes \\[Esc/n] No", id="restore-project-buttons") + + def action_confirm(self) -> None: + self.dismiss(True) + + def action_cancel(self) -> None: + self.dismiss(False) + + class SessionBrowser(App[Optional[str]]): """Interactive TUI for browsing and managing Claude Code Log sessions.""" @@ -349,6 +956,9 @@ class SessionBrowser(App[Optional[str]]): TITLE = "Claude Code Log - Session Browser" BINDINGS: ClassVar[list[BindingType]] = [ Binding("q", "quit", "Quit"), + Binding("escape", "back_to_projects", "Back", show=False), + Binding("enter", "export_selected", "Open HTML", show=False), + Binding("a", "archive_session", "Archive Session"), Binding("h", "export_selected", "Open HTML page"), Binding("m", "export_markdown", "Open Markdown"), Binding("v", "view_markdown", "View Markdown"), @@ -357,6 +967,8 @@ class SessionBrowser(App[Optional[str]]): Binding("M", "force_export_markdown", "Force Markdown", show=False), Binding("V", "force_view_markdown", "Force View", show=False), Binding("c", "resume_selected", "Resume in Claude Code"), + Binding("r", "restore_jsonl", "Restore JSONL"), + Binding("d", "delete_session", "Delete Session"), Binding("e", "toggle_expanded", "Toggle Expanded View"), Binding("p", "back_to_projects", "Open Project Selector"), Binding("?", "toggle_help", "Help"), @@ -367,14 +979,17 @@ class SessionBrowser(App[Optional[str]]): project_path: Path cache_manager: CacheManager sessions: dict[str, SessionCacheData] + archived_sessions: dict[str, SessionCacheData] - def __init__(self, project_path: Path): + def __init__(self, project_path: Path, is_archived: bool = False): """Initialize the session browser with a project path.""" super().__init__() self.theme = "gruvbox" self.project_path = project_path + self.is_archived_project = is_archived self.cache_manager = CacheManager(project_path, get_library_version()) self.sessions = {} + self.archived_sessions = {} def compose(self) -> ComposeResult: 
"""Create the UI layout.""" @@ -407,8 +1022,32 @@ def on_resize(self) -> None: def load_sessions(self) -> None: """Load session information from cache or build cache if needed.""" + # For archived projects, just load from cache (no JSONL files to check) + if self.is_archived_project: + project_cache = self.cache_manager.get_cached_project_data() + if project_cache and project_cache.sessions: + # All sessions are "archived" for fully archived projects + self.sessions = {} + self.archived_sessions = project_cache.sessions + else: + self.sessions = {} + self.archived_sessions = {} + # Update UI + try: + self.populate_table() + self.update_stats() + except Exception: + pass + return + # Check if we need to rebuild cache by checking for modified files - jsonl_files = list(self.project_path.glob("*.jsonl")) + # Exclude agent files - they are loaded via session references + jsonl_files = [ + f + for f in self.project_path.glob("*.jsonl") + if not f.name.startswith("agent-") + ] + valid_session_ids = {f.stem for f in jsonl_files} modified_files = self.cache_manager.get_modified_files(jsonl_files) # Get cached project data @@ -434,6 +1073,24 @@ def load_sessions(self) -> None: # Don't show notification during startup - just return return + # Only compute archived sessions if there are JSONL files to compare against + # (in test environments, there may be cached sessions but no JSONL files) + if valid_session_ids: + # Load archived sessions (cached but JSONL deleted) + self.archived_sessions = self.cache_manager.get_archived_sessions( + valid_session_ids + ) + + # Filter current sessions to only those with existing JSONL files + self.sessions = { + sid: data + for sid, data in self.sessions.items() + if sid in valid_session_ids + } + else: + # No JSONL files to compare - treat all sessions as current + self.archived_sessions = {} + # Only update UI if we're in app context try: self.populate_table() @@ -473,13 +1130,20 @@ def populate_table(self) -> None: 
table.add_column("Messages", width=messages_width) table.add_column("Tokens", width=tokens_width) - # Sort sessions by start time (newest first) + # Combine current and archived sessions with archived flag + all_sessions: list[tuple[str, SessionCacheData, bool]] = [] + for session_id, session_data in self.sessions.items(): + all_sessions.append((session_id, session_data, False)) + for session_id, session_data in self.archived_sessions.items(): + all_sessions.append((session_id, session_data, True)) + + # Sort all sessions by start time (newest first) sorted_sessions = sorted( - self.sessions.items(), key=lambda x: x[1].first_timestamp, reverse=True + all_sessions, key=lambda x: x[1].first_timestamp, reverse=True ) # Add rows - for session_id, session_data in sorted_sessions: + for session_id, session_data, is_archived in sorted_sessions: # Format timestamps - use short format for narrow terminals use_short_format = terminal_width < 120 start_time = self.format_timestamp( @@ -501,7 +1165,9 @@ def populate_table(self) -> None: or session_data.first_user_message or "No preview available" ) - # Let Textual handle truncation based on column width + # Add [ARCHIVED] indicator for archived sessions + if is_archived: + preview = f"[ARCHIVED] {preview}" table.add_row( session_id[:8], @@ -514,10 +1180,12 @@ def populate_table(self) -> None: def update_stats(self) -> None: """Update the project statistics display.""" - total_sessions = len(self.sessions) - total_messages = sum(s.message_count for s in self.sessions.values()) + # Combine all sessions for stats + all_sessions = {**self.sessions, **self.archived_sessions} + total_sessions = len(all_sessions) + total_messages = sum(s.message_count for s in all_sessions.values()) total_tokens = sum( - s.total_input_tokens + s.total_output_tokens for s in self.sessions.values() + s.total_input_tokens + s.total_output_tokens for s in all_sessions.values() ) # Get project name using shared logic @@ -533,16 +1201,14 @@ def 
update_stats(self) -> None: ) # Find date range - if self.sessions: + if all_sessions: timestamps = [ - s.first_timestamp for s in self.sessions.values() if s.first_timestamp + s.first_timestamp for s in all_sessions.values() if s.first_timestamp ] earliest = min(timestamps) if timestamps else "" latest = ( - max( - s.last_timestamp for s in self.sessions.values() if s.last_timestamp - ) - if self.sessions + max(s.last_timestamp for s in all_sessions.values() if s.last_timestamp) + if all_sessions else "" ) @@ -560,8 +1226,17 @@ def update_stats(self) -> None: # Create spaced layout: Project (left), Sessions info (center), Date range (right) terminal_width = self.size.width + # Show archived count if any + archived_count = len(self.archived_sessions) + if archived_count > 0: + mode_indicator = f"({archived_count} archived)" + else: + mode_indicator = "" + # Project section (left aligned) - project_section = f"[bold]Project:[/bold] {project_name}" + project_section = ( + f"[bold]Project:[/bold] {project_name} {mode_indicator}".strip() + ) # Sessions info section (center) sessions_section = f"[bold]Sessions:[/bold] {total_sessions:,} | [bold]Messages:[/bold] {total_messages:,} | [bold]Tokens:[/bold] {total_tokens:,}" @@ -631,11 +1306,15 @@ def _update_selected_session_from_cursor(self) -> None: if row_data: # Extract session ID from the first column (now just first 8 chars) session_id_display = str(row_data[0]) - # Find the full session ID + # Find the full session ID in both dicts (current first, then archived) for full_session_id in self.sessions.keys(): if full_session_id.startswith(session_id_display): self.selected_session_id = full_session_id - break + return + for full_session_id in self.archived_sessions.keys(): + if full_session_id.startswith(session_id_display): + self.selected_session_id = full_session_id + return except Exception: # If widget not mounted yet or we can't get the row data, don't update selection pass @@ -764,14 +1443,17 @@ def 
_escape_rich_markup(self, text: str) -> str: def _update_expanded_content(self) -> None: """Update the expanded content for the currently selected session.""" - if ( - not self.selected_session_id - or self.selected_session_id not in self.sessions - ): + if not self.selected_session_id: + return + + # Get session data from either current or archived sessions + session_data = self.sessions.get( + self.selected_session_id + ) or self.archived_sessions.get(self.selected_session_id) + if not session_data: return expanded_content = self.query_one("#expanded-content", Static) - session_data = self.sessions[self.selected_session_id] # Build expanded content content_parts: list[str] = [] @@ -841,16 +1523,24 @@ def _ensure_session_file( if not needs_regeneration: return session_file - # Load messages from JSONL files + # Load messages - from cache for archived sessions, from JSONL otherwise try: - messages = load_directory_transcripts( - self.project_path, self.cache_manager, silent=True - ) + is_archived = session_id in self.archived_sessions + if is_archived: + # Load from cache for archived sessions + messages = self.cache_manager.load_session_entries(session_id) + else: + # Load from JSONL files for current sessions + messages = load_directory_transcripts( + self.project_path, self.cache_manager, silent=True + ) if not messages: return None - # Build session title - session_data = self.sessions.get(session_id) + # Build session title - check both dicts + session_data = self.sessions.get(session_id) or self.archived_sessions.get( + session_id + ) project_cache = self.cache_manager.get_cached_project_data() project_name = get_project_display_name( self.project_path.name, @@ -884,9 +1574,12 @@ def _ensure_session_file( def action_toggle_expanded(self) -> None: """Toggle the expanded view for the selected session.""" + if not self.selected_session_id: + return + # Check if session exists in either current or archived sessions if ( - not self.selected_session_id - or 
self.selected_session_id not in self.sessions + self.selected_session_id not in self.sessions + and self.selected_session_id not in self.archived_sessions ): return @@ -909,8 +1602,12 @@ def action_toggle_help(self) -> None: "Claude Code Log - Session Browser\n\n" "Navigation:\n" "- Use arrow keys to select sessions\n" - "- Expanded content updates automatically when visible\n\n" + "- Expanded content updates automatically when visible\n" + "- [ARCHIVED] sessions have no JSONL file (cache only)\n\n" "Actions:\n" + "- a: Archive session (delete JSONL, keep in cache)\n" + "- d: Delete session (with options)\n" + "- r: Restore archived session to JSONL\n" "- e: Toggle expanded view for session\n" "- h: Open selected session's HTML page\n" "- m: Open selected session's Markdown file (in browser)\n" @@ -921,6 +1618,181 @@ def action_toggle_help(self) -> None: ) self.notify(help_text, timeout=10) + def check_action(self, action: str, parameters: tuple[object, ...]) -> bool | None: + """Conditionally enable/disable actions based on selected session type.""" + if not self.selected_session_id: + return True # Allow action, it will handle missing selection + + is_archived = self.selected_session_id in self.archived_sessions + is_current = self.selected_session_id in self.sessions + + # Archive is only available for current sessions (has JSONL file) + if action == "archive_session" and not is_current: + return False + # Resume is only available for current sessions + if action == "resume_selected" and not is_current: + return False + # Restore is only available for archived sessions + if action == "restore_jsonl" and not is_archived: + return False + return True + + def action_restore_jsonl(self) -> None: + """Restore the selected archived session to a JSONL file.""" + if not self.selected_session_id: + self.notify("No session selected", severity="warning") + return + + if self.selected_session_id not in self.archived_sessions: + self.notify( + "Selected session not found in 
archived sessions", severity="error" + ) + return + + try: + # Export messages from cache + messages = self.cache_manager.export_session_to_jsonl( + self.selected_session_id + ) + if not messages: + self.notify("No messages found for session", severity="error") + return + + # Ensure project directory exists (may have been deleted) + self.project_path.mkdir(parents=True, exist_ok=True) + + # Write to JSONL file + output_path = self.project_path / f"{self.selected_session_id}.jsonl" + with open(output_path, "w", encoding="utf-8") as f: + for msg in messages: + f.write(msg + "\n") + + self.notify( + f"Restored {len(messages)} messages to {output_path.name}", + severity="information", + ) + + # Refresh to show the restored session as current + self._refresh_after_restore() + + except Exception as e: + self.notify(f"Error restoring session: {e}", severity="error") + + def _refresh_after_restore(self) -> None: + """Refresh sessions after restoring an archived session.""" + # If this was a fully archived project, it's no longer archived + # since we just restored a JSONL file + if self.is_archived_project: + self.is_archived_project = False + + # Reload sessions - this will now detect the restored JSONL file + self.load_sessions() + + self.notify( + "Session restored! 
It now appears as a current session.", + timeout=5, + ) + + def action_archive_session(self) -> None: + """Archive the selected session (delete JSONL file, keep in cache).""" + if not self.selected_session_id: + self.notify("No session selected", severity="warning") + return + + # Archive only works for current sessions (those with JSONL files) + if self.selected_session_id not in self.sessions: + self.notify( + "Only current sessions can be archived (already archived or not found)", + severity="warning", + ) + return + + # Push archive confirmation screen + self.push_screen( + ArchiveConfirmScreen(session_id=self.selected_session_id), + callback=self._on_archive_confirm, + ) + + def _on_archive_confirm(self, confirmed: Optional[bool]) -> None: + """Handle archive confirmation result.""" + if not confirmed or not self.selected_session_id: + return + + try: + # Delete the JSONL file + jsonl_path = self.project_path / f"{self.selected_session_id}.jsonl" + if jsonl_path.exists(): + jsonl_path.unlink() + self.notify( + f"Session {self.selected_session_id[:8]} archived", + severity="information", + ) + # Reload sessions - this will move the session to archived + self.load_sessions() + else: + self.notify("JSONL file not found", severity="error") + except Exception as e: + self.notify(f"Error archiving session: {e}", severity="error") + + def action_delete_session(self) -> None: + """Delete the selected session with smart options.""" + if not self.selected_session_id: + self.notify("No session selected", severity="warning") + return + + # Check if session exists in either current or archived sessions + if ( + self.selected_session_id not in self.sessions + and self.selected_session_id not in self.archived_sessions + ): + self.notify("Selected session not found", severity="error") + return + + # Determine if this is an archived session (no JSONL to fall back on) + is_archived_session = self.selected_session_id in self.archived_sessions + + # Push confirmation screen + 
self.push_screen( + DeleteConfirmScreen( + session_id=self.selected_session_id, + is_archived=is_archived_session, + ), + callback=self._on_delete_confirm, + ) + + def _on_delete_confirm(self, delete_option: Optional[str]) -> None: + """Handle deletion confirmation result.""" + if not delete_option or not self.selected_session_id: + return + + try: + deleted_what: list[str] = [] + + # Delete JSONL file if requested + if delete_option == "both": + jsonl_path = self.project_path / f"{self.selected_session_id}.jsonl" + if jsonl_path.exists(): + jsonl_path.unlink() + deleted_what.append("JSONL file") + + # Delete from cache + success = self.cache_manager.delete_session(self.selected_session_id) + if success: + deleted_what.append("cache") + + if deleted_what: + self.notify( + f"Session {self.selected_session_id[:8]} deleted ({', '.join(deleted_what)})", + severity="information", + ) + # Clear selection and reload + self.selected_session_id = None + self.load_sessions() + else: + self.notify("Failed to delete session", severity="error") + except Exception as e: + self.notify(f"Error deleting session: {e}", severity="error") + def action_back_to_projects(self) -> None: """Navigate to the project selector.""" # Exit with a special return value to signal we want to go to project selector @@ -932,14 +1804,16 @@ async def action_quit(self) -> None: def run_project_selector( - projects: list[Path], matching_projects: list[Path] + projects: list[Path], + matching_projects: list[Path], + archived_projects: Optional[set[Path]] = None, ) -> Optional[Path]: """Run the project selector TUI and return the selected project path.""" if not projects: print("Error: No projects provided") return None - app = ProjectSelector(projects, matching_projects) + app = ProjectSelector(projects, matching_projects, archived_projects) try: return app.run() except KeyboardInterrupt: @@ -948,9 +1822,20 @@ def run_project_selector( return None -def run_session_browser(project_path: Path) -> 
Optional[str]: +def run_session_browser(project_path: Path, is_archived: bool = False) -> Optional[str]: """Run the session browser TUI for the given project path.""" if not project_path.exists(): + # For archived projects, the directory may not exist but cache may + if is_archived: + # Try to load from cache + try: + cache_manager = CacheManager(project_path, get_library_version()) + project_cache = cache_manager.get_cached_project_data() + if project_cache and project_cache.sessions: + app = SessionBrowser(project_path, is_archived=True) + return app.run() + except Exception: + pass print(f"Error: Project path {project_path} does not exist") return None @@ -961,10 +1846,20 @@ def run_session_browser(project_path: Path) -> Optional[str]: # Check if there are any JSONL files jsonl_files = list(project_path.glob("*.jsonl")) if not jsonl_files: + # For archived projects, check if we have cached sessions + if is_archived: + try: + cache_manager = CacheManager(project_path, get_library_version()) + project_cache = cache_manager.get_cached_project_data() + if project_cache and project_cache.sessions: + app = SessionBrowser(project_path, is_archived=True) + return app.run() + except Exception: + pass print(f"Error: No JSONL transcript files found in {project_path}") return None - app = SessionBrowser(project_path) + app = SessionBrowser(project_path, is_archived=is_archived) try: return app.run() except KeyboardInterrupt: diff --git a/dev-docs/restoring-archived-sessions.md b/dev-docs/restoring-archived-sessions.md new file mode 100644 index 00000000..38582deb --- /dev/null +++ b/dev-docs/restoring-archived-sessions.md @@ -0,0 +1,100 @@ +# Restoring Archived Sessions + +When you run `claude-code-log`, you may see output like: + +```sh +project-name: cached, 3 archived (0.0s) +``` + +This indicates that 3 sessions exist in the cache whose source JSONL files have been deleted. + +## What Are Archived Sessions? 
+ +Archived sessions are sessions preserved in the SQLite cache (`~/.claude/projects/cache.db`) even after their source JSONL files have been deleted. This happens when: + +1. Claude Code automatically deletes old JSONL files based on the `cleanupPeriodDays` setting +2. You manually delete JSONL files from `~/.claude/projects/*/` + +The cache stores the complete message data, so full restoration is possible. + +## Preventing Automatic Deletion + +Claude Code automatically deletes session logs after 30 days by default. To change this, add `cleanupPeriodDays` to your `~/.claude/settings.json`: + +```json +{ + "cleanupPeriodDays": 99999 +} +``` + +This effectively disables automatic cleanup (274 years). You can also set it to a specific number of days. + +See Claude Code's [settings documentation](https://docs.anthropic.com/en/docs/claude-code/settings) for more details. + +## Using the TUI to Manage Archived Sessions + +The easiest way to browse and restore archived sessions is through the interactive TUI. + +### Launch the TUI + +```bash +claude-code-log --tui +``` + +### Toggle Archived View + +Press `a` to toggle between current and archived sessions. The header shows the current mode: + +```text +┌─ Claude Code Log ─────────────────────────────────────────────────┐ +│ Project: my-project ARCHIVED (3) │ +│ Sessions: 3 │ Messages: 456 │ Tokens: 45,230 │ +├──────────┬───────────────────────────────────┬─────────┬──────────┤ +│ Session │ Title │ Start │ Messages │ +├──────────┼───────────────────────────────────┼─────────┼──────────┤ +│ abc123 │ Fix authentication bug │ 12-01 │ 45 │ +│ def456 │ Add user settings page │ 11-28 │ 123 │ +│ ghi789 │ Refactor database layer │ 11-15 │ 67 │ +└──────────┴───────────────────────────────────┴─────────┴──────────┘ + [a] Current [r] Restore [h] HTML [v] View [q] Quit +``` + +### Restore a Session + +1. Switch to archived view with `a` +2. Navigate to the session you want to restore +3. 
Press `r` to restore the session to a JSONL file +4. The session will be restored to `~/.claude/projects/{project}/{session-id}.jsonl` +5. Press `a` again to switch back to current sessions and see the restored session + +### View Archived Sessions + +You can also view archived sessions as HTML or Markdown without restoring them: + +- `h` - Open HTML in browser +- `m` - Open Markdown in browser +- `v` - View Markdown in embedded viewer + +## Limitations + +- **Message order**: Messages are ordered by timestamp, which may differ slightly from original file order for same-timestamp entries +- **Whitespace**: Original JSON formatting is not preserved (semantically identical) + +## Manual SQL Approach + +For advanced users, you can also query the cache database directly: + +```bash +sqlite3 ~/.claude/projects/cache.db +``` + +```sql +-- List all sessions +SELECT p.project_path, s.session_id, s.first_timestamp, s.message_count +FROM sessions s +JOIN projects p ON s.project_id = p.id +ORDER BY s.first_timestamp; + +-- Export a session's messages +SELECT content FROM messages WHERE session_id = 'your-session-id' ORDER BY timestamp; +``` diff --git a/test/__snapshots__/test_snapshot_html.ambr b/test/__snapshots__/test_snapshot_html.ambr index 65bf5df3..83cee00c 100644 --- a/test/__snapshots__/test_snapshot_html.ambr +++ b/test/__snapshots__/test_snapshot_html.ambr @@ -461,6 +461,30 @@ .project-sessions details[open] summary { margin-bottom: 10px; } + + /* Archived project styling */ + .project-card.archived { + opacity: 0.6; + background-color: #f5f5f522; + } + + .project-card.archived:hover { + opacity: 0.8; + } + + .archived-badge { + display: inline-block; + background-color: #888; + color: white; + font-size: 0.65em; + font-weight: 600; + padding: 2px 8px; + border-radius: 4px; + margin-left: 10px; + vertical-align: middle; + text-transform: uppercase; + letter-spacing: 0.5px; + } /* Search Bar Styles */ .search-container { position: relative; @@ -1655,7 +1679,9 @@
Users/test/project/beta + (← open combined transcript) +
📁 3 transcript files
@@ -1672,7 +1698,9 @@
alpha + (← open combined transcript) +
📁 5 transcript files
diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index 2bce66df..6af5b721 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -440,3 +440,424 @@ def test_cache_version_upgrade_scenario(self, setup_test_project): with patch("claude_code_log.cache.get_library_version", return_value="2.0.0"): output = convert_jsonl_to_html(input_path=project_dir, use_cache=True) assert output.exists() + + +class TestArchivedSessionsIntegration: + """Test archived sessions functionality - sessions cached but JSONL deleted.""" + + def test_get_archived_sessions_after_file_deletion( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that sessions become archived when JSONL files are deleted.""" + project_dir = temp_projects_dir / "archived-test" + project_dir.mkdir() + + # Create JSONL file with session data + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify session is in cache + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert "session-1" in cached_data.sessions + + # Delete the JSONL file + jsonl_file.unlink() + + # Now session-1 should be archived (no valid session IDs) + valid_session_ids: set[str] = set() # No JSONL files left + archived = cache_manager.get_archived_sessions(valid_session_ids) + + assert "session-1" in archived + assert archived["session-1"].message_count > 0 + assert archived["session-1"].first_timestamp == "2023-01-01T10:00:00Z" + + def test_get_archived_sessions_with_some_files_remaining( + self, temp_projects_dir, sample_jsonl_data + ): + """Test archived sessions when only some JSONL files are deleted.""" + project_dir = temp_projects_dir / "partial-archived" + project_dir.mkdir() + + # 
Create two session files + for session_id in ["session-1", "session-2"]: + jsonl_file = project_dir / f"{session_id}.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + entry_copy = entry.copy() + if "sessionId" in entry_copy: + entry_copy["sessionId"] = session_id + f.write(json.dumps(entry_copy) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Delete only session-1 + (project_dir / "session-1.jsonl").unlink() + + # session-2 should be valid, session-1 should be archived + valid_session_ids = {"session-2"} + cache_manager = CacheManager(project_dir, "1.0.0") + archived = cache_manager.get_archived_sessions(valid_session_ids) + + assert "session-1" in archived + assert "session-2" not in archived + + def test_export_session_to_jsonl(self, temp_projects_dir, sample_jsonl_data): + """Test exporting session messages for JSONL restoration.""" + project_dir = temp_projects_dir / "export-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Export messages from cache + cache_manager = CacheManager(project_dir, "1.0.0") + exported_messages = cache_manager.export_session_to_jsonl("session-1") + + # Should have exported messages (not summary which has no sessionId) + assert len(exported_messages) >= 2 # user + assistant messages + + # Each message should be valid JSON + for msg_json in exported_messages: + parsed = json.loads(msg_json) + assert "type" in parsed + assert parsed["sessionId"] == "session-1" + + def test_load_session_entries_for_rendering( + self, temp_projects_dir, sample_jsonl_data + ): + """Test loading session entries from cache for HTML/Markdown rendering.""" + project_dir = temp_projects_dir / "load-entries-test" + 
project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Load entries from cache + cache_manager = CacheManager(project_dir, "1.0.0") + entries = cache_manager.load_session_entries("session-1") + + # Should have TranscriptEntry objects + assert len(entries) >= 2 + + # Check that entries are proper types + entry_types = [e.type for e in entries] + assert "user" in entry_types + assert "assistant" in entry_types + + def test_full_archive_and_restore_workflow( + self, temp_projects_dir, sample_jsonl_data + ): + """Test the full workflow: cache -> delete -> archive -> restore.""" + project_dir = temp_projects_dir / "full-workflow" + project_dir.mkdir() + + # Step 1: Create JSONL file and cache it + original_file = project_dir / "session-1.jsonl" + with open(original_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify cache populated + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + original_message_count = cached_data.sessions["session-1"].message_count + + # Step 2: Delete the JSONL file + original_file.unlink() + assert not original_file.exists() + + # Step 3: Verify session is now archived + archived = cache_manager.get_archived_sessions(set()) + assert "session-1" in archived + + # Step 4: Restore the session from cache + exported_messages = cache_manager.export_session_to_jsonl("session-1") + restored_file = project_dir / "session-1.jsonl" + with open(restored_file, "w") as f: + for msg in exported_messages: + f.write(msg + "\n") + + # Step 5: Verify the restored file exists and session is no longer archived + assert 
restored_file.exists() + + valid_session_ids = {"session-1"} + archived_after_restore = cache_manager.get_archived_sessions(valid_session_ids) + assert "session-1" not in archived_after_restore + + # Step 6: Verify restored content is valid by re-processing + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + cached_data = cache_manager.get_cached_project_data() + # Message count should be preserved + assert cached_data is not None + assert cached_data.sessions["session-1"].message_count == original_message_count + + def test_archived_session_count_in_converter( + self, temp_projects_dir, sample_jsonl_data, capsys + ): + """Test that archived session count is reported in converter output.""" + project_dir = temp_projects_dir / "count-test" + project_dir.mkdir() + + # Create two sessions so one remains after deletion + for session_id in ["session-1", "session-2"]: + jsonl_file = project_dir / f"{session_id}.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + entry_copy = entry.copy() + if "sessionId" in entry_copy: + entry_copy["sessionId"] = session_id + f.write(json.dumps(entry_copy) + "\n") + + # Process to cache (as part of all-projects hierarchy) + process_projects_hierarchy(projects_path=temp_projects_dir, use_cache=True) + + # Delete only session-1, keeping session-2 so project is still found + (project_dir / "session-1.jsonl").unlink() + + # Process again - should report archived sessions + process_projects_hierarchy( + projects_path=temp_projects_dir, use_cache=True, silent=False + ) + + captured = capsys.readouterr() + # Output should mention archived sessions + assert "archived" in captured.out.lower() + + def test_load_entries_preserves_message_order( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that loaded entries preserve chronological order.""" + project_dir = temp_projects_dir / "order-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with 
open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Load entries from cache + cache_manager = CacheManager(project_dir, "1.0.0") + entries = cache_manager.load_session_entries("session-1") + + # Filter to entries with timestamps and extract them + timestamps: list[str] = [] + for e in entries: + if hasattr(e, "timestamp") and e.timestamp: + timestamps.append(str(e.timestamp)) + + # Verify chronological order (ISO timestamps are lexicographically sortable) + assert timestamps == sorted(timestamps) + + def test_export_empty_session_returns_empty_list(self, temp_projects_dir): + """Test that exporting a non-existent session returns empty list.""" + project_dir = temp_projects_dir / "empty-export" + project_dir.mkdir() + + # Create a dummy JSONL to initialize the project + jsonl_file = project_dir / "dummy.jsonl" + jsonl_file.write_text("{}\n") + + cache_manager = CacheManager(project_dir, "1.0.0") + + # Export non-existent session + exported = cache_manager.export_session_to_jsonl("non-existent-session") + assert exported == [] + + # Load entries for non-existent session + entries = cache_manager.load_session_entries("non-existent-session") + assert entries == [] + + def test_export_session_produces_compact_json( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that exported JSONL has compact JSON format (no spaces after separators).""" + project_dir = temp_projects_dir / "compact-json-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Export messages + cache_manager = CacheManager(project_dir, "1.0.0") + exported_messages = 
cache_manager.export_session_to_jsonl("session-1") + + # Each message should be compact JSON (no spaces after : or ,) + for msg_json in exported_messages: + # Should not have ": " (colon-space) pattern except in string values + # Check by ensuring re-serialization produces same result + parsed = json.loads(msg_json) + compact_reserialized = json.dumps(parsed, separators=(",", ":")) + assert msg_json == compact_reserialized, ( + f"JSON should be compact format.\n" + f"Got: {msg_json[:100]}...\n" + f"Expected: {compact_reserialized[:100]}..." + ) + + def test_delete_session_from_cache(self, temp_projects_dir, sample_jsonl_data): + """Test deleting a session from cache.""" + project_dir = temp_projects_dir / "delete-session-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify session exists in cache + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert "session-1" in cached_data.sessions + + # Delete the session + result = cache_manager.delete_session("session-1") + assert result is True + + # Verify session is gone from cache + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert "session-1" not in cached_data.sessions + + # Export should return empty + exported = cache_manager.export_session_to_jsonl("session-1") + assert exported == [] + + def test_delete_nonexistent_session(self, temp_projects_dir): + """Test deleting a session that doesn't exist returns False.""" + project_dir = temp_projects_dir / "delete-nonexistent" + project_dir.mkdir() + + # Create a dummy JSONL to initialize the project + jsonl_file = project_dir / "dummy.jsonl" + jsonl_file.write_text("{}\n") + + 
cache_manager = CacheManager(project_dir, "1.0.0") + + # Delete non-existent session + result = cache_manager.delete_session("non-existent-session") + assert result is False + + def test_delete_project_from_cache(self, temp_projects_dir, sample_jsonl_data): + """Test deleting an entire project from cache.""" + project_dir = temp_projects_dir / "delete-project-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify project exists in cache + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + + # Delete the project + result = cache_manager.delete_project() + assert result is True + + # Cache manager should no longer have valid project ID + cached_data = cache_manager.get_cached_project_data() + assert cached_data is None + + +class TestGetAllCachedProjects: + """Tests for get_all_cached_projects() function.""" + + def test_get_all_cached_projects_finds_active_and_archived( + self, temp_projects_dir, sample_jsonl_data + ): + """Test finding both active and archived projects.""" + from claude_code_log.cache import get_all_cached_projects + + # Create two projects - one active, one that will be archived + active_dir = temp_projects_dir / "active-project" + active_dir.mkdir() + archived_dir = temp_projects_dir / "archived-project" + archived_dir.mkdir() + + # Create JSONL files in both + for proj_dir in [active_dir, archived_dir]: + jsonl_file = proj_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process both projects to populate cache + convert_jsonl_to_html(input_path=active_dir, use_cache=True) + 
convert_jsonl_to_html(input_path=archived_dir, use_cache=True) + + # Delete JSONL from "archived" project to simulate archival + (archived_dir / "session-1.jsonl").unlink() + + # Get all cached projects + projects = get_all_cached_projects(temp_projects_dir) + + # Should find both projects + project_paths = {p[0] for p in projects} + assert str(active_dir) in project_paths + assert str(archived_dir) in project_paths + + # Check is_archived flag + for project_path, is_archived in projects: + if project_path == str(active_dir): + assert is_archived is False + elif project_path == str(archived_dir): + assert is_archived is True + + def test_get_all_cached_projects_empty_dir(self, temp_projects_dir): + """Test get_all_cached_projects with no cache.""" + from claude_code_log.cache import get_all_cached_projects + + # No cache.db exists + projects = get_all_cached_projects(temp_projects_dir) + assert projects == [] + + def test_get_all_cached_projects_nonexistent_dir(self, tmp_path): + """Test get_all_cached_projects with nonexistent directory.""" + from claude_code_log.cache import get_all_cached_projects + + nonexistent = tmp_path / "does-not-exist" + projects = get_all_cached_projects(nonexistent) + assert projects == [] diff --git a/test/test_tui.py b/test/test_tui.py index 30856317..9009a490 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -13,7 +13,7 @@ from textual.widgets import DataTable, Label from claude_code_log.cache import CacheManager, SessionCacheData -from claude_code_log.tui import SessionBrowser, run_session_browser +from claude_code_log.tui import ProjectSelector, SessionBrowser, run_session_browser @pytest.fixture @@ -87,11 +87,23 @@ def temp_project_dir(): }, ] - # Write test data to JSONL file - jsonl_file = project_path / "test-transcript.jsonl" - with open(jsonl_file, "w", encoding="utf-8") as f: + # Write test data to JSONL files - one per session (matching real-world usage) + # Session 123 entries + session_123_file = project_path / 
"session-123.jsonl" + with open(session_123_file, "w", encoding="utf-8") as f: for entry in test_data: - f.write(json.dumps(entry) + "\n") + if entry.get("sessionId") == "session-123": + f.write(json.dumps(entry) + "\n") + + # Session 456 entries (includes summary) + session_456_file = project_path / "session-456.jsonl" + with open(session_456_file, "w", encoding="utf-8") as f: + for entry in test_data: + if ( + entry.get("sessionId") == "session-456" + or entry.get("type") == "summary" + ): + f.write(json.dumps(entry) + "\n") yield project_path @@ -907,3 +919,1017 @@ async def test_empty_project_handling(self): stats = cast(Label, app.query_one("#stats")) stats_text = str(stats.content) assert "Sessions:[/bold] 0" in stats_text + + @pytest.mark.asyncio + async def test_archived_project_loads_archived_sessions(self): + """Test that an archived project (no JSONL files) loads sessions in archived_sessions.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + + # Create empty JSONL file to initialize + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.touch() + + # Create app with is_archived=True (simulating archived project) + app = SessionBrowser(project_path, is_archived=True) + + # Mock the cache manager to return some sessions + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + summary="Archived session", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=5, + first_user_message="Hello from archived", + total_input_tokens=100, + total_output_tokens=200, + ), + } + + with ( + patch.object( + app.cache_manager, "get_cached_project_data" + ) as mock_cache, + ): + mock_cache.return_value = Mock( + sessions=mock_session_data, + working_directories=[str(project_path)], + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Manually call load_sessions (since mocking) + app.load_sessions() + + # Sessions should be in 
archived_sessions, not sessions + assert len(app.archived_sessions) > 0 + assert len(app.sessions) == 0 + + # Stats should show "archived" count + stats = cast(Label, app.query_one("#stats")) + stats_text = str(stats.content) + assert "archived" in stats_text.lower() + + +@pytest.mark.tui +class TestUnifiedSessionList: + """Tests for the unified session list showing both current and archived sessions.""" + + @pytest.mark.asyncio + async def test_unified_list_shows_both_current_and_archived(self): + """Test that both current and archived sessions appear in the same list.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-current.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + current_session = { + "session-current": SessionCacheData( + session_id="session-current", + first_timestamp="2025-01-02T10:00:00Z", + last_timestamp="2025-01-02T10:01:00Z", + message_count=1, + first_user_message="Current session", + total_input_tokens=10, + total_output_tokens=10, + ), + } + archived_session = { + "session-archived": SessionCacheData( + session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Archived session", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = current_session + app.archived_sessions = archived_session + app.populate_table() + + # Get the table + table = cast(DataTable, app.query_one("#sessions-table")) + + # Should have 2 rows (both sessions in one list) + assert table.row_count == 2 + + @pytest.mark.asyncio + async def test_unified_list_sorted_by_timestamp_newest_first(self): + """Test that sessions are sorted by timestamp with newest first.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + 
jsonl_file = project_path / "session-old.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + # Create sessions with different timestamps + old_session = { + "session-old": SessionCacheData( + session_id="session-old", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Old session", + total_input_tokens=10, + total_output_tokens=10, + ), + } + new_archived_session = { + "session-new": SessionCacheData( + session_id="session-new", + first_timestamp="2025-01-03T10:00:00Z", + last_timestamp="2025-01-03T10:01:00Z", + message_count=1, + first_user_message="New archived session", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = old_session + app.archived_sessions = new_archived_session + app.populate_table() + + table = cast(DataTable, app.query_one("#sessions-table")) + + # Get first row - should be the newest (archived) session + first_row = table.get_row_at(0) + # Session ID column shows first 8 chars + assert str(first_row[0]).startswith("session-") + # Title should have [ARCHIVED] prefix since newest is archived + assert "[ARCHIVED]" in str(first_row[1]) + + @pytest.mark.asyncio + async def test_archived_sessions_have_archived_indicator(self): + """Test that archived sessions display [ARCHIVED] indicator in title.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-current.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + current_session = { + "session-current": SessionCacheData( + session_id="session-current", + first_timestamp="2025-01-02T10:00:00Z", + last_timestamp="2025-01-02T10:01:00Z", + message_count=1, + first_user_message="Current session message", + total_input_tokens=10, + total_output_tokens=10, + 
), + } + archived_session = { + "session-archived": SessionCacheData( + session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Archived session message", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = current_session + app.archived_sessions = archived_session + app.populate_table() + + table = cast(DataTable, app.query_one("#sessions-table")) + + # Check both rows + found_archived_indicator = False + found_current_without_indicator = False + + for row_idx in range(table.row_count): + row = table.get_row_at(row_idx) + title = str(row[1]) + if "[ARCHIVED]" in title: + found_archived_indicator = True + assert "Archived session message" in title + else: + found_current_without_indicator = True + assert "Current session message" in title + + assert found_archived_indicator, ( + "Archived session should have [ARCHIVED] indicator" + ) + assert found_current_without_indicator, ( + "Current session should not have [ARCHIVED] indicator" + ) + + @pytest.mark.asyncio + async def test_stats_show_combined_totals(self): + """Test that stats display combined totals from both current and archived sessions.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-current.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + current_session = { + "session-current": SessionCacheData( + session_id="session-current", + first_timestamp="2025-01-02T10:00:00Z", + last_timestamp="2025-01-02T10:01:00Z", + message_count=5, + first_user_message="Current", + total_input_tokens=100, + total_output_tokens=200, + ), + } + archived_session = { + "session-archived": SessionCacheData( + session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + 
last_timestamp="2025-01-01T10:01:00Z", + message_count=3, + first_user_message="Archived", + total_input_tokens=50, + total_output_tokens=100, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = current_session + app.archived_sessions = archived_session + app.update_stats() + + stats = cast(Label, app.query_one("#stats")) + stats_text = str(stats.content) + + # Should show combined sessions count (2) + assert "Sessions:[/bold] 2" in stats_text + # Should show combined messages count (5 + 3 = 8) + assert "Messages:[/bold] 8" in stats_text + # Should show combined tokens (100+200+50+100 = 450) + assert "Tokens:[/bold] 450" in stats_text + # Should indicate archived count + assert "1 archived" in stats_text + + +@pytest.mark.tui +class TestArchiveConfirmScreen: + """Tests for archive confirmation via the archive action.""" + + @pytest.mark.asyncio + async def test_archive_confirm_y_key_deletes_file(self): + """Test confirming archive with 'y' key deletes the JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + assert jsonl_file.exists() + + # Trigger archive (opens modal) + await pilot.press("a") + await pilot.pause(0.1) + + # Confirm with 'y' + await pilot.press("y") + await pilot.pause(0.1) + + assert not jsonl_file.exists() + + @pytest.mark.asyncio + async def test_archive_confirm_enter_key_deletes_file(self): + 
"""Test confirming archive with Enter key deletes the JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + assert jsonl_file.exists() + + # Trigger archive (opens modal) + await pilot.press("a") + await pilot.pause(0.1) + + # Confirm with Enter + await pilot.press("enter") + await pilot.pause(0.1) + + assert not jsonl_file.exists() + + @pytest.mark.asyncio + async def test_archive_cancel_n_key_keeps_file(self): + """Test cancelling archive with 'n' key keeps the JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + # Trigger archive (opens modal) + await pilot.press("a") + await pilot.pause(0.1) + + # Cancel with 'n' + await pilot.press("n") + await pilot.pause(0.1) + + # File should still exist + assert jsonl_file.exists() + + 
@pytest.mark.asyncio + async def test_archive_cancel_escape_key_keeps_file(self): + """Test cancelling archive with Escape key keeps the JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + # Trigger archive (opens modal) + await pilot.press("a") + await pilot.pause(0.1) + + # Cancel with Escape + await pilot.press("escape") + await pilot.pause(0.1) + + # File should still exist + assert jsonl_file.exists() + + +@pytest.mark.tui +class TestDeleteConfirmScreen: + """Tests for delete confirmation with smart options.""" + + @pytest.mark.asyncio + async def test_delete_current_session_cache_only_keeps_jsonl(self): + """Test delete with 'c' (cache only) keeps JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with patch.object( + app.cache_manager, "delete_session", return_value=True + ) as mock_delete: + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = 
mock_session_data + app.selected_session_id = "session-123" + + # Trigger delete (opens modal) + await pilot.press("d") + await pilot.pause(0.1) + + # Choose cache only with 'c' + await pilot.press("c") + await pilot.pause(0.1) + + # JSONL should still exist + assert jsonl_file.exists() + mock_delete.assert_called_once_with("session-123") + + @pytest.mark.asyncio + async def test_delete_current_session_both_deletes_jsonl(self): + """Test delete with 'b' (both) deletes JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with patch.object( + app.cache_manager, "delete_session", return_value=True + ) as mock_delete: + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + assert jsonl_file.exists() + + # Trigger delete (opens modal) + await pilot.press("d") + await pilot.pause(0.1) + + # Choose both with 'b' + await pilot.press("b") + await pilot.pause(0.1) + + # JSONL should be deleted + assert not jsonl_file.exists() + mock_delete.assert_called_once_with("session-123") + + @pytest.mark.asyncio + async def test_delete_archived_session_with_enter_key(self): + """Test deleting archived session with Enter key.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_archived_data = { + "session-archived": SessionCacheData( + 
session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with patch.object( + app.cache_manager, "delete_session", return_value=True + ) as mock_delete: + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = {} + app.archived_sessions = mock_archived_data + app.selected_session_id = "session-archived" + + # Trigger delete (opens modal) + await pilot.press("d") + await pilot.pause(0.1) + + # Confirm with Enter (for archived sessions) + await pilot.press("enter") + await pilot.pause(0.1) + + mock_delete.assert_called_once_with("session-archived") + + @pytest.mark.asyncio + async def test_delete_cancel_n_key(self): + """Test cancelling delete with 'n' key.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with patch.object( + app.cache_manager, "delete_session", return_value=True + ) as mock_delete: + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + # Trigger delete (opens modal) + await pilot.press("d") + await pilot.pause(0.1) + + # Cancel with 'n' + await pilot.press("n") + await pilot.pause(0.1) + + # Should not have deleted + mock_delete.assert_not_called() + assert jsonl_file.exists() + + +@pytest.mark.tui +class TestArchiveActionEdgeCases: + """Edge case tests for the archive session action.""" + + 
@pytest.mark.asyncio + async def test_archive_action_no_selection(self): + """Test archive action with no session selected shows warning.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Ensure no session is selected + app.selected_session_id = None + + # Try to archive - should notify warning + await pilot.press("a") + await pilot.pause(0.1) + + # No modal should be pushed (we can't easily check notifications) + # but at least verify no crash occurred + + @pytest.mark.asyncio + async def test_archive_action_on_archived_session_shows_warning(self): + """Test archive action on already archived session shows warning.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-archived": SessionCacheData( + session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Set up archived session + app.archived_sessions = mock_session_data + app.sessions = {} + app.selected_session_id = "session-archived" + + # Try to archive - should notify warning (already archived) + await pilot.press("a") + await pilot.pause(0.1) + + +@pytest.mark.tui +class TestDeleteActionEdgeCases: + """Edge case tests for the delete session action.""" + + @pytest.mark.asyncio + async def test_delete_action_no_selection(self): + """Test delete action with no session selected shows warning.""" + with 
tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Ensure no session is selected + app.selected_session_id = None + + # Try to delete - should notify warning + await pilot.press("d") + await pilot.pause(0.1) + + +@pytest.mark.tui +class TestRestoreWithMkdir: + """Tests for restore action creating directory if needed.""" + + @pytest.mark.asyncio + async def test_restore_creates_directory_if_missing(self): + """Test that restore creates the project directory if it was deleted.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "deleted_project" + # Don't create the directory - it should be created on restore + + app = SessionBrowser(project_path, is_archived=True) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with ( + patch.object( + app.cache_manager, + "export_session_to_jsonl", + return_value=['{"type":"user"}'], + ), + patch.object( + app.cache_manager, "get_cached_project_data" + ) as mock_cache, + patch.object( + app.cache_manager, "get_archived_sessions", return_value={} + ), + ): + mock_cache.return_value = Mock( + sessions=mock_session_data, + working_directories=[str(project_path)], + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Set up archived session + app.archived_sessions = mock_session_data + app.selected_session_id = "session-123" + + # Directory should not exist + assert not project_path.exists() + + # Trigger restore + app.action_restore_jsonl() + await pilot.pause(0.1) + + # Directory should now exist + assert 
project_path.exists() + + # JSONL file should be created + assert (project_path / "session-123.jsonl").exists() + + +@pytest.mark.tui +class TestProjectSelector: + """Tests for the ProjectSelector TUI.""" + + @pytest.mark.asyncio + async def test_enter_key_selects_project(self): + """Test that Enter key selects the highlighted project.""" + with tempfile.TemporaryDirectory() as temp_dir: + project1 = Path(temp_dir) / "project1" + project1.mkdir() + (project1 / "session-1.jsonl").write_text('{"type":"user"}\n') + + project2 = Path(temp_dir) / "project2" + project2.mkdir() + (project2 / "session-2.jsonl").write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project1, project2], + matching_projects=[], + archived_projects=set(), + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select first project and press Enter + await pilot.press("enter") + await pilot.pause(0.1) + + @pytest.mark.asyncio + async def test_escape_key_quits(self): + """Test that Escape key quits the application.""" + with tempfile.TemporaryDirectory() as temp_dir: + project1 = Path(temp_dir) / "project1" + project1.mkdir() + + app = ProjectSelector( + projects=[project1], + matching_projects=[], + archived_projects=set(), + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Press Escape to quit + await pilot.press("escape") + await pilot.pause(0.1) + + @pytest.mark.asyncio + async def test_archive_project_action(self): + """Test archiving a project deletes JSONL files.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + jsonl1 = project_path / "session-1.jsonl" + jsonl2 = project_path / "session-2.jsonl" + jsonl1.write_text('{"type":"user"}\n') + jsonl2.write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects=set(), + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # 
Select the project + app.selected_project_path = project_path + + # Both JSONL files should exist + assert jsonl1.exists() + assert jsonl2.exists() + + # Press 'a' to archive and then confirm + await pilot.press("a") + await pilot.pause(0.1) + await pilot.press("y") + await pilot.pause(0.1) + + # JSONL files should be deleted + assert not jsonl1.exists() + assert not jsonl2.exists() + + # Project should now be in archived set + assert project_path in app.archived_projects + + @pytest.mark.asyncio + async def test_archive_project_already_archived_shows_warning(self): + """Test archiving an already archived project shows warning.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects={project_path}, # Already archived + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the archived project + app.selected_project_path = project_path + + # Try to archive - should show warning + await pilot.press("a") + await pilot.pause(0.1) + + @pytest.mark.asyncio + async def test_delete_project_cache_only(self): + """Test deleting project cache only keeps JSONL files.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + jsonl = project_path / "session-1.jsonl" + jsonl.write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects=set(), + ) + + with patch.object(CacheManager, "clear_cache"): + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the project + app.selected_project_path = project_path + + # Press 'd' to delete and choose cache only + await pilot.press("d") + await pilot.pause(0.1) + await pilot.press("c") # Cache only + await pilot.pause(0.1) + + # JSONL file should still exist + assert jsonl.exists() + + 
@pytest.mark.asyncio + async def test_delete_project_both(self): + """Test deleting project cache and JSONL files.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + jsonl = project_path / "session-1.jsonl" + jsonl.write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects=set(), + ) + + with patch.object(CacheManager, "clear_cache"): + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the project + app.selected_project_path = project_path + + assert jsonl.exists() + + # Press 'd' to delete and choose both + await pilot.press("d") + await pilot.pause(0.1) + await pilot.press("b") # Both + await pilot.pause(0.1) + + # JSONL file should be deleted + assert not jsonl.exists() + + @pytest.mark.asyncio + async def test_restore_project_creates_directory(self): + """Test restoring a project creates directory if missing.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "deleted_project" + # Don't create the directory + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects={project_path}, # Archived project + ) + + with ( + patch.object(CacheManager, "get_cached_project_data") as mock_cache, + patch.object( + CacheManager, + "export_session_to_jsonl", + return_value=['{"type":"user"}'], + ), + ): + mock_cache.return_value = Mock(sessions=mock_session_data) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the project + app.selected_project_path = project_path + + # Directory should not exist + assert not project_path.exists() + + 
# Press 'r' to restore and confirm + await pilot.press("r") + await pilot.pause(0.1) + await pilot.press("y") + await pilot.pause(0.1) + + # Directory should now exist + assert project_path.exists() + + @pytest.mark.asyncio + async def test_restore_project_not_archived_shows_warning(self): + """Test restoring a non-archived project shows warning.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + (project_path / "session-1.jsonl").write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects=set(), # Not archived + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the non-archived project + app.selected_project_path = project_path + + # Try to restore - should show warning + await pilot.press("r") + await pilot.pause(0.1) From 3101323add7b1a06da87e6f247562f44266c7204 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sat, 10 Jan 2026 02:01:57 +0000 Subject: [PATCH 07/23] PR feedback + zlib compressed message.content --- claude_code_log/cache.py | 51 +++++---- claude_code_log/cli.py | 2 +- claude_code_log/converter.py | 103 +++++++++++++++++- .../migrations/001_initial_schema.sql | 4 +- claude_code_log/tui.py | 16 +-- test/test_cache.py | 75 ++++++++++++- test/test_cache_integration.py | 60 +++++++++- test/test_integration_realistic.py | 4 +- test/test_pagination.py | 49 +++++++++ 9 files changed, 321 insertions(+), 43 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 8e37d477..842c9165 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -3,8 +3,9 @@ import json import sqlite3 +import zlib from contextlib import contextmanager -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, Generator, List, Optional @@ -178,8 +179,8 @@ def __init__(self, project_path: Path, 
library_version: str): self.project_path = project_path self.library_version = library_version - # Database at parent level (projects_dir/cache.db) - self.db_path = project_path.parent / "cache.db" + # Database at parent level (projects_dir/claude-code-log-cache.db) + self.db_path = project_path.parent / "claude-code-log-cache.db" # Initialise database and ensure project exists self._init_database() @@ -287,7 +288,9 @@ def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, An "_leaf_uuid": None, "_level": None, "_operation": None, - "content": json.dumps(entry.model_dump()), + "content": zlib.compress( + json.dumps(entry.model_dump(), separators=(",", ":")).encode("utf-8") + ), } # Extract flattened usage for assistant messages @@ -321,7 +324,7 @@ def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, An def _deserialize_entry(self, row: sqlite3.Row) -> TranscriptEntry: """Convert SQLite row back to TranscriptEntry.""" - content_dict = json.loads(row["content"]) + content_dict = json.loads(zlib.decompress(row["content"]).decode("utf-8")) return create_transcript_entry(content_dict) def _get_file_id(self, jsonl_path: Path) -> Optional[int]: @@ -422,13 +425,22 @@ def load_cached_entries_filtered( params: List[Any] = [file_id] if from_dt: + # Normalize to UTC 'Z' format for consistent string comparison + # with stored timestamps (which use 'Z' suffix from JSONL) + if from_dt.tzinfo is None: + from_dt = from_dt.replace(tzinfo=timezone.utc) + from_bound = from_dt.strftime("%Y-%m-%dT%H:%M:%SZ") # Include entries with NULL timestamp (like summaries) OR within date range sql += " AND (timestamp IS NULL OR timestamp >= ?)" - params.append(from_dt.isoformat()) + params.append(from_bound) if to_dt: + # Normalize to UTC 'Z' format for consistent string comparison + if to_dt.tzinfo is None: + to_dt = to_dt.replace(tzinfo=timezone.utc) + to_bound = to_dt.strftime("%Y-%m-%dT%H:%M:%SZ") sql += " AND (timestamp IS NULL OR timestamp <= 
?)" - params.append(to_dt.isoformat()) + params.append(to_bound) sql += " ORDER BY timestamp NULLS LAST" @@ -997,18 +1009,8 @@ def export_session_to_jsonl(self, session_id: str) -> List[str]: (self._project_id, session_id), ).fetchall() - # Re-serialize to compact JSON format (no spaces after separators) - # to match original JSONL file format - result: List[str] = [] - for row in rows: - try: - parsed = json.loads(row["content"]) - compact = json.dumps(parsed, separators=(",", ":")) - result.append(compact) - except json.JSONDecodeError: - # If parsing fails, use original content - result.append(row["content"]) - return result + # Content is stored as compressed, compact JSON - just decompress + return [zlib.decompress(row["content"]).decode("utf-8") for row in rows] def load_session_entries(self, session_id: str) -> List[TranscriptEntry]: """Load transcript entries for a session from cache. @@ -1357,6 +1359,13 @@ def delete_session(self, session_id: str) -> bool: (self._project_id, session_id), ) + # Delete cached_files entry for this session's JSONL file + # File name pattern is {session_id}.jsonl + conn.execute( + "DELETE FROM cached_files WHERE project_id = ? AND file_name = ?", + (self._project_id, f"{session_id}.jsonl"), + ) + # Delete the session record conn.execute( "DELETE FROM sessions WHERE project_id = ? AND session_id = ?", @@ -1389,7 +1398,7 @@ def delete_project(self) -> bool: def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: """Get all projects from cache, indicating which are archived. - This is a standalone function that queries the cache.db directly + This is a standalone function that queries the cache database directly to find all project paths, without needing to instantiate CacheManager for each project. @@ -1400,7 +1409,7 @@ def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: List of (project_path, is_archived) tuples. is_archived is True if the project has no JSONL files but exists in cache. 
""" - db_path = projects_dir / "cache.db" + db_path = projects_dir / "claude-code-log-cache.db" if not db_path.exists(): return [] diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 19b4bb9d..0be197a3 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -303,7 +303,7 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: click.echo("Clearing caches for all projects...") # Delete the shared SQLite cache database - cache_db = input_path / "cache.db" + cache_db = input_path / "claude-code-log-cache.db" if cache_db.exists(): try: cache_db.unlink() diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 6b5606f5..e41a839b 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -576,6 +576,103 @@ def _assign_sessions_to_pages( return pages +def _build_session_data_from_messages( + messages: List[TranscriptEntry], +) -> Dict[str, SessionCacheData]: + """Build session data from messages when cache is unavailable. + + This is a fallback for pagination when get_cached_project_data() returns None. 
+ + Args: + messages: All messages (deduplicated) + + Returns: + Dict mapping session_id to SessionCacheData + """ + from .parser import extract_text_content + + # Pre-compute warmup session IDs to filter them out + warmup_session_ids = get_warmup_session_ids(messages) + + # Group messages by session + sessions: Dict[str, Dict[str, Any]] = {} + for message in messages: + if not hasattr(message, "sessionId") or isinstance( + message, SummaryTranscriptEntry + ): + continue + + session_id = getattr(message, "sessionId", "") + if not session_id or session_id in warmup_session_ids: + continue + + if session_id not in sessions: + sessions[session_id] = { + "first_timestamp": getattr(message, "timestamp", ""), + "last_timestamp": getattr(message, "timestamp", ""), + "message_count": 0, + "first_user_message": "", + "total_input_tokens": 0, + "total_output_tokens": 0, + "total_cache_creation_tokens": 0, + "total_cache_read_tokens": 0, + } + + sessions[session_id]["message_count"] += 1 + current_timestamp = getattr(message, "timestamp", "") + if current_timestamp: + sessions[session_id]["last_timestamp"] = current_timestamp + + # Get first user message for preview + if ( + isinstance(message, UserTranscriptEntry) + and not sessions[session_id]["first_user_message"] + and hasattr(message, "message") + ): + first_user_content = extract_text_content(message.message.content) + if should_use_as_session_starter(first_user_content): + sessions[session_id]["first_user_message"] = create_session_preview( + first_user_content + ) + + # Extract token usage from assistant messages + if isinstance(message, AssistantTranscriptEntry) and hasattr( + message, "message" + ): + msg_data = message.message + if hasattr(msg_data, "usage") and msg_data.usage: + usage = msg_data.usage + sessions[session_id]["total_input_tokens"] += ( + getattr(usage, "input_tokens", 0) or 0 + ) + sessions[session_id]["total_output_tokens"] += ( + getattr(usage, "output_tokens", 0) or 0 + ) + 
sessions[session_id]["total_cache_creation_tokens"] += ( + getattr(usage, "cache_creation_input_tokens", 0) or 0 + ) + sessions[session_id]["total_cache_read_tokens"] += ( + getattr(usage, "cache_read_input_tokens", 0) or 0 + ) + + # Convert to Dict[str, SessionCacheData] + result: Dict[str, SessionCacheData] = {} + for session_id, data in sessions.items(): + result[session_id] = SessionCacheData( + session_id=session_id, + first_timestamp=data["first_timestamp"], + last_timestamp=data["last_timestamp"], + message_count=data["message_count"], + first_user_message=data["first_user_message"], + total_input_tokens=data["total_input_tokens"], + total_output_tokens=data["total_output_tokens"], + total_cache_creation_tokens=data["total_cache_creation_tokens"], + total_cache_read_tokens=data["total_cache_read_tokens"], + ) + + return result + + def _generate_paginated_html( messages: List[TranscriptEntry], output_dir: Path, @@ -928,7 +1025,11 @@ def convert_jsonl_to( if use_pagination: # Use paginated HTML generation assert cache_manager is not None # Ensured by use_pagination condition - session_data = cached_data.sessions if cached_data else {} + # Use cached session data if available, otherwise build from messages + if cached_data is not None: + session_data = cached_data.sessions + else: + session_data = _build_session_data_from_messages(messages) output_path = _generate_paginated_html( messages, input_path, diff --git a/claude_code_log/migrations/001_initial_schema.sql b/claude_code_log/migrations/001_initial_schema.sql index f7c5946e..b90a6d6c 100644 --- a/claude_code_log/migrations/001_initial_schema.sql +++ b/claude_code_log/migrations/001_initial_schema.sql @@ -99,8 +99,8 @@ CREATE TABLE IF NOT EXISTS messages ( -- QueueOperationTranscriptEntry _operation TEXT, - -- Message content as JSON - content JSON NOT NULL, + -- Message content as compressed JSON (zlib) + content BLOB NOT NULL, FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, FOREIGN KEY 
(file_id) REFERENCES cached_files(id) ON DELETE CASCADE diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 63c1d5f3..e1495e17 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -1036,8 +1036,9 @@ def load_sessions(self) -> None: try: self.populate_table() self.update_stats() - except Exception: - pass + except Exception as e: + # UI components may not be mounted yet during initialization + self.log.debug(f"Skipped UI update for archived project: {e}") return # Check if we need to rebuild cache by checking for modified files @@ -1069,8 +1070,9 @@ def load_sessions(self) -> None: else: self.sessions = {} - except Exception: - # Don't show notification during startup - just return + except Exception as e: + # Don't show notification during startup - log and return + self.log.debug(f"Cache building failed during startup: {e}") return # Only compute archived sessions if there are JSONL files to compare against @@ -1095,9 +1097,9 @@ def load_sessions(self) -> None: try: self.populate_table() self.update_stats() - except Exception: - # Not in app context, skip UI updates - pass + except Exception as e: + # UI components may not be mounted yet during initialization + self.log.debug(f"Skipped UI update after session load: {e}") def populate_table(self) -> None: """Populate the sessions table with session data.""" diff --git a/test/test_cache.py b/test/test_cache.py index 8bb4302d..2779f283 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -27,7 +27,7 @@ def temp_project_dir(): """Create a temporary project directory for testing.""" with tempfile.TemporaryDirectory() as temp_dir: - # Create project subdirectory so db_path (parent/cache.db) is unique per test + # Create project subdirectory so db_path (parent/claude-code-log-cache.db) is unique per test project_dir = Path(temp_dir) / "project" project_dir.mkdir() yield project_dir @@ -102,13 +102,16 @@ def test_initialization(self, temp_project_dir, mock_version): assert 
cache_manager.project_path == temp_project_dir assert cache_manager.library_version == mock_version # SQLite database should be created at parent level - assert cache_manager.db_path == temp_project_dir.parent / "cache.db" + assert ( + cache_manager.db_path + == temp_project_dir.parent / "claude-code-log-cache.db" + ) assert cache_manager.db_path.exists() def test_database_path(self, cache_manager, temp_project_dir): """Test that SQLite database is created at the correct location.""" - # Database should be at parent level (projects_dir/cache.db) - expected_db = temp_project_dir.parent / "cache.db" + # Database should be at parent level (projects_dir/claude-code-log-cache.db) + expected_db = temp_project_dir.parent / "claude-code-log-cache.db" assert cache_manager.db_path == expected_db assert expected_db.exists() @@ -260,6 +263,70 @@ def test_filtered_loading_with_dates(self, cache_manager, temp_project_dir): assert len(user_messages) == 1 assert "Early message" in str(user_messages[0].message.content) + def test_filtered_loading_with_z_suffix_boundary( + self, cache_manager, temp_project_dir + ): + """Test that timestamps with 'Z' suffix are correctly compared at day boundaries. + + This tests the edge case where a message at 23:59:59Z should be included + when filtering with to_date set to that day. Previously, the query used + isoformat() which produced '.999999' microseconds, and 'Z' > '.' in string + comparison caused incorrect exclusion. 
+ """ + entries = [ + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user1", + timestamp="2023-01-01T23:59:59Z", # End of day with Z suffix + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="End of day message")], + ), + ), + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user2", + timestamp="2023-01-02T00:00:01Z", # Start of next day + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="Next day message")], + ), + ), + ] + + jsonl_path = temp_project_dir / "test.jsonl" + jsonl_path.write_text("dummy content", encoding="utf-8") + + cache_manager.save_cached_entries(jsonl_path, entries) + + # Filter to only 2023-01-01 - should include the 23:59:59Z message + filtered = cache_manager.load_cached_entries_filtered( + jsonl_path, "2023-01-01", "2023-01-01" + ) + + assert filtered is not None + user_messages = [entry for entry in filtered if entry.type == "user"] + + # Should include only the end-of-day message, not the next day message + assert len(user_messages) == 1, ( + f"Expected 1 message from 2023-01-01, got {len(user_messages)}. " + "The 23:59:59Z message may have been incorrectly excluded due to " + "timestamp format mismatch (Z vs .999999 suffix)." 
+ ) + assert "End of day message" in str(user_messages[0].message.content) + def test_clear_cache(self, cache_manager, temp_project_dir, sample_entries): """Test cache clearing functionality.""" jsonl_path = temp_project_dir / "test.jsonl" diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index 6af5b721..68d9639f 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -96,7 +96,7 @@ def test_cli_no_cache_flag(self, setup_test_project): assert result1.exit_code == 0 # Check if SQLite cache was created at parent level - cache_db = project_dir.parent / "cache.db" + cache_db = project_dir.parent / "claude-code-log-cache.db" assert cache_db.exists() # Clear the cache @@ -161,7 +161,7 @@ def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): assert result.exit_code == 0 # Verify SQLite cache database created at projects level - cache_db = temp_projects_dir / "cache.db" + cache_db = temp_projects_dir / "claude-code-log-cache.db" assert cache_db.exists() # Verify cache data exists for each project @@ -202,7 +202,7 @@ def test_convert_jsonl_to_html_with_cache(self, setup_test_project): assert output1.exists() # Verify SQLite cache was created - cache_db = project_dir.parent / "cache.db" + cache_db = project_dir.parent / "claude-code-log-cache.db" assert cache_db.exists() # Verify cache has data @@ -253,7 +253,7 @@ def test_process_projects_hierarchy_with_cache( assert output1.exists() # Verify SQLite cache database was created - cache_db = temp_projects_dir / "cache.db" + cache_db = temp_projects_dir / "claude-code-log-cache.db" assert cache_db.exists() # Verify cache data exists for each project @@ -759,6 +759,56 @@ def test_delete_session_from_cache(self, temp_projects_dir, sample_jsonl_data): exported = cache_manager.export_session_to_jsonl("session-1") assert exported == [] + def test_delete_session_invalidates_file_cache( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that 
delete_session also removes cached_files entry. + + Previously, delete_session only removed from messages, html_cache, and + sessions tables but left cached_files intact. This caused is_file_cached() + to return True even though the session data was gone, leading to + load_cached_entries() returning an empty list instead of None. + """ + project_dir = temp_projects_dir / "delete-file-cache-test" + project_dir.mkdir() + + # Create JSONL file with session ID matching file name + session_id = "session-1" + jsonl_file = project_dir / f"{session_id}.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify file is cached before deletion + cache_manager = CacheManager(project_dir, "1.0.0") + assert cache_manager.is_file_cached(jsonl_file), ( + "File should be cached before deletion" + ) + entries_before = cache_manager.load_cached_entries(jsonl_file) + assert entries_before is not None and len(entries_before) > 0, ( + "Should load cached entries before deletion" + ) + + # Delete the session + result = cache_manager.delete_session(session_id) + assert result is True + + # Verify cached_files entry is also removed + assert not cache_manager.is_file_cached(jsonl_file), ( + "is_file_cached() should return False after delete_session() " + "because the cached_files entry should be removed" + ) + + # load_cached_entries should return None (not empty list) for uncached file + entries_after = cache_manager.load_cached_entries(jsonl_file) + assert entries_after is None, ( + "load_cached_entries() should return None after delete_session() " + "because the file is no longer considered cached" + ) + def test_delete_nonexistent_session(self, temp_projects_dir): """Test deleting a session that doesn't exist returns False.""" project_dir = temp_projects_dir / "delete-nonexistent" @@ -850,7 +900,7 @@ def 
test_get_all_cached_projects_empty_dir(self, temp_projects_dir): """Test get_all_cached_projects with no cache.""" from claude_code_log.cache import get_all_cached_projects - # No cache.db exists + # No claude-code-log-cache.db exists projects = get_all_cached_projects(temp_projects_dir) assert projects == [] diff --git a/test/test_integration_realistic.py b/test/test_integration_realistic.py index 1c051290..92580cae 100644 --- a/test/test_integration_realistic.py +++ b/test/test_integration_realistic.py @@ -246,7 +246,7 @@ def test_clear_cache_with_projects_dir(self, temp_projects_copy: Path) -> None: assert result.exit_code == 0 # Verify SQLite cache was created - cache_db = temp_projects_copy / "cache.db" + cache_db = temp_projects_copy / "claude-code-log-cache.db" assert cache_db.exists(), "SQLite cache should exist after processing" # Clear caches @@ -419,7 +419,7 @@ def test_cache_creation_all_projects(self, temp_projects_copy: Path) -> None: process_projects_hierarchy(temp_projects_copy) # Verify SQLite cache database was created - cache_db = temp_projects_copy / "cache.db" + cache_db = temp_projects_copy / "claude-code-log-cache.db" assert cache_db.exists(), "SQLite cache database should exist" for project_dir in temp_projects_copy.iterdir(): diff --git a/test/test_pagination.py b/test/test_pagination.py index fc9b0fd6..63285f60 100644 --- a/test/test_pagination.py +++ b/test/test_pagination.py @@ -685,3 +685,52 @@ def test_multi_page_last_has_hidden_next_link(self, temp_project_dir): ) assert "PAGINATION_NEXT_LINK_START" in page2 assert "last-page" in page2 + + +class TestPaginationFallbackWithoutCache: + """Tests for pagination when cache data is unavailable.""" + + def test_pagination_renders_messages_when_cache_unavailable(self, temp_project_dir): + """Pagination should render messages even when get_cached_project_data returns None. 
+ + This tests the fallback path where cached_data is None but pagination is triggered + because total_message_count exceeds page_size. + """ + from unittest.mock import patch + from claude_code_log.converter import convert_jsonl_to_html + from claude_code_log.cache import CacheManager + + # Create sessions with messages + for i, session_id in enumerate(["s1", "s2"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # First pass: Build cache but then simulate cache unavailable + convert_jsonl_to_html(temp_project_dir, page_size=5000, silent=True) + + # Delete combined file to force regeneration + combined_path = temp_project_dir / "combined_transcripts.html" + if combined_path.exists(): + combined_path.unlink() + + # Patch get_cached_project_data to return None (simulating cache unavailable) + # but keep total_message_count high enough to trigger pagination + def mock_get_cached_project_data(self): + return None + + with patch.object( + CacheManager, "get_cached_project_data", mock_get_cached_project_data + ): + # Force pagination with small page_size + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Verify the generated HTML contains actual messages, not empty content + page1_content = combined_path.read_text(encoding="utf-8") + + # The page should contain message content from the sessions + assert "Message 0 from user" in page1_content or "Response" in page1_content, ( + "Paginated HTML should contain messages when cache is unavailable" + ) From a9f337e894325c1e4f780398f3f37702be37a705 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 00:59:49 +0000 Subject: [PATCH 08/23] Make cache db path parametrised --- claude_code_log/cache.py | 56 +++++++++++++++++++++---- test/test_cache.py | 88 ++++++++++++++++++++++++++++++++++++++++ 2 
files changed, 137 insertions(+), 7 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 842c9165..220cc90c 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -2,6 +2,7 @@ """SQLite-based cache management for Claude Code Log.""" import json +import os import sqlite3 import zlib from contextlib import contextmanager @@ -163,24 +164,54 @@ def get_library_version() -> str: return "unknown" +# ========== Cache Path Configuration ========== + + +def get_cache_db_path(projects_dir: Path) -> Path: + """Get cache database path, respecting CLAUDE_CODE_LOG_CACHE_PATH env var. + + Priority: CLAUDE_CODE_LOG_CACHE_PATH env var > default location. + + Args: + projects_dir: Path to the projects directory (e.g., ~/.claude/projects) + + Returns: + Path to the SQLite cache database. + """ + env_path = os.getenv("CLAUDE_CODE_LOG_CACHE_PATH") + if env_path: + return Path(env_path) + return projects_dir / "claude-code-log-cache.db" + + # ========== Cache Manager ========== class CacheManager: """SQLite-based cache manager for Claude Code Log.""" - def __init__(self, project_path: Path, library_version: str): + def __init__( + self, + project_path: Path, + library_version: str, + db_path: Optional[Path] = None, + ): """Initialise cache manager for a project. Args: project_path: Path to the project directory containing JSONL files library_version: Current version of the library for cache invalidation + db_path: Optional explicit path to the cache database. If not provided, + uses CLAUDE_CODE_LOG_CACHE_PATH env var or default location. 
""" self.project_path = project_path self.library_version = library_version - # Database at parent level (projects_dir/claude-code-log-cache.db) - self.db_path = project_path.parent / "claude-code-log-cache.db" + # Priority: explicit db_path > env var > default location + if db_path: + self.db_path = db_path + else: + self.db_path = get_cache_db_path(project_path.parent) # Initialise database and ensure project exists self._init_database() @@ -1395,7 +1426,10 @@ def delete_project(self) -> bool: return True -def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: +def get_all_cached_projects( + projects_dir: Path, + db_path: Optional[Path] = None, +) -> List[tuple[str, bool]]: """Get all projects from cache, indicating which are archived. This is a standalone function that queries the cache database directly @@ -1404,19 +1438,26 @@ def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: Args: projects_dir: Path to the projects directory (e.g., ~/.claude/projects) + db_path: Optional explicit path to the cache database. If not provided, + uses CLAUDE_CODE_LOG_CACHE_PATH env var or default location. Returns: List of (project_path, is_archived) tuples. is_archived is True if the project has no JSONL files but exists in cache. 
""" - db_path = projects_dir / "claude-code-log-cache.db" - if not db_path.exists(): + # Priority: explicit db_path > env var > default location + if db_path: + actual_db_path = db_path + else: + actual_db_path = get_cache_db_path(projects_dir) + + if not actual_db_path.exists(): return [] result: List[tuple[str, bool]] = [] try: - conn = sqlite3.connect(db_path, timeout=30.0) + conn = sqlite3.connect(actual_db_path, timeout=30.0) conn.row_factory = sqlite3.Row try: rows = conn.execute( @@ -1450,5 +1491,6 @@ def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: "ProjectCache", "SessionCacheData", "get_all_cached_projects", + "get_cache_db_path", "get_library_version", ] diff --git a/test/test_cache.py b/test/test_cache.py index 2779f283..e2832ccd 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -702,3 +702,91 @@ def test_cache_directory_permissions(self, temp_project_dir, mock_version): cache_dir.chmod(0o755) except OSError: pass + + +class TestCachePathEnvVar: + """Test CLAUDE_CODE_LOG_CACHE_PATH environment variable.""" + + def test_default_path_without_env_var(self, tmp_path): + """Test default cache path when env var is not set.""" + project_dir = tmp_path / "project" + project_dir.mkdir() + + cache = CacheManager(project_dir, "1.0.0") + + # Default should be parent/claude-code-log-cache.db + expected_path = tmp_path / "claude-code-log-cache.db" + assert cache.db_path == expected_path + assert expected_path.exists() + + def test_env_var_overrides_default(self, tmp_path, monkeypatch): + """Test that CLAUDE_CODE_LOG_CACHE_PATH overrides default location.""" + custom_db = tmp_path / "custom-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(custom_db)) + + project_dir = tmp_path / "project" + project_dir.mkdir() + + cache = CacheManager(project_dir, "1.0.0") + assert cache.db_path == custom_db + assert custom_db.exists() + + def test_explicit_db_path_overrides_env_var(self, tmp_path, monkeypatch): + """Test that 
explicit db_path takes precedence over env var.""" + env_db = tmp_path / "env-cache.db" + explicit_db = tmp_path / "explicit-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(env_db)) + + project_dir = tmp_path / "project" + project_dir.mkdir() + + cache = CacheManager(project_dir, "1.0.0", db_path=explicit_db) + assert cache.db_path == explicit_db + assert explicit_db.exists() + assert not env_db.exists() + + def test_get_all_cached_projects_respects_env_var(self, tmp_path, monkeypatch): + """Test that get_all_cached_projects uses env var.""" + from claude_code_log.cache import get_all_cached_projects + + custom_db = tmp_path / "custom-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(custom_db)) + + projects_dir = tmp_path / "projects" + projects_dir.mkdir() + + # Create a project and cache it + project_dir = projects_dir / "test-project" + project_dir.mkdir() + cache = CacheManager(project_dir, "1.0.0") # Uses env var + assert cache.db_path == custom_db + + # get_all_cached_projects should also use the env var + projects = get_all_cached_projects(projects_dir) + assert len(projects) == 1 + assert projects[0][0] == str(project_dir) + + def test_get_all_cached_projects_explicit_db_path(self, tmp_path, monkeypatch): + """Test that get_all_cached_projects explicit db_path overrides env var.""" + from claude_code_log.cache import get_all_cached_projects + + env_db = tmp_path / "env-cache.db" + explicit_db = tmp_path / "explicit-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(env_db)) + + projects_dir = tmp_path / "projects" + projects_dir.mkdir() + project_dir = projects_dir / "test-project" + project_dir.mkdir() + + # Create cache using explicit path + cache = CacheManager(project_dir, "1.0.0", db_path=explicit_db) + assert cache.db_path == explicit_db + + # get_all_cached_projects with explicit path should find it + projects = get_all_cached_projects(projects_dir, db_path=explicit_db) + assert len(projects) == 1 + + # 
get_all_cached_projects without explicit path uses env var (empty db) + projects_env = get_all_cached_projects(projects_dir) + assert len(projects_env) == 0 # env_db doesn't have any projects From 479e71b26cfd8233016a8038fb5e0d527a0f04a8 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 12:11:32 +0000 Subject: [PATCH 09/23] Improve test coverage and db isolation --- claude_code_log/cli.py | 11 +- test/conftest.py | 41 +++ test/test_cache_integration.py | 145 +++++++-- test/test_cache_sqlite_integrity.py | 149 ++++++--- test/test_cli.py | 469 ++++++++++++++++++++++++++++ test/test_migrations.py | 367 ++++++++++++++++++++++ test/test_renderer_timings.py | 291 +++++++++++++++++ 7 files changed, 1386 insertions(+), 87 deletions(-) create mode 100644 test/test_cli.py create mode 100644 test/test_migrations.py create mode 100644 test/test_renderer_timings.py diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 0be197a3..2c6e3afc 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -17,7 +17,12 @@ get_file_extension, process_projects_hierarchy, ) -from .cache import CacheManager, get_all_cached_projects, get_library_version +from .cache import ( + CacheManager, + get_all_cached_projects, + get_cache_db_path, + get_library_version, +) def get_default_projects_dir() -> Path: @@ -302,8 +307,8 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: # Clear cache for all project directories click.echo("Clearing caches for all projects...") - # Delete the shared SQLite cache database - cache_db = input_path / "claude-code-log-cache.db" + # Delete the SQLite cache database (respects CLAUDE_CODE_LOG_CACHE_PATH env var) + cache_db = get_cache_db_path(input_path) if cache_db.exists(): try: cache_db.unlink() diff --git a/test/conftest.py b/test/conftest.py index e158654f..7df5e9ba 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,15 +1,56 @@ """Pytest configuration and shared fixtures.""" from pathlib import Path 
+from typing import TYPE_CHECKING, Generator import pytest +if TYPE_CHECKING: + from claude_code_log.cache import CacheManager + from test.snapshot_serializers import ( NormalisedHTMLSerializer, NormalisedMarkdownSerializer, ) +# ========== Cache Test Fixtures ========== +# These fixtures use explicit db_path for true test isolation, +# enabling parallel test execution without database conflicts. + + +@pytest.fixture +def isolated_cache_dir(tmp_path: Path) -> Path: + """Create an isolated project directory with explicit db_path. + + This fixture ensures each test gets its own SQLite database, + enabling full parallel execution with pytest-xdist. + """ + project_dir = tmp_path / "project" + project_dir.mkdir() + return project_dir + + +@pytest.fixture +def isolated_db_path(tmp_path: Path) -> Path: + """Return an isolated database path for cache tests.""" + return tmp_path / "test-cache.db" + + +@pytest.fixture +def isolated_cache_manager( + isolated_cache_dir: Path, isolated_db_path: Path +) -> Generator["CacheManager", None, None]: + """Create a CacheManager with explicit db_path for test isolation. + + This fixture is preferred over the older temp_project_dir pattern + as it guarantees database isolation for parallel test execution. 
+ """ + from claude_code_log.cache import CacheManager + + yield CacheManager(isolated_cache_dir, "1.0.0-test", db_path=isolated_db_path) + + @pytest.fixture def test_data_dir() -> Path: """Return path to test data directory.""" diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index 68d9639f..ddccd39e 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -2,8 +2,8 @@ """Integration tests for cache functionality with CLI and converter.""" import json -import tempfile from pathlib import Path +from typing import Generator from unittest.mock import patch import pytest @@ -14,13 +14,39 @@ from claude_code_log.cache import CacheManager +class ProjectSetup: + """Container for test project setup data.""" + + def __init__(self, projects_dir: Path, db_path: Path): + self.projects_dir = projects_dir + self.db_path = db_path + + @pytest.fixture -def temp_projects_dir(): - """Create a temporary projects directory structure.""" - with tempfile.TemporaryDirectory() as temp_dir: - projects_dir = Path(temp_dir) / "projects" - projects_dir.mkdir() - yield projects_dir +def temp_projects_setup( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> Generator[ProjectSetup, None, None]: + """Create a temporary projects directory structure with isolated cache. + + Uses CLAUDE_CODE_LOG_CACHE_PATH env var for cache isolation, + enabling parallel test execution with pytest-xdist. + + Returns ProjectSetup with both projects_dir and db_path. 
+ """ + projects_dir = tmp_path / "projects" + projects_dir.mkdir() + + # Set env var to isolate cache for this test + isolated_db = tmp_path / "test-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(isolated_db)) + + yield ProjectSetup(projects_dir, isolated_db) + + +@pytest.fixture +def temp_projects_dir(temp_projects_setup: ProjectSetup) -> Path: + """Backward-compatible fixture returning just the projects dir.""" + return temp_projects_setup.projects_dir @pytest.fixture @@ -67,10 +93,57 @@ def sample_jsonl_data(): ] +class ProjectWithCache: + """Container for test project with cache info. + + Implements Path-like interface for backward compatibility with tests + that pass this directly to functions expecting Path objects. + """ + + def __init__(self, project_dir: Path, db_path: Path): + self.project_dir = project_dir + self.db_path = db_path + + # Path-like interface for backward compatibility + def __fspath__(self) -> str: + return str(self.project_dir) + + def __str__(self) -> str: + return str(self.project_dir) + + def __truediv__(self, other: str) -> Path: + return self.project_dir / other + + @property + def parent(self) -> Path: + return self.project_dir.parent + + def exists(self) -> bool: + return self.project_dir.exists() + + def is_dir(self) -> bool: + return self.project_dir.is_dir() + + def is_file(self) -> bool: + return self.project_dir.is_file() + + def glob(self, pattern: str): + return self.project_dir.glob(pattern) + + def iterdir(self): + return self.project_dir.iterdir() + + @property + def name(self) -> str: + return self.project_dir.name + + @pytest.fixture -def setup_test_project(temp_projects_dir, sample_jsonl_data): +def setup_test_project( + temp_projects_setup: ProjectSetup, sample_jsonl_data +) -> ProjectWithCache: """Set up a test project with JSONL files.""" - project_dir = temp_projects_dir / "test-project" + project_dir = temp_projects_setup.projects_dir / "test-project" project_dir.mkdir() # Create JSONL file @@ 
-79,15 +152,16 @@ def setup_test_project(temp_projects_dir, sample_jsonl_data): for entry in sample_jsonl_data: f.write(json.dumps(entry) + "\n") - return project_dir + return ProjectWithCache(project_dir, temp_projects_setup.db_path) class TestCacheIntegrationCLI: """Test cache integration with CLI commands.""" - def test_cli_no_cache_flag(self, setup_test_project): + def test_cli_no_cache_flag(self, setup_test_project: ProjectWithCache): """Test --no-cache flag disables caching.""" - project_dir = setup_test_project + project_dir = setup_test_project.project_dir + db_path = setup_test_project.db_path runner = CliRunner() @@ -95,9 +169,8 @@ def test_cli_no_cache_flag(self, setup_test_project): result1 = runner.invoke(main, [str(project_dir)]) assert result1.exit_code == 0 - # Check if SQLite cache was created at parent level - cache_db = project_dir.parent / "claude-code-log-cache.db" - assert cache_db.exists() + # Check if SQLite cache was created at the isolated location + assert db_path.exists() # Clear the cache runner.invoke(main, [str(project_dir), "--clear-cache"]) @@ -107,7 +180,7 @@ def test_cli_no_cache_flag(self, setup_test_project): assert result2.exit_code == 0 # Cache should be empty (project should not be populated) - cache_manager = CacheManager(project_dir, "1.0.0") + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) cached_data = cache_manager.get_cached_project_data() assert cached_data is not None assert cached_data.total_message_count == 0 @@ -138,8 +211,13 @@ def test_cli_clear_cache_flag(self, setup_test_project): assert cached_data is not None assert len(cached_data.cached_files) == 0 - def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): + def test_cli_all_projects_caching( + self, temp_projects_setup: ProjectSetup, sample_jsonl_data + ): """Test caching with --all-projects flag.""" + temp_projects_dir = temp_projects_setup.projects_dir + db_path = temp_projects_setup.db_path + # Create multiple 
projects for i in range(3): project_dir = temp_projects_dir / f"project-{i}" @@ -160,14 +238,13 @@ def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): result = runner.invoke(main, [str(temp_projects_dir), "--all-projects"]) assert result.exit_code == 0 - # Verify SQLite cache database created at projects level - cache_db = temp_projects_dir / "claude-code-log-cache.db" - assert cache_db.exists() + # Verify SQLite cache database created at isolated location + assert db_path.exists() # Verify cache data exists for each project for i in range(3): project_dir = temp_projects_dir / f"project-{i}" - cache_manager = CacheManager(project_dir, "1.0.0") + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) cached_data = cache_manager.get_cached_project_data() assert cached_data is not None assert len(cached_data.cached_files) >= 1 @@ -193,20 +270,22 @@ def test_cli_date_filtering_with_cache(self, setup_test_project): class TestCacheIntegrationConverter: """Test cache integration with converter functions.""" - def test_convert_jsonl_to_html_with_cache(self, setup_test_project): + def test_convert_jsonl_to_html_with_cache( + self, setup_test_project: ProjectWithCache + ): """Test converter uses cache when available.""" - project_dir = setup_test_project + project_dir = setup_test_project.project_dir + db_path = setup_test_project.db_path # First conversion (populate cache) output1 = convert_jsonl_to_html(input_path=project_dir, use_cache=True) assert output1.exists() - # Verify SQLite cache was created - cache_db = project_dir.parent / "claude-code-log-cache.db" - assert cache_db.exists() + # Verify SQLite cache was created at isolated location + assert db_path.exists() # Verify cache has data - cache_manager = CacheManager(project_dir, "1.0.0") + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) cached_data = cache_manager.get_cached_project_data() assert cached_data is not None assert len(cached_data.cached_files) 
>= 1 @@ -230,9 +309,12 @@ def test_convert_jsonl_to_html_no_cache(self, setup_test_project): assert cached_data.total_message_count == 0 def test_process_projects_hierarchy_with_cache( - self, temp_projects_dir, sample_jsonl_data + self, temp_projects_setup: ProjectSetup, sample_jsonl_data ): """Test project hierarchy processing uses cache effectively.""" + temp_projects_dir = temp_projects_setup.projects_dir + db_path = temp_projects_setup.db_path + # Create multiple projects for i in range(2): project_dir = temp_projects_dir / f"project-{i}" @@ -252,14 +334,13 @@ def test_process_projects_hierarchy_with_cache( ) assert output1.exists() - # Verify SQLite cache database was created - cache_db = temp_projects_dir / "claude-code-log-cache.db" - assert cache_db.exists() + # Verify SQLite cache database was created at isolated location + assert db_path.exists() # Verify cache data exists for each project for i in range(2): project_dir = temp_projects_dir / f"project-{i}" - cache_manager = CacheManager(project_dir, "1.0.0") + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) cached_data = cache_manager.get_cached_project_data() assert cached_data is not None assert len(cached_data.cached_files) >= 1 diff --git a/test/test_cache_sqlite_integrity.py b/test/test_cache_sqlite_integrity.py index 15d2ee3b..bc1c3e43 100644 --- a/test/test_cache_sqlite_integrity.py +++ b/test/test_cache_sqlite_integrity.py @@ -3,7 +3,6 @@ import json import sqlite3 -import tempfile import threading import time from pathlib import Path @@ -24,19 +23,13 @@ ) -@pytest.fixture -def temp_project_dir(): - """Create a temporary project directory.""" - with tempfile.TemporaryDirectory() as temp_dir: - project_dir = Path(temp_dir) / "test-project" - project_dir.mkdir() - yield project_dir +# Use conftest.py fixtures: isolated_cache_dir, isolated_db_path, isolated_cache_manager @pytest.fixture -def cache_manager(temp_project_dir): - """Create a cache manager for testing.""" - return 
CacheManager(temp_project_dir, "1.0.0") +def cache_manager(isolated_cache_dir: Path, isolated_db_path: Path) -> CacheManager: + """Create a cache manager with explicit db_path for test isolation.""" + return CacheManager(isolated_cache_dir, "1.0.0", db_path=isolated_db_path) @pytest.fixture @@ -92,13 +85,19 @@ class TestCascadeDelete: """Tests for cascade delete behaviour.""" def test_cascade_delete_project_removes_all_nested_records( - self, temp_project_dir, sample_user_entry, sample_assistant_entry + self, + isolated_cache_dir, + isolated_db_path, + sample_user_entry, + sample_assistant_entry, ): """Deleting project cascades to files, messages, sessions.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create a JSONL file with entries - jsonl_file = temp_project_dir / "test.jsonl" + jsonl_file = isolated_cache_dir / "test.jsonl" jsonl_file.write_text( json.dumps(sample_user_entry.model_dump()) + "\n" @@ -180,10 +179,12 @@ class TestTokenSumVerification: """Tests for token sum calculations.""" def test_session_token_totals_match_message_sums( - self, temp_project_dir, sample_assistant_entry + self, isolated_cache_dir, isolated_db_path, sample_assistant_entry ): """Session token totals equal sum of message tokens.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create multiple assistant entries with known token values entries = [] @@ -219,7 +220,7 @@ def test_session_token_totals_match_message_sums( total_output += 50 + i * 5 # Save entries - jsonl_file = temp_project_dir / "test.jsonl" + jsonl_file = isolated_cache_dir / "test.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -288,9 +289,13 @@ def test_cannot_insert_message_without_valid_project_id(self, cache_manager): class 
TestSerializationRoundTrip: """Tests for message serialization/deserialization.""" - def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): + def test_complex_message_types_roundtrip_correctly( + self, isolated_cache_dir, isolated_db_path + ): """Tool use, images, thinking content survive JSON serialization.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create entries with complex content types entries = [ @@ -372,7 +377,7 @@ def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): ] # Save entries - jsonl_file = temp_project_dir / "complex.jsonl" + jsonl_file = isolated_cache_dir / "complex.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -453,9 +458,13 @@ def test_duplicate_session_id_in_project_fails(self, cache_manager): class TestTimestampOrdering: """Tests for message timestamp ordering.""" - def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry): + def test_messages_ordered_by_timestamp( + self, isolated_cache_dir, isolated_db_path, sample_user_entry + ): """Messages retrieved in timestamp order.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create entries with out-of-order timestamps entries = [] @@ -484,7 +493,7 @@ def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry ) entries.append(entry) - jsonl_file = temp_project_dir / "order.jsonl" + jsonl_file = isolated_cache_dir / "order.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -504,9 +513,13 @@ def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry class TestNullTokenHandling: """Tests for NULL token value handling.""" - def 
test_null_tokens_handled_in_aggregates(self, temp_project_dir): + def test_null_tokens_handled_in_aggregates( + self, isolated_cache_dir, isolated_db_path + ): """NULL token values don't corrupt sums.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create mix of entries with and without tokens entries = [ @@ -548,7 +561,7 @@ def test_null_tokens_handled_in_aggregates(self, temp_project_dir): ), ] - jsonl_file = temp_project_dir / "mixed.jsonl" + jsonl_file = isolated_cache_dir / "mixed.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -577,13 +590,19 @@ class TestMessageFileRelationship: """Tests for message-file relationships.""" def test_cached_file_message_count_matches_actual( - self, temp_project_dir, sample_user_entry, sample_assistant_entry + self, + isolated_cache_dir, + isolated_db_path, + sample_user_entry, + sample_assistant_entry, ): """message_count column matches COUNT(*) FROM messages.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) entries = [sample_user_entry, sample_assistant_entry] - jsonl_file = temp_project_dir / "count.jsonl" + jsonl_file = isolated_cache_dir / "count.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -620,9 +639,11 @@ def test_wal_journal_mode_enabled(self, cache_manager): class TestConcurrentAccess: """Tests for concurrent database access.""" - def test_concurrent_readers_dont_block(self, temp_project_dir): + def test_concurrent_readers_dont_block(self, isolated_cache_dir, isolated_db_path): """Multiple readers can access simultaneously.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Add some data 
entry = UserTranscriptEntry( @@ -640,7 +661,7 @@ def test_concurrent_readers_dont_block(self, temp_project_dir): ), ) - jsonl_file = temp_project_dir / "concurrent.jsonl" + jsonl_file = isolated_cache_dir / "concurrent.jsonl" jsonl_file.write_text(json.dumps(entry.model_dump()), encoding="utf-8") cache_manager.save_cached_entries(jsonl_file, [entry]) @@ -649,7 +670,7 @@ def test_concurrent_readers_dont_block(self, temp_project_dir): def read_data(): try: - cm = CacheManager(temp_project_dir, "1.0.0") + cm = CacheManager(isolated_cache_dir, "1.0.0", db_path=isolated_db_path) data = cm.get_cached_project_data() results.append(data is not None) except Exception as e: @@ -669,9 +690,13 @@ def read_data(): class TestLargeDatasetPerformance: """Tests for performance with large datasets.""" - def test_query_performance_with_large_dataset(self, temp_project_dir): + def test_query_performance_with_large_dataset( + self, isolated_cache_dir, isolated_db_path + ): """Queries complete in reasonable time with large datasets.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create 1000 entries (reduced from 10k for test speed) entries = [] @@ -692,7 +717,7 @@ def test_query_performance_with_large_dataset(self, temp_project_dir): ) entries.append(entry) - jsonl_file = temp_project_dir / "large.jsonl" + jsonl_file = isolated_cache_dir / "large.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -713,9 +738,13 @@ def test_query_performance_with_large_dataset(self, temp_project_dir): class TestSessionBoundaryDetection: """Tests for session boundary correctness.""" - def test_sessions_contain_correct_messages(self, temp_project_dir): + def test_sessions_contain_correct_messages( + self, isolated_cache_dir, isolated_db_path + ): """Each session contains only its messages.""" - cache_manager = CacheManager(temp_project_dir, 
"1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create entries for multiple sessions entries = [] @@ -743,7 +772,7 @@ def test_sessions_contain_correct_messages(self, temp_project_dir): ) entries.append(entry) - jsonl_file = temp_project_dir / "sessions.jsonl" + jsonl_file = isolated_cache_dir / "sessions.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -766,13 +795,19 @@ class TestCacheStatsAccuracy: """Tests for cache statistics accuracy.""" def test_cache_stats_match_actual_counts( - self, temp_project_dir, sample_user_entry, sample_assistant_entry + self, + isolated_cache_dir, + isolated_db_path, + sample_user_entry, + sample_assistant_entry, ): """get_cache_stats() returns accurate data.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) entries = [sample_user_entry, sample_assistant_entry] - jsonl_file = temp_project_dir / "stats.jsonl" + jsonl_file = isolated_cache_dir / "stats.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -814,9 +849,13 @@ def test_cache_stats_match_actual_counts( class TestWorkingDirectoryQuery: """Tests for working directory queries.""" - def test_get_working_directories_returns_distinct_cwds(self, temp_project_dir): + def test_get_working_directories_returns_distinct_cwds( + self, isolated_cache_dir, isolated_db_path + ): """get_working_directories() returns unique values.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create sessions with duplicate cwds cache_manager.update_session_cache( @@ -861,11 +900,15 @@ def test_get_working_directories_returns_distinct_cwds(self, temp_project_dir): class TestFileModificationDetection: """Tests for file modification time 
detection.""" - def test_mtime_change_invalidates_cache(self, temp_project_dir, sample_user_entry): + def test_mtime_change_invalidates_cache( + self, isolated_cache_dir, isolated_db_path, sample_user_entry + ): """Changing file mtime marks cache as stale.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) - jsonl_file = temp_project_dir / "mtime.jsonl" + jsonl_file = isolated_cache_dir / "mtime.jsonl" jsonl_file.write_text( json.dumps(sample_user_entry.model_dump()), encoding="utf-8" ) @@ -887,9 +930,11 @@ def test_mtime_change_invalidates_cache(self, temp_project_dir, sample_user_entr class TestMigrationIntegrity: """Tests for migration system integrity.""" - def test_migration_checksum_stored(self, temp_project_dir): + def test_migration_checksum_stored(self, isolated_cache_dir, isolated_db_path): """Migration checksums are stored in _schema_version.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) with cache_manager._get_connection() as conn: rows = conn.execute( @@ -902,10 +947,10 @@ def test_migration_checksum_stored(self, temp_project_dir): assert row["filename"].endswith(".sql") assert len(row["checksum"]) == 64 # SHA256 hex length - def test_migration_applied_only_once(self, temp_project_dir): + def test_migration_applied_only_once(self, isolated_cache_dir, isolated_db_path): """Migrations are not re-applied on subsequent runs.""" # First run - cm1 = CacheManager(temp_project_dir, "1.0.0") + cm1 = CacheManager(isolated_cache_dir, "1.0.0", db_path=isolated_db_path) with cm1._get_connection() as conn: initial_count = conn.execute( @@ -913,7 +958,7 @@ def test_migration_applied_only_once(self, temp_project_dir): ).fetchone()[0] # Second run - cm2 = CacheManager(temp_project_dir, "1.0.0") + cm2 = CacheManager(isolated_cache_dir, "1.0.0", 
db_path=isolated_db_path) with cm2._get_connection() as conn: final_count = conn.execute( diff --git a/test/test_cli.py b/test/test_cli.py new file mode 100644 index 00000000..320614e0 --- /dev/null +++ b/test/test_cli.py @@ -0,0 +1,469 @@ +#!/usr/bin/env python3 +"""Tests for CLI functionality and helper functions.""" + +import json +from pathlib import Path +from typing import Generator + +import pytest +from click.testing import CliRunner + +from claude_code_log.cli import ( + _clear_caches, + _clear_output_files, + _discover_projects, + get_default_projects_dir, + main, +) +from claude_code_log.cache import CacheManager + + +class ProjectsSetup: + """Container for test projects setup.""" + + def __init__(self, projects_dir: Path, db_path: Path): + self.projects_dir = projects_dir + self.db_path = db_path + + +@pytest.fixture +def cli_projects_setup( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> Generator[ProjectsSetup, None, None]: + """Create isolated projects setup for CLI tests.""" + projects_dir = tmp_path / "projects" + projects_dir.mkdir() + + # Set env var to isolate cache + isolated_db = tmp_path / "test-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(isolated_db)) + + yield ProjectsSetup(projects_dir, isolated_db) + + +@pytest.fixture +def sample_jsonl_content() -> list[dict]: + """Sample JSONL data for tests.""" + return [ + { + "type": "user", + "uuid": "user-1", + "timestamp": "2023-01-01T10:00:00Z", + "sessionId": "session-1", + "version": "1.0.0", + "parentUuid": None, + "isSidechain": False, + "userType": "user", + "cwd": "/test", + "message": {"role": "user", "content": "Hello"}, + }, + { + "type": "assistant", + "uuid": "assistant-1", + "timestamp": "2023-01-01T10:01:00Z", + "sessionId": "session-1", + "version": "1.0.0", + "parentUuid": None, + "isSidechain": False, + "userType": "assistant", + "cwd": "/test", + "requestId": "req-1", + "message": { + "id": "msg-1", + "type": "message", + "role": "assistant", + 
"model": "claude-3", + "content": [{"type": "text", "text": "Hi there!"}], + "usage": {"input_tokens": 10, "output_tokens": 15}, + }, + }, + {"type": "summary", "summary": "A greeting", "leafUuid": "assistant-1"}, + ] + + +def create_project_with_jsonl( + projects_dir: Path, name: str, jsonl_data: list[dict] +) -> Path: + """Helper to create a project directory with JSONL file.""" + project_dir = projects_dir / name + project_dir.mkdir(exist_ok=True) + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in jsonl_data: + f.write(json.dumps(entry) + "\n") + return project_dir + + +class TestGetDefaultProjectsDir: + """Tests for get_default_projects_dir helper.""" + + def test_returns_expected_path(self): + """Default projects dir is ~/.claude/projects.""" + result = get_default_projects_dir() + assert result == Path.home() / ".claude" / "projects" + + +class TestDiscoverProjects: + """Tests for _discover_projects helper.""" + + def test_discovers_active_projects( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Finds directories with JSONL files.""" + projects_dir = cli_projects_setup.projects_dir + + # Create two active projects + create_project_with_jsonl(projects_dir, "project-1", sample_jsonl_content) + create_project_with_jsonl(projects_dir, "project-2", sample_jsonl_content) + + # Create an empty directory (not a project) + (projects_dir / "empty-dir").mkdir() + + project_dirs, archived = _discover_projects(projects_dir) + + assert len(project_dirs) == 2 + assert len(archived) == 0 + project_names = {p.name for p in project_dirs} + assert project_names == {"project-1", "project-2"} + + def test_discovers_archived_projects( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Finds archived projects from cache.""" + projects_dir = cli_projects_setup.projects_dir + db_path = cli_projects_setup.db_path + + # Create a project and cache it + project_dir = 
create_project_with_jsonl( + projects_dir, "my-project", sample_jsonl_content + ) + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) + + # Save entries to cache + from claude_code_log.converter import load_transcript + + jsonl_file = project_dir / "session-1.jsonl" + entries = load_transcript(jsonl_file, silent=True) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Delete the JSONL file to simulate archival + jsonl_file.unlink() + + project_dirs, archived = _discover_projects(projects_dir) + + assert len(project_dirs) == 1 + assert len(archived) == 1 + assert project_dir in archived + + def test_empty_directory(self, cli_projects_setup: ProjectsSetup): + """Empty projects directory returns empty lists.""" + project_dirs, archived = _discover_projects(cli_projects_setup.projects_dir) + assert project_dirs == [] + assert archived == set() + + +class TestClearCaches: + """Tests for _clear_caches helper.""" + + def test_clear_cache_single_project( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears cache for a single project.""" + projects_dir = cli_projects_setup.projects_dir + db_path = cli_projects_setup.db_path + + project_dir = create_project_with_jsonl( + projects_dir, "test-project", sample_jsonl_content + ) + + # Create cache + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) + from claude_code_log.converter import load_transcript + + jsonl_file = project_dir / "session-1.jsonl" + entries = load_transcript(jsonl_file, silent=True) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Verify cache has data + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) >= 1 + + # Clear cache + _clear_caches(project_dir, all_projects=False) + + # Verify cache is cleared + cache_manager2 = CacheManager(project_dir, "1.0.0", db_path=db_path) + cached_data2 = cache_manager2.get_cached_project_data() + 
assert cached_data2 is not None + assert len(cached_data2.cached_files) == 0 + + def test_clear_cache_all_projects( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears cache database for all projects.""" + projects_dir = cli_projects_setup.projects_dir + db_path = cli_projects_setup.db_path + + # Create multiple projects + for i in range(3): + create_project_with_jsonl( + projects_dir, f"project-{i}", sample_jsonl_content + ) + + # Create cache entries + for i in range(3): + project_dir = projects_dir / f"project-{i}" + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) + from claude_code_log.converter import load_transcript + + jsonl_file = project_dir / "session-1.jsonl" + entries = load_transcript(jsonl_file, silent=True) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Verify cache exists + assert db_path.exists() + + # Clear all caches + _clear_caches(projects_dir, all_projects=True) + + # Database file should be deleted + assert not db_path.exists() + + def test_clear_cache_single_file_noop(self, tmp_path: Path): + """Clearing cache for single file has no effect.""" + # Create a single JSONL file (not in a project structure) + jsonl_file = tmp_path / "test.jsonl" + jsonl_file.write_text('{"type": "user"}') + + # Should complete without error + _clear_caches(jsonl_file, all_projects=False) + + +class TestClearOutputFiles: + """Tests for _clear_output_files helper.""" + + def test_clear_html_single_project( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears HTML files from single project.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + # Create some HTML files + (project_dir / "combined_transcripts.html").write_text("") + (project_dir / "session-1.html").write_text("") + + assert len(list(project_dir.glob("*.html"))) == 2 + + _clear_output_files(project_dir, 
all_projects=False, file_ext="html") + + assert len(list(project_dir.glob("*.html"))) == 0 + + def test_clear_html_all_projects( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears HTML files from all projects.""" + projects_dir = cli_projects_setup.projects_dir + + # Create projects with HTML files + for i in range(2): + project_dir = create_project_with_jsonl( + projects_dir, f"project-{i}", sample_jsonl_content + ) + (project_dir / "combined_transcripts.html").write_text("") + + # Create index file + (projects_dir / "index.html").write_text("") + + _clear_output_files(projects_dir, all_projects=True, file_ext="html") + + # All HTML files should be gone + assert not (projects_dir / "index.html").exists() + for i in range(2): + project_dir = projects_dir / f"project-{i}" + assert len(list(project_dir.glob("*.html"))) == 0 + + def test_clear_md_files( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears Markdown files.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + (project_dir / "combined_transcripts.md").write_text("# Test") + assert len(list(project_dir.glob("*.md"))) == 1 + + _clear_output_files(project_dir, all_projects=False, file_ext="md") + + assert len(list(project_dir.glob("*.md"))) == 0 + + def test_clear_no_files_to_remove(self, cli_projects_setup: ProjectsSetup): + """No error when no files to remove.""" + project_dir = cli_projects_setup.projects_dir / "empty-project" + project_dir.mkdir() + (project_dir / "test.jsonl").write_text('{"type": "user"}') + + # Should complete without error + _clear_output_files(project_dir, all_projects=False, file_ext="html") + + +class TestCLIMainCommand: + """Tests for main CLI command.""" + + def test_help_shows_options(self): + """Help shows all expected options.""" + runner = CliRunner() + result = runner.invoke(main, ["--help"]) + + assert result.exit_code 
== 0 + assert "--output" in result.output + assert "--all-projects" in result.output + assert "--clear-cache" in result.output + assert "--open-browser" in result.output + + def test_no_arguments_uses_default_or_cwd(self, monkeypatch: pytest.MonkeyPatch): + """Running without arguments attempts to find projects.""" + runner = CliRunner() + # Mock to avoid actual file system operations + result = runner.invoke(main, []) + # Should either succeed or fail gracefully (no crash) + assert result.exit_code in (0, 1) + + def test_clear_cache_flag( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--clear-cache flag clears cache.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + runner = CliRunner() + + # First run to create cache + result1 = runner.invoke(main, [str(project_dir)]) + assert result1.exit_code == 0 + + # Clear cache + result2 = runner.invoke(main, [str(project_dir), "--clear-cache"]) + assert result2.exit_code == 0 + assert "clearing" in result2.output.lower() or "clear" in result2.output.lower() + + def test_clear_html_flag( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--clear-html flag clears HTML files.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + runner = CliRunner() + + # Generate HTML + result1 = runner.invoke(main, [str(project_dir)]) + assert result1.exit_code == 0 + assert len(list(project_dir.glob("*.html"))) > 0 + + # Clear HTML + result2 = runner.invoke(main, [str(project_dir), "--clear-html"]) + assert result2.exit_code == 0 + assert len(list(project_dir.glob("*.html"))) == 0 + + def test_format_option_md( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--format md generates Markdown output.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, 
"test-project", sample_jsonl_content + ) + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir), "--format", "md"]) + + assert result.exit_code == 0 + assert len(list(project_dir.glob("*.md"))) > 0 + + def test_no_cache_flag( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--no-cache flag processes without caching.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + db_path = cli_projects_setup.db_path + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir), "--no-cache"]) + + assert result.exit_code == 0 + + # Cache should exist but be empty for this project + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert cached_data.total_message_count == 0 + + def test_nonexistent_path_error(self): + """Nonexistent path shows appropriate error.""" + runner = CliRunner() + result = runner.invoke(main, ["/nonexistent/path/to/file.jsonl"]) + + # Should fail gracefully + assert result.exit_code != 0 or "error" in result.output.lower() + + def test_output_option( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--output option specifies output path.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + output_path = cli_projects_setup.projects_dir / "custom_output.html" + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir), "--output", str(output_path)]) + + assert result.exit_code == 0 + assert output_path.exists() + + +class TestCLIErrorHandling: + """Tests for CLI error handling paths.""" + + def test_invalid_format_option( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Invalid format option shows error.""" + project_dir = create_project_with_jsonl( + 
cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir), "--format", "invalid"]) + + assert result.exit_code != 0 + + def test_empty_project_directory(self, cli_projects_setup: ProjectsSetup): + """Empty project directory handled gracefully.""" + project_dir = cli_projects_setup.projects_dir / "empty-project" + project_dir.mkdir() + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir)]) + + # Should complete (possibly with warning) + assert result.exit_code == 0 + + def test_malformed_jsonl_handled(self, cli_projects_setup: ProjectsSetup): + """Malformed JSONL handled gracefully.""" + project_dir = cli_projects_setup.projects_dir / "bad-project" + project_dir.mkdir() + (project_dir / "test.jsonl").write_text("not valid json\n{also: bad}") + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir)]) + + # Should not crash + assert result.exit_code in (0, 1) diff --git a/test/test_migrations.py b/test/test_migrations.py new file mode 100644 index 00000000..97da2e94 --- /dev/null +++ b/test/test_migrations.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 +"""Tests for migration runner functionality.""" + +import sqlite3 +from pathlib import Path + +import pytest + +from claude_code_log.migrations.runner import ( + _compute_checksum, + _ensure_schema_version_table, + _parse_migration_number, + apply_migration, + get_applied_migrations, + get_available_migrations, + get_current_version, + get_pending_migrations, + run_migrations, + verify_migrations, +) + + +class TestParseMigrationNumber: + """Tests for migration filename parsing.""" + + def test_parses_standard_format(self): + """Parses standard migration filename.""" + assert _parse_migration_number("001_initial_schema.sql") == 1 + assert _parse_migration_number("002_add_column.sql") == 2 + assert _parse_migration_number("010_fix_bug.sql") == 10 + assert 
_parse_migration_number("100_big_change.sql") == 100 + + def test_handles_double_underscores(self): + """Handles filenames with multiple underscores.""" + assert _parse_migration_number("003_add_html_cache.sql") == 3 + + def test_invalid_format_raises_error(self): + """Invalid format raises ValueError.""" + with pytest.raises(ValueError): + _parse_migration_number("invalid.sql") + with pytest.raises(ValueError): + _parse_migration_number("no_number.sql") + with pytest.raises(ValueError): + _parse_migration_number("abc_name.sql") + + +class TestComputeChecksum: + """Tests for checksum computation.""" + + def test_consistent_checksum(self): + """Same content produces same checksum.""" + content = "CREATE TABLE test (id INTEGER);" + checksum1 = _compute_checksum(content) + checksum2 = _compute_checksum(content) + assert checksum1 == checksum2 + + def test_different_content_different_checksum(self): + """Different content produces different checksum.""" + checksum1 = _compute_checksum("CREATE TABLE test1;") + checksum2 = _compute_checksum("CREATE TABLE test2;") + assert checksum1 != checksum2 + + def test_checksum_is_sha256_hex(self): + """Checksum is 64-character hex string (SHA256).""" + checksum = _compute_checksum("test") + assert len(checksum) == 64 + assert all(c in "0123456789abcdef" for c in checksum) + + +class TestEnsureSchemaVersionTable: + """Tests for schema version table creation.""" + + def test_creates_table_if_not_exists(self, tmp_path: Path): + """Creates _schema_version table on fresh database.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + _ensure_schema_version_table(conn) + + # Verify table exists with correct columns + columns = conn.execute("PRAGMA table_info(_schema_version)").fetchall() + column_names = {col[1] for col in columns} + assert "version" in column_names + assert "filename" in column_names + assert "applied_at" in column_names + assert "checksum" in column_names + + conn.close() + + def 
test_upgrades_old_format_table(self, tmp_path: Path): + """Upgrades old format table (without checksum) to new format.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + # Create old format table (without checksum) + conn.execute(""" + CREATE TABLE _schema_version ( + version INTEGER PRIMARY KEY, + filename TEXT NOT NULL, + applied_at TEXT NOT NULL + ) + """) + conn.commit() + + # Call ensure - should upgrade + _ensure_schema_version_table(conn) + + # Verify new schema + columns = conn.execute("PRAGMA table_info(_schema_version)").fetchall() + column_names = {col[1] for col in columns} + assert "checksum" in column_names + + conn.close() + + +class TestGetAppliedMigrations: + """Tests for getting applied migrations.""" + + def test_empty_database_returns_empty_list(self, tmp_path: Path): + """Fresh database returns empty list.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + applied = get_applied_migrations(conn) + assert applied == [] + + conn.close() + + def test_returns_applied_migrations(self, tmp_path: Path): + """Returns list of applied migrations.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + # Insert some migration records + conn.execute( + "INSERT INTO _schema_version VALUES (1, '001_test.sql', '2024-01-01', 'abc')" + ) + conn.execute( + "INSERT INTO _schema_version VALUES (2, '002_test.sql', '2024-01-02', 'def')" + ) + conn.commit() + + applied = get_applied_migrations(conn) + assert len(applied) == 2 + assert applied[0] == (1, "abc") + assert applied[1] == (2, "def") + + conn.close() + + +class TestGetAvailableMigrations: + """Tests for getting available migrations.""" + + def test_returns_sql_files_in_order(self): + """Returns migration files sorted by version.""" + migrations = get_available_migrations() + + # Should have at least the initial migrations + assert len(migrations) >= 1 + + # Should be sorted by version + versions = [v for v, _ 
in migrations] + assert versions == sorted(versions) + + # All should be .sql files + for _, path in migrations: + assert path.suffix == ".sql" + + +class TestGetPendingMigrations: + """Tests for getting pending migrations.""" + + def test_all_pending_on_fresh_database(self, tmp_path: Path): + """All migrations pending on fresh database.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + pending = get_pending_migrations(conn) + available = get_available_migrations() + + assert len(pending) == len(available) + + conn.close() + + def test_none_pending_after_all_applied(self, tmp_path: Path): + """No migrations pending after all applied.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + # Run all migrations + run_migrations(db_path) + + # Reconnect and check + conn = sqlite3.connect(db_path) + pending = get_pending_migrations(conn) + assert len(pending) == 0 + + conn.close() + + +class TestApplyMigration: + """Tests for applying individual migrations.""" + + def test_applies_migration_and_records(self, tmp_path: Path): + """Applies migration and records in schema version.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + # Create a test migration file + migration_file = tmp_path / "001_test.sql" + migration_file.write_text("CREATE TABLE test_table (id INTEGER);") + + apply_migration(conn, 1, migration_file) + + # Verify table was created + tables = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='test_table'" + ).fetchall() + assert len(tables) == 1 + + # Verify migration was recorded + applied = get_applied_migrations(conn) + assert len(applied) == 1 + assert applied[0][0] == 1 + + conn.close() + + +class TestVerifyMigrations: + """Tests for migration verification.""" + + def test_no_warnings_for_unmodified_migrations(self, tmp_path: Path): + """No warnings when migrations haven't been 
modified.""" + db_path = tmp_path / "test.db" + + # Run migrations + run_migrations(db_path) + + conn = sqlite3.connect(db_path) + warnings = verify_migrations(conn) + + # Should have no warnings for unmodified migrations + assert warnings == [] + + conn.close() + + def test_warning_for_modified_migration(self, tmp_path: Path): + """Warning when migration file has been modified.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + # Insert a fake migration record with wrong checksum + conn.execute( + "INSERT INTO _schema_version VALUES (1, '001_initial_schema.sql', '2024-01-01', 'wrong_checksum')" + ) + conn.commit() + + warnings = verify_migrations(conn) + + # Should warn about modified migration + assert len(warnings) == 1 + assert "modified" in warnings[0].lower() + + conn.close() + + +class TestRunMigrations: + """Tests for running all migrations.""" + + def test_runs_all_pending_migrations(self, tmp_path: Path): + """Runs all pending migrations on fresh database.""" + db_path = tmp_path / "test.db" + + count = run_migrations(db_path) + + # Should have run at least the initial migrations + assert count >= 1 + + # Verify schema was created + conn = sqlite3.connect(db_path) + tables = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table'" + ).fetchall() + table_names = {t[0] for t in tables} + + # Should have core tables from initial migration + assert "projects" in table_names + assert "_schema_version" in table_names + + conn.close() + + def test_idempotent_multiple_runs(self, tmp_path: Path): + """Running multiple times is safe.""" + db_path = tmp_path / "test.db" + + count1 = run_migrations(db_path) + count2 = run_migrations(db_path) + + # First run applies migrations + assert count1 >= 1 + # Second run applies nothing (already applied) + assert count2 == 0 + + def test_creates_database_if_not_exists(self, tmp_path: Path): + """Creates database file if it doesn't exist.""" + db_path = 
tmp_path / "new_db.db" + assert not db_path.exists() + + run_migrations(db_path) + + assert db_path.exists() + + +class TestGetCurrentVersion: + """Tests for getting current schema version.""" + + def test_returns_zero_for_fresh_database(self, tmp_path: Path): + """Returns 0 for database with no migrations.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + version = get_current_version(conn) + assert version == 0 + + conn.close() + + def test_returns_highest_version(self, tmp_path: Path): + """Returns highest applied migration version.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + # Insert migrations out of order + conn.execute( + "INSERT INTO _schema_version VALUES (3, '003_test.sql', '2024-01-03', 'c')" + ) + conn.execute( + "INSERT INTO _schema_version VALUES (1, '001_test.sql', '2024-01-01', 'a')" + ) + conn.execute( + "INSERT INTO _schema_version VALUES (2, '002_test.sql', '2024-01-02', 'b')" + ) + conn.commit() + + version = get_current_version(conn) + assert version == 3 + + conn.close() + + def test_returns_version_after_real_migrations(self, tmp_path: Path): + """Returns correct version after running real migrations.""" + db_path = tmp_path / "test.db" + run_migrations(db_path) + + conn = sqlite3.connect(db_path) + version = get_current_version(conn) + + # Should match number of available migrations + available = get_available_migrations() + expected_version = max(v for v, _ in available) + assert version == expected_version + + conn.close() diff --git a/test/test_renderer_timings.py b/test/test_renderer_timings.py new file mode 100644 index 00000000..c5e91ec8 --- /dev/null +++ b/test/test_renderer_timings.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 +"""Tests for renderer timing utilities.""" + +import time + +import pytest + + +class TestDebugTimingFlag: + """Tests for DEBUG_TIMING environment variable.""" + + def test_debug_timing_disabled_by_default(self): + 
"""DEBUG_TIMING is False by default.""" + # Import with fresh module state + + # Note: We can't easily test the default since the module is already loaded + # This test just documents the expected default behavior + # The actual value depends on environment at import time + + def test_debug_timing_enabled_with_1(self, monkeypatch: pytest.MonkeyPatch): + """DEBUG_TIMING enabled with '1'.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + # Reimport to pick up env var + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + assert rt.DEBUG_TIMING is True + + def test_debug_timing_enabled_with_true(self, monkeypatch: pytest.MonkeyPatch): + """DEBUG_TIMING enabled with 'true'.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "true") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + assert rt.DEBUG_TIMING is True + + def test_debug_timing_enabled_with_yes(self, monkeypatch: pytest.MonkeyPatch): + """DEBUG_TIMING enabled with 'yes'.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "yes") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + assert rt.DEBUG_TIMING is True + + def test_debug_timing_case_insensitive(self, monkeypatch: pytest.MonkeyPatch): + """DEBUG_TIMING handles uppercase values.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "TRUE") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + assert rt.DEBUG_TIMING is True + + +class TestSetTimingVar: + """Tests for set_timing_var function.""" + + def test_sets_variable_when_enabled(self, monkeypatch: pytest.MonkeyPatch): + """Sets timing variable when DEBUG_TIMING enabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + rt._timing_data.clear() + + rt.set_timing_var("test_var", "test_value") + assert 
rt._timing_data.get("test_var") == "test_value" + + def test_ignores_when_disabled(self, monkeypatch: pytest.MonkeyPatch): + """Ignores set when DEBUG_TIMING disabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + rt._timing_data.clear() + + rt.set_timing_var("test_var", "test_value") + assert "test_var" not in rt._timing_data + + +class TestLogTiming: + """Tests for log_timing context manager.""" + + def test_logs_phase_timing_when_enabled( + self, monkeypatch: pytest.MonkeyPatch, capsys + ): + """Logs phase timing when DEBUG_TIMING enabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + with rt.log_timing("Test Phase"): + time.sleep(0.01) # Brief sleep to measure + + captured = capsys.readouterr() + assert "[TIMING]" in captured.out + assert "Test Phase" in captured.out + + def test_no_output_when_disabled(self, monkeypatch: pytest.MonkeyPatch, capsys): + """No output when DEBUG_TIMING disabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + with rt.log_timing("Test Phase"): + pass + + captured = capsys.readouterr() + assert "[TIMING]" not in captured.out + + def test_callable_phase_name(self, monkeypatch: pytest.MonkeyPatch, capsys): + """Supports callable for dynamic phase names.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + items = [1, 2, 3] + with rt.log_timing(lambda: f"Processing ({len(items)} items)"): + pass + + captured = capsys.readouterr() + assert "Processing (3 items)" in captured.out + + def test_shows_total_time_when_t_start_provided( + self, monkeypatch: pytest.MonkeyPatch, capsys + ): + """Shows total elapsed 
time when t_start provided.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + t_start = time.time() + time.sleep(0.01) + + with rt.log_timing("Test Phase", t_start=t_start): + pass + + captured = capsys.readouterr() + assert "total:" in captured.out + + +class TestTimingStat: + """Tests for timing_stat context manager.""" + + def test_tracks_operation_timing_when_enabled( + self, monkeypatch: pytest.MonkeyPatch + ): + """Tracks operation timing when DEBUG_TIMING enabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + rt._timing_data.clear() + rt._timing_data["_test_timings"] = [] + rt._timing_data["_current_msg_id"] = "msg-123" + + with rt.timing_stat("_test_timings"): + time.sleep(0.01) + + assert len(rt._timing_data["_test_timings"]) == 1 + duration, msg_id = rt._timing_data["_test_timings"][0] + assert duration >= 0.01 + assert msg_id == "msg-123" + + def test_no_tracking_when_disabled(self, monkeypatch: pytest.MonkeyPatch): + """No tracking when DEBUG_TIMING disabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + rt._timing_data.clear() + rt._timing_data["_test_timings"] = [] + + with rt.timing_stat("_test_timings"): + pass + + assert len(rt._timing_data["_test_timings"]) == 0 + + +class TestReportTimingStatistics: + """Tests for report_timing_statistics function.""" + + def test_reports_statistics(self, monkeypatch: pytest.MonkeyPatch, capsys): + """Reports timing statistics.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + timings = [ + (0.1, "msg-1"), + (0.2, "msg-2"), + (0.05, "msg-3"), + ] + + 
rt.report_timing_statistics([("Test Operation", timings)]) + + captured = capsys.readouterr() + assert "Test Operation" in captured.out + assert "Total operations: 3" in captured.out + assert "Total time:" in captured.out + assert "Slowest 10 operations" in captured.out + + def test_empty_timings_no_output(self, monkeypatch: pytest.MonkeyPatch, capsys): + """No output for empty timings.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + rt.report_timing_statistics([("Test Operation", [])]) + + captured = capsys.readouterr() + # Empty timings produce no output (the if timings: check) + assert "Test Operation" not in captured.out + + def test_sorts_by_duration_descending( + self, monkeypatch: pytest.MonkeyPatch, capsys + ): + """Slowest operations listed first.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + # Create timings in ascending order + timings = [ + (0.001, "msg-fast"), + (0.1, "msg-slow"), + (0.01, "msg-medium"), + ] + + rt.report_timing_statistics([("Test", timings)]) + + captured = capsys.readouterr() + # msg-slow should appear before msg-medium and msg-fast + slow_pos = captured.out.find("msg-slow") + medium_pos = captured.out.find("msg-medium") + fast_pos = captured.out.find("msg-fast") + assert slow_pos < medium_pos < fast_pos + + def test_limits_to_10_slowest(self, monkeypatch: pytest.MonkeyPatch, capsys): + """Only shows 10 slowest operations.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + # Create 15 timings + timings = [(i * 0.001, f"msg-{i}") for i in range(15)] + + rt.report_timing_statistics([("Test", timings)]) + + captured = capsys.readouterr() + # Should only show 10 + assert captured.out.count("msg-") == 10 From 
4695f792738ff38a8a8f6bd72244f75abcf53323 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 14:06:04 +0000 Subject: [PATCH 10/23] CR PR feedback --- claude_code_log/cache.py | 53 ++++++++++---- claude_code_log/tui.py | 135 +++++++++++++++++++++++++--------- test/test_pagination.py | 152 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 296 insertions(+), 44 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 220cc90c..3a6ffc9a 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -492,11 +492,17 @@ def save_cached_entries( with self._get_connection() as conn: # Insert or update file record + # Use ON CONFLICT to preserve file ID and avoid cascade deletes on messages conn.execute( """ - INSERT OR REPLACE INTO cached_files + INSERT INTO cached_files (project_id, file_name, file_path, source_mtime, cached_mtime, message_count) VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT(project_id, file_name) DO UPDATE SET + file_path = excluded.file_path, + source_mtime = excluded.source_mtime, + cached_mtime = excluded.cached_mtime, + message_count = excluded.message_count """, ( self._project_id, @@ -552,12 +558,23 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non for session_id, data in session_data.items(): conn.execute( """ - INSERT OR REPLACE INTO sessions ( + INSERT INTO sessions ( project_id, session_id, summary, first_timestamp, last_timestamp, message_count, first_user_message, cwd, total_input_tokens, total_output_tokens, total_cache_creation_tokens, total_cache_read_tokens ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(project_id, session_id) DO UPDATE SET + summary = excluded.summary, + first_timestamp = excluded.first_timestamp, + last_timestamp = excluded.last_timestamp, + message_count = excluded.message_count, + first_user_message = excluded.first_user_message, + cwd = excluded.cwd, + total_input_tokens = excluded.total_input_tokens, + total_output_tokens = excluded.total_output_tokens, + total_cache_creation_tokens = excluded.total_cache_creation_tokens, + total_cache_read_tokens = excluded.total_cache_read_tokens """, ( self._project_id, @@ -1301,18 +1318,30 @@ def is_page_stale( # Check if any session on this page has changed with self._get_connection() as conn: - for session_id in page_data.session_ids: - row = conn.execute( - """SELECT message_count FROM sessions - WHERE project_id = ? AND session_id = ?""", - (self._project_id, session_id), - ).fetchone() + # Build placeholders for IN clause + placeholders = ",".join("?" for _ in page_data.session_ids) + params = [self._project_id, *page_data.session_ids] - if not row: - return True, "session_missing" + row = conn.execute( + f"""SELECT COUNT(*) as session_count, + COALESCE(SUM(message_count), 0) as total_messages, + MAX(last_timestamp) as max_timestamp + FROM sessions + WHERE project_id = ? 
AND session_id IN ({placeholders})""", + params, + ).fetchone() + + # Check if any sessions are missing + if row["session_count"] != len(page_data.session_ids): + return True, "session_missing" + + # Check if message count changed + if row["total_messages"] != page_data.message_count: + return True, "message_count_changed" - # We need to check if session content changed - # For now, just check if session exists + # Check if last timestamp changed (session content updated) + if row["max_timestamp"] != page_data.last_timestamp: + return True, "timestamp_changed" return False, "up_to_date" diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index e1495e17..ca8580e7 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -273,22 +273,46 @@ def _handle_archive_project_confirm(self, confirmed: bool | None) -> None: return project_path = self.selected_project_path - archived_count = 0 + + # Collect all JSONL files first + jsonl_files = list(project_path.glob("*.jsonl")) + if not jsonl_files: + self.notify("No sessions to archive", severity="warning") + return + + # Track successes and failures + succeeded: list[str] = [] + failed: list[tuple[str, str]] = [] # (filename, error message) # Delete all JSONL files in the project - for jsonl_file in project_path.glob("*.jsonl"): + for jsonl_file in jsonl_files: try: jsonl_file.unlink() - archived_count += 1 + succeeded.append(jsonl_file.name) except Exception as e: - self.notify( - f"Failed to delete {jsonl_file.name}: {e}", severity="error" - ) + failed.append((jsonl_file.name, str(e))) + + # Report results clearly + total = len(jsonl_files) + if failed: + # Show detailed failure information + failed_names = ", ".join(f[0] for f in failed[:3]) + if len(failed) > 3: + failed_names += f" and {len(failed) - 3} more" + self.notify( + f"Archive incomplete: {len(succeeded)}/{total} sessions deleted. 
" + f"Failed: {failed_names}", + severity="error", + ) + else: + self.notify(f"Archived {len(succeeded)} sessions") - if archived_count > 0: - self.notify(f"Archived {archived_count} sessions") - # Add to archived projects set + # Only mark as fully archived if ALL files were deleted + if not failed and succeeded: self.archived_projects.add(project_path) + + # Always refresh to show current state + if succeeded: self.populate_table() def action_delete_project(self) -> None: @@ -318,13 +342,29 @@ def _handle_delete_project_confirm(self, result: Optional[str]) -> None: cache_manager.clear_cache() # If deleting both, also delete JSONL files + file_delete_failed = False if result == "both": - for jsonl_file in project_path.glob("*.jsonl"): - try: - jsonl_file.unlink() - except Exception as e: + jsonl_files = list(project_path.glob("*.jsonl")) + if jsonl_files: + succeeded: list[str] = [] + failed: list[tuple[str, str]] = [] + + for jsonl_file in jsonl_files: + try: + jsonl_file.unlink() + succeeded.append(jsonl_file.name) + except Exception as e: + failed.append((jsonl_file.name, str(e))) + + if failed: + file_delete_failed = True + failed_names = ", ".join(f[0] for f in failed[:3]) + if len(failed) > 3: + failed_names += f" and {len(failed) - 3} more" self.notify( - f"Failed to delete {jsonl_file.name}: {e}", severity="error" + f"Cache deleted but {len(failed)}/{len(jsonl_files)} " + f"session files failed to delete: {failed_names}", + severity="error", ) # Remove from projects list @@ -335,7 +375,8 @@ def _handle_delete_project_confirm(self, result: Optional[str]) -> None: if project_path in self.archived_projects: self.archived_projects.discard(project_path) - self.notify(f"Deleted project: {project_path.name}") + if not file_delete_failed: + self.notify(f"Deleted project: {project_path.name}") self.selected_project_path = None self.populate_table() @@ -371,26 +412,56 @@ def _handle_restore_project_confirm(self, confirmed: bool | None) -> None: # Ensure project 
directory exists project_path.mkdir(parents=True, exist_ok=True) - restored_count = 0 - for session_id in project_cache.sessions: + # Identify sessions that need restoration (don't already exist as files) + sessions_to_restore = [ + session_id + for session_id in project_cache.sessions + if not (project_path / f"{session_id}.jsonl").exists() + ] + + if not sessions_to_restore: + self.notify("All sessions already exist as files", severity="warning") + return + + # Track successes and failures + succeeded: list[str] = [] + failed: list[tuple[str, str]] = [] # (session_id, error message) + + for session_id in sessions_to_restore: jsonl_path = project_path / f"{session_id}.jsonl" - if not jsonl_path.exists(): - try: - messages = cache_manager.export_session_to_jsonl(session_id) - if messages: - with open(jsonl_path, "w", encoding="utf-8") as f: - for msg in messages: - f.write(msg + "\n") - restored_count += 1 - except Exception as e: - self.notify( - f"Failed to restore {session_id}: {e}", severity="error" - ) + try: + messages = cache_manager.export_session_to_jsonl(session_id) + if messages: + with open(jsonl_path, "w", encoding="utf-8") as f: + for msg in messages: + f.write(msg + "\n") + succeeded.append(session_id) + else: + failed.append((session_id, "No messages found in cache")) + except Exception as e: + failed.append((session_id, str(e))) + + # Report results clearly + total = len(sessions_to_restore) + if failed: + # Show detailed failure information + failed_ids = ", ".join(f[0][:8] for f in failed[:3]) # Truncate UUIDs + if len(failed) > 3: + failed_ids += f" and {len(failed) - 3} more" + self.notify( + f"Restore incomplete: {len(succeeded)}/{total} sessions restored. 
" + f"Failed: {failed_ids}", + severity="error", + ) + else: + self.notify(f"Restored {len(succeeded)} sessions") - if restored_count > 0: - self.notify(f"Restored {restored_count} sessions") - # Remove from archived projects set + # Only mark as fully restored if ALL sessions were restored + if not failed and succeeded: self.archived_projects.discard(project_path) + + # Always refresh to show current state + if succeeded: self.populate_table() diff --git a/test/test_pagination.py b/test/test_pagination.py index 63285f60..417c5785 100644 --- a/test/test_pagination.py +++ b/test/test_pagination.py @@ -273,6 +273,158 @@ def test_is_page_stale_page_size_changed(self, cache_manager): assert is_stale is True assert "page_size" in reason.lower() or "size" in reason.lower() + def test_is_page_stale_session_missing(self, cache_manager, temp_project_dir): + """is_page_stale should return True when a session is missing from sessions table.""" + # Create page cache entry referencing session "s1" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + # Create the HTML file so it passes the file existence check + (temp_project_dir / "combined_transcripts.html").write_text("") + + # Don't add session "s1" to sessions table - it should be detected as missing + # Mock is_html_outdated to skip HTML version check (tested separately) + with patch("claude_code_log.renderer.is_html_outdated", return_value=False): + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is True + assert "session_missing" in reason + + def test_is_page_stale_message_count_changed(self, cache_manager, temp_project_dir): + """is_page_stale should return True when session message 
count has changed.""" + # Create page cache entry with message_count=1000 + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, # Page expects 1000 messages + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + # Create the HTML file + (temp_project_dir / "combined_transcripts.html").write_text("") + + # Add session with different message count + cache_manager.update_session_cache( + { + "s1": SessionCacheData( + session_id="s1", + message_count=1500, # Different from page's 1000 + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + first_user_message="Test", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + } + ) + + # Mock is_html_outdated to skip HTML version check (tested separately) + with patch("claude_code_log.renderer.is_html_outdated", return_value=False): + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is True + assert "message_count" in reason + + def test_is_page_stale_timestamp_changed(self, cache_manager, temp_project_dir): + """is_page_stale should return True when session last_timestamp has changed.""" + # Create page cache entry + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", # Page expects this timestamp + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + # Create the HTML file + (temp_project_dir / "combined_transcripts.html").write_text("") + + # Add session with same message_count but different 
last_timestamp + cache_manager.update_session_cache( + { + "s1": SessionCacheData( + session_id="s1", + message_count=1000, # Same as page + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T12:00:00Z", # Different timestamp + first_user_message="Test", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + } + ) + + # Mock is_html_outdated to skip HTML version check (tested separately) + with patch("claude_code_log.renderer.is_html_outdated", return_value=False): + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is True + assert "timestamp" in reason + + def test_is_page_stale_up_to_date(self, cache_manager, temp_project_dir): + """is_page_stale should return False when page matches session data.""" + # Create page cache entry + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + # Create the HTML file + (temp_project_dir / "combined_transcripts.html").write_text("") + + # Add session with matching data + cache_manager.update_session_cache( + { + "s1": SessionCacheData( + session_id="s1", + message_count=1000, # Same as page + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", # Same as page + first_user_message="Test", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + } + ) + + # Mock is_html_outdated to skip HTML version check (tested separately) + with patch("claude_code_log.renderer.is_html_outdated", return_value=False): + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is False + assert "up_to_date" in reason + def 
test_invalidate_all_pages(self, cache_manager): """invalidate_all_pages should remove all page cache entries.""" cache_manager.update_page_cache( From dd4c5969dadb9e4186a3c372b94f9d6f0ea3e86f Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 15:32:26 +0000 Subject: [PATCH 11/23] CR PR feedback --- claude_code_log/cache.py | 7 ++++ test/test_cache_integration.py | 65 ++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 3a6ffc9a..3c53d770 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -1419,6 +1419,13 @@ def delete_session(self, session_id: str) -> bool: (self._project_id, session_id), ) + # Delete page_sessions entries referencing this session + conn.execute( + """DELETE FROM page_sessions WHERE session_id = ? + AND page_id IN (SELECT id FROM html_pages WHERE project_id = ?)""", + (session_id, self._project_id), + ) + # Delete cached_files entry for this session's JSONL file # File name pattern is {session_id}.jsonl conn.execute( diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index ddccd39e..385ccadc 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -890,6 +890,71 @@ def test_delete_session_invalidates_file_cache( "because the file is no longer considered cached" ) + def test_delete_session_removes_page_sessions( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that delete_session removes page_sessions entries. + + When a session is part of a paginated combined transcript, deleting + the session should also remove its entry from the page_sessions table. 
+ """ + project_dir = temp_projects_dir / "delete-page-sessions-test" + project_dir.mkdir() + + session_id = "session-1" + jsonl_file = project_dir / f"{session_id}.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + cache_manager = CacheManager(project_dir, "1.0.0") + + # Add page cache entry with this session + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=50, + session_ids=[session_id], + message_count=5, + first_timestamp="2024-01-01T00:00:00Z", + last_timestamp="2024-01-01T01:00:00Z", + total_input_tokens=100, + total_output_tokens=200, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + # Verify page has the session + page_data = cache_manager.get_page_data(1) + assert page_data is not None + assert session_id in page_data.session_ids + + # Delete the session + result = cache_manager.delete_session(session_id) + assert result is True + + # Verify page_sessions entry is removed + # The page itself still exists, but the session mapping should be gone + import sqlite3 + + conn = sqlite3.connect(cache_manager.db_path) + conn.row_factory = sqlite3.Row + try: + row = conn.execute( + """SELECT COUNT(*) as cnt FROM page_sessions ps + JOIN html_pages hp ON ps.page_id = hp.id + WHERE hp.project_id = ? 
AND ps.session_id = ?""", + (cache_manager._project_id, session_id), + ).fetchone() + assert row["cnt"] == 0, ( + "page_sessions entry should be removed after delete_session()" + ) + finally: + conn.close() + def test_delete_nonexistent_session(self, temp_projects_dir): """Test deleting a session that doesn't exist returns False.""" project_dir = temp_projects_dir / "delete-nonexistent" From f8a074a5fefc2be51f31c86affe358cd6b2d3c56 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 19:49:54 +0000 Subject: [PATCH 12/23] CR PR feedback - take 3 --- claude_code_log/cache.py | 55 ++++++++++++++++------------------ test/test_cache_integration.py | 17 ++++++----- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 3c53d770..ebc4363a 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -2,6 +2,7 @@ """SQLite-based cache management for Claude Code Log.""" import json +import logging import os import sqlite3 import zlib @@ -24,6 +25,8 @@ UserTranscriptEntry, ) +logger = logging.getLogger(__name__) + # ========== Data Models ========== @@ -442,14 +445,7 @@ def load_cached_entries_filtered( if to_date: to_dt = dateparser.parse(to_date) if to_dt: - if to_date in ["today", "yesterday"] or "days ago" in to_date: - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) - else: - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) + to_dt = to_dt.replace(hour=23, minute=59, second=59, microsecond=999999) # Build query with SQL-based filtering sql = "SELECT content FROM messages WHERE file_id = ?" 
@@ -525,26 +521,27 @@ def save_cached_entries( conn.execute("DELETE FROM messages WHERE file_id = ?", (file_id,)) # Insert all entries in a batch - for entry in entries: - serialized = self._serialize_entry(entry, file_id) - conn.execute( - """ - INSERT INTO messages ( - project_id, file_id, type, timestamp, session_id, - _uuid, _parent_uuid, _is_sidechain, _user_type, _cwd, _version, - _is_meta, _agent_id, _request_id, - input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, - _leaf_uuid, _level, _operation, content - ) VALUES ( - :project_id, :file_id, :type, :timestamp, :session_id, - :_uuid, :_parent_uuid, :_is_sidechain, :_user_type, :_cwd, :_version, - :_is_meta, :_agent_id, :_request_id, - :input_tokens, :output_tokens, :cache_creation_tokens, :cache_read_tokens, - :_leaf_uuid, :_level, :_operation, :content - ) - """, - serialized, + serialized_entries = [ + self._serialize_entry(entry, file_id) for entry in entries + ] + conn.executemany( + """ + INSERT INTO messages ( + project_id, file_id, type, timestamp, session_id, + _uuid, _parent_uuid, _is_sidechain, _user_type, _cwd, _version, + _is_meta, _agent_id, _request_id, + input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, + _leaf_uuid, _level, _operation, content + ) VALUES ( + :project_id, :file_id, :type, :timestamp, :session_id, + :_uuid, :_parent_uuid, :_is_sidechain, :_user_type, :_cwd, :_version, + :_is_meta, :_agent_id, :_request_id, + :input_tokens, :output_tokens, :cache_creation_tokens, :cache_read_tokens, + :_leaf_uuid, :_level, :_operation, :content ) + """, + serialized_entries, + ) self._update_last_updated(conn) conn.commit() @@ -1513,8 +1510,8 @@ def get_all_cached_projects( result.append((row["project_path"], is_archived)) finally: conn.close() - except Exception: - pass + except (sqlite3.Error, OSError) as e: + logger.debug("Failed to read cached projects from %s: %s", actual_db_path, e) return result diff --git a/test/test_cache_integration.py 
b/test/test_cache_integration.py index 385ccadc..5e633766 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -475,19 +475,20 @@ def test_mixed_cached_and_uncached_files( def test_cache_corruption_recovery(self, setup_test_project): """Test recovery from corrupted cache files.""" - project_dir = setup_test_project + project_with_cache = setup_test_project + project_dir = project_with_cache.project_dir + db_path = project_with_cache.db_path # Create initial cache convert_jsonl_to_html(input_path=project_dir, use_cache=True) - # Corrupt cache file - cache_dir = project_dir / "cache" - cache_files = list(cache_dir.glob("*.json")) - if cache_files: - cache_file = [f for f in cache_files if f.name != "index.json"][0] - cache_file.write_text("corrupted json data", encoding="utf-8") + # Corrupt SQLite database + assert db_path.exists() + with open(db_path, "r+b") as f: + f.seek(100) # Skip SQLite header + f.write(b"corrupted data here") - # Should recover gracefully + # Should recover gracefully (recreates database) output = convert_jsonl_to_html(input_path=project_dir, use_cache=True) assert output.exists() From 53157d2a794e54f65e5aaac6a82898d68110cc4d Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Tue, 13 Jan 2026 20:44:40 +0000 Subject: [PATCH 13/23] ty update + fix sloppy types --- claude_code_log/converter.py | 4 +-- claude_code_log/factories/user_factory.py | 4 +-- claude_code_log/html/renderer_code.py | 29 ++++++++-------- claude_code_log/html/tool_formatters.py | 5 +-- claude_code_log/html/utils.py | 24 +++++++++----- claude_code_log/renderer.py | 8 ++--- pyproject.toml | 6 +++- stubs/pygments/__init__.pyi | 5 +++ stubs/pygments/formatter.pyi | 7 ++++ stubs/pygments/formatters/__init__.pyi | 16 +++++++++ stubs/pygments/lexer.pyi | 7 ++++ stubs/pygments/lexers/__init__.pyi | 20 ++++++++++++ stubs/pygments/util.pyi | 6 ++++ test/test_cache.py | 2 +- test/test_index_timezone.py | 8 ++--- test/test_message_types.py | 6 ++-- uv.lock 
| 40 +++++++++++------------ 17 files changed, 137 insertions(+), 60 deletions(-) create mode 100644 stubs/pygments/__init__.pyi create mode 100644 stubs/pygments/formatter.pyi create mode 100644 stubs/pygments/formatters/__init__.pyi create mode 100644 stubs/pygments/lexer.pyi create mode 100644 stubs/pygments/lexers/__init__.pyi create mode 100644 stubs/pygments/util.pyi diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index e41a839b..464163b5 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from pathlib import Path import traceback -from typing import Any, Dict, List, Optional, TYPE_CHECKING +from typing import Any, Dict, List, Optional, TYPE_CHECKING, cast import dateparser @@ -190,7 +190,7 @@ def load_transcript( isinstance(tool_use_result, dict) and "agentId" in tool_use_result ): - agent_id_value = tool_use_result.get("agentId") # type: ignore[reportUnknownVariableType, reportUnknownMemberType] + agent_id_value = cast(Any, tool_use_result).get("agentId") if isinstance(agent_id_value, str): agent_ids.add(agent_id_value) # Copy agentId to top level for Pydantic to preserve diff --git a/claude_code_log/factories/user_factory.py b/claude_code_log/factories/user_factory.py index f14a4e4d..02cb01fa 100644 --- a/claude_code_log/factories/user_factory.py +++ b/claude_code_log/factories/user_factory.py @@ -445,7 +445,7 @@ def create_user_message( for item in content_list: # Check for text content if hasattr(item, "text"): - item_text: str = getattr(item, "text") # type: ignore[assignment] + item_text: str = getattr(item, "text") if ide_content := create_ide_notification_content(item_text): # Add IDE notification item first @@ -462,6 +462,6 @@ def create_user_message( items.append(item) elif hasattr(item, "source") and getattr(item, "type", None) == "image": # Duck-typed image content - convert to our Pydantic model - 
items.append(ImageContent.model_validate(item.model_dump())) # type: ignore[union-attr] + items.append(ImageContent.model_validate(item.model_dump())) return UserTextMessage(items=items, meta=meta) diff --git a/claude_code_log/html/renderer_code.py b/claude_code_log/html/renderer_code.py index 7a633d45..8878d82b 100644 --- a/claude_code_log/html/renderer_code.py +++ b/claude_code_log/html/renderer_code.py @@ -12,10 +12,12 @@ import re from typing import Callable, Optional -from pygments import highlight # type: ignore[reportUnknownVariableType] -from pygments.lexers import TextLexer, get_lexer_by_name, get_all_lexers # type: ignore[reportUnknownVariableType] -from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType] -from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType] +from pygments import highlight +from pygments.lexer import Lexer +from pygments.lexers import TextLexer, get_lexer_by_name, get_all_lexers +from pygments.formatter import Formatter +from pygments.formatters import HtmlFormatter +from pygments.util import ClassNotFound from ..renderer_timings import timing_stat @@ -49,7 +51,7 @@ def _init_lexer_caches() -> tuple[dict[str, str], dict[str, str]]: extension_cache: dict[str, str] = {} # Use public API: get_all_lexers() returns (name, aliases, patterns, mimetypes) tuples - for name, aliases, patterns, mimetypes in get_all_lexers(): # type: ignore[reportUnknownVariableType] + for _name, aliases, patterns, _mimetypes in get_all_lexers(): if aliases and patterns: # Use first alias as the lexer name lexer_alias = aliases[0] @@ -93,6 +95,9 @@ def highlight_code_with_pygments( # Get basename for matching (patterns are like "*.py") basename = os.path.basename(file_path).lower() + # Default to plain text lexer + lexer: Lexer = TextLexer() + try: # OPTIMIZATION: Try fast extension lookup first (O(1) dict lookup) lexer_alias = None @@ -107,18 +112,16 @@ def highlight_code_with_pygments( lexer_alias = 
lex_alias break - # Get lexer or use TextLexer as fallback + # Get lexer based on file extension # Note: stripall=False preserves leading whitespace (important for code indentation) if lexer_alias: - lexer = get_lexer_by_name(lexer_alias, stripall=False) # type: ignore[reportUnknownVariableType] - else: - lexer = TextLexer() # type: ignore[reportUnknownVariableType] + lexer = get_lexer_by_name(lexer_alias, stripall=False) except ClassNotFound: - # Fall back to plain text lexer - lexer = TextLexer() # type: ignore[reportUnknownVariableType] + # Fall back to plain text lexer (already set as default) + pass # Create formatter with line numbers in table format - formatter = HtmlFormatter( # type: ignore[reportUnknownVariableType] + formatter: Formatter = HtmlFormatter( linenos="table" if show_linenos else False, cssclass="highlight", wrapcode=True, @@ -127,7 +130,7 @@ def highlight_code_with_pygments( # Highlight the code with timing if enabled with timing_stat("_pygments_timings"): - return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + return str(highlight(code, lexer, formatter)) def truncate_highlighted_preview(highlighted_html: str, max_lines: int) -> str: diff --git a/claude_code_log/html/tool_formatters.py b/claude_code_log/html/tool_formatters.py index 6ce93fe3..2b99d352 100644 --- a/claude_code_log/html/tool_formatters.py +++ b/claude_code_log/html/tool_formatters.py @@ -542,7 +542,7 @@ def render_params_table(params: dict[str, Any]) -> str: # If value is structured (dict/list), render as JSON if isinstance(value, (dict, list)): try: - formatted_value = json.dumps(value, indent=2, ensure_ascii=False) # type: ignore[arg-type] + formatted_value = json.dumps(value, indent=2, ensure_ascii=False) escaped_value = escape_html(formatted_value) # Make long structured values collapsible @@ -559,7 +559,8 @@ def render_params_table(params: dict[str, Any]) -> str: f"
{escaped_value}
" ) except (TypeError, ValueError): - escaped_value = escape_html(str(value)) # type: ignore[arg-type] + # Fallback: convert to string when JSON serialization fails + escaped_value = escape_html(str(cast(object, value))) value_html = escaped_value else: # Simple value, render as-is (or collapsible if long) diff --git a/claude_code_log/html/utils.py b/claude_code_log/html/utils.py index 8822ab3e..613bab43 100644 --- a/claude_code_log/html/utils.py +++ b/claude_code_log/html/utils.py @@ -200,10 +200,12 @@ def escape_html(text: str) -> str: def _create_pygments_plugin() -> Any: """Create a mistune plugin that uses Pygments for code block syntax highlighting.""" - from pygments import highlight # type: ignore[reportUnknownVariableType] - from pygments.lexers import get_lexer_by_name, TextLexer # type: ignore[reportUnknownVariableType] - from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType] - from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType] + from pygments import highlight + from pygments.lexer import Lexer + from pygments.lexers import get_lexer_by_name, TextLexer + from pygments.formatter import Formatter + from pygments.formatters import HtmlFormatter + from pygments.util import ClassNotFound def plugin_pygments(md: Any) -> None: """Plugin to add Pygments syntax highlighting to code blocks.""" @@ -214,19 +216,21 @@ def block_code(code: str, info: Optional[str] = None) -> str: if info: # Language hint provided, use Pygments lang = info.split()[0] if info else "" + # Default to plain text lexer + lexer: Lexer = TextLexer() try: - lexer = get_lexer_by_name(lang, stripall=False) # type: ignore[reportUnknownVariableType] + lexer = get_lexer_by_name(lang, stripall=False) except ClassNotFound: - lexer = TextLexer() # type: ignore[reportUnknownVariableType] + pass # Already have default - formatter = HtmlFormatter( # type: ignore[reportUnknownVariableType] + formatter: Formatter = HtmlFormatter( 
linenos=False, # No line numbers in markdown code blocks cssclass="highlight", wrapcode=True, ) # Track Pygments timing if enabled with timing_stat("_pygments_timings"): - return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + return str(highlight(code, lexer, formatter)) else: # No language hint, use default rendering return original_render(code, info) @@ -438,5 +442,7 @@ def get_template_environment() -> Environment: autoescape=select_autoescape(["html", "xml"]), ) # Add custom filters/functions - env.globals["starts_with_emoji"] = starts_with_emoji # type: ignore[index] + # Cast to Any to bypass Jinja2's overly strict globals type + globals_dict: Any = env.globals + globals_dict["starts_with_emoji"] = starts_with_emoji return env diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 6eb2f1af..e54594a2 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -1531,7 +1531,7 @@ def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]: content = message.content message_content = content if isinstance(content, list) else [] else: - message_content = message.message.content # type: ignore[union-attr] + message_content = message.message.content text_content = extract_text_content(message_content) @@ -1735,16 +1735,16 @@ def _render_messages( ) effective_type = "user" else: - message_content = message.message.content # type: ignore + message_content = message.message.content meta = create_meta(message) effective_type = message_type # Chunk content: regular items (text/image) accumulate, special items (tool/thinking) separate if isinstance(message_content, list): - chunks = chunk_message_content(message_content) # type: ignore[arg-type] + chunks = chunk_message_content(message_content) else: # String content - wrap in list with single TextContent - content_str: str = message_content.strip() if message_content else "" # type: ignore[union-attr] + content_str: str = 
message_content.strip() if message_content else "" if content_str: chunks: list[ContentChunk] = [ [TextContent(type="text", text=content_str)] # pyright: ignore[reportUnknownArgumentType] diff --git a/pyproject.toml b/pyproject.toml index dc27cb5b..ef3d53a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,10 @@ markers = [ "benchmark: Performance benchmarks that output to GitHub Job Summary", ] +[tool.ty.environment] +# Use custom stubs for untyped libraries +extra-paths = ["stubs"] + [tool.pyright] # Pyright configuration with strict settings include = ["claude_code_log"] # TODO: , "test" @@ -112,7 +116,7 @@ dev = [ "pytest-xdist[psutil]>=3.6.1", "pyright>=1.1.350", "vulture>=2.14", - "ty>=0.0.1a12", + "ty>=0.0.11", "pytest-playwright>=0.7.0", "syrupy>=5.0.0", ] diff --git a/stubs/pygments/__init__.pyi b/stubs/pygments/__init__.pyi new file mode 100644 index 00000000..06d21785 --- /dev/null +++ b/stubs/pygments/__init__.pyi @@ -0,0 +1,5 @@ +"""Type stubs for pygments - minimal stubs for functions used in this project.""" + +from typing import Any + +def highlight(code: str, lexer: Any, formatter: Any, outfile: Any = None) -> str: ... diff --git a/stubs/pygments/formatter.pyi b/stubs/pygments/formatter.pyi new file mode 100644 index 00000000..fdf8cc00 --- /dev/null +++ b/stubs/pygments/formatter.pyi @@ -0,0 +1,7 @@ +"""Type stubs for pygments.formatter - base formatter class.""" + +from typing import Any + +class Formatter: + """Base class for formatters.""" + def __init__(self, **options: Any) -> None: ... 
diff --git a/stubs/pygments/formatters/__init__.pyi b/stubs/pygments/formatters/__init__.pyi new file mode 100644 index 00000000..8f181ab2 --- /dev/null +++ b/stubs/pygments/formatters/__init__.pyi @@ -0,0 +1,16 @@ +"""Type stubs for pygments.formatters - minimal stubs for functions used in this project.""" + +from typing import Any, Literal + +from ..formatter import Formatter + +class HtmlFormatter(Formatter): + """HTML formatter for syntax highlighted code.""" + def __init__( + self, + linenos: bool | Literal["table", "inline"] = False, + cssclass: str = "highlight", + wrapcode: bool = False, + linenostart: int = 1, + **options: Any, + ) -> None: ... diff --git a/stubs/pygments/lexer.pyi b/stubs/pygments/lexer.pyi new file mode 100644 index 00000000..16f50b37 --- /dev/null +++ b/stubs/pygments/lexer.pyi @@ -0,0 +1,7 @@ +"""Type stubs for pygments.lexer - base lexer class.""" + +from typing import Any + +class Lexer: + """Base class for lexers.""" + def __init__(self, **options: Any) -> None: ... diff --git a/stubs/pygments/lexers/__init__.pyi b/stubs/pygments/lexers/__init__.pyi new file mode 100644 index 00000000..ad20345f --- /dev/null +++ b/stubs/pygments/lexers/__init__.pyi @@ -0,0 +1,20 @@ +"""Type stubs for pygments.lexers - minimal stubs for functions used in this project.""" + +from typing import Any, Iterator + +from ..lexer import Lexer + +class TextLexer(Lexer): + """Plain text lexer.""" + def __init__(self, **options: Any) -> None: ... + +def get_lexer_by_name(name: str, **options: Any) -> Lexer: ... +def get_all_lexers() -> Iterator[ + tuple[str, tuple[str, ...], tuple[str, ...], tuple[str, ...]] +]: + """Get all registered lexers. + + Returns: + Iterator of (name, aliases, patterns, mimetypes) tuples + """ + ... 
diff --git a/stubs/pygments/util.pyi b/stubs/pygments/util.pyi new file mode 100644 index 00000000..a2d52e07 --- /dev/null +++ b/stubs/pygments/util.pyi @@ -0,0 +1,6 @@ +"""Type stubs for pygments.util - minimal stubs for functions used in this project.""" + +class ClassNotFound(Exception): + """Exception raised when a lexer or formatter class is not found.""" + + ... diff --git a/test/test_cache.py b/test/test_cache.py index e2832ccd..ee843ba6 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -695,7 +695,7 @@ def test_cache_directory_permissions(self, temp_project_dir, mock_version): assert cache_manager is not None except PermissionError: # If we get permission errors, just skip this test - return pytest.skip("Cannot test permissions on this system") # type: ignore[misc] + pytest.skip("Cannot test permissions on this system") finally: # Restore permissions try: diff --git a/test/test_index_timezone.py b/test/test_index_timezone.py index 3bdee10d..7bfdbc86 100644 --- a/test/test_index_timezone.py +++ b/test/test_index_timezone.py @@ -11,7 +11,7 @@ def test_index_timezone_conversion(page): index_path = Path.home() / ".claude" / "projects" / "index.html" if not index_path.exists(): - pytest.skip("Index file not found") # type: ignore[call-non-callable] + pytest.skip("Index file not found") # Load the page page.goto(f"file://{index_path}") @@ -23,7 +23,7 @@ def test_index_timezone_conversion(page): timestamp_elements = page.query_selector_all(".timestamp[data-timestamp]") if len(timestamp_elements) == 0: - pytest.skip("No timestamps found in index page") # type: ignore[call-non-callable] + pytest.skip("No timestamps found in index page") # Get the first timestamp element first_timestamp = timestamp_elements[0] @@ -68,7 +68,7 @@ def test_session_navigation_timezone_conversion(page): test_html_path = Path("/tmp/test_output_tz.html") if not test_html_path.exists(): - pytest.skip("Test HTML file not found") # type: ignore[call-non-callable] + pytest.skip("Test 
HTML file not found") # Load the page page.goto(f"file://{test_html_path}") @@ -82,7 +82,7 @@ def test_session_navigation_timezone_conversion(page): ) if len(session_timestamps) == 0: - pytest.skip("No session navigation timestamps found") # type: ignore[call-non-callable] + pytest.skip("No session navigation timestamps found") # Get the first session timestamp first_session_ts = session_timestamps[0] diff --git a/test/test_message_types.py b/test/test_message_types.py index 9be0e4c9..78f385a9 100644 --- a/test/test_message_types.py +++ b/test/test_message_types.py @@ -4,6 +4,8 @@ import json import tempfile from pathlib import Path + +from pytest import CaptureFixture from claude_code_log.converter import load_transcript from claude_code_log.html.renderer import generate_html from claude_code_log.models import QueueOperationTranscriptEntry @@ -148,7 +150,7 @@ def test_queue_operation_type_support(): test_file_path.unlink() -def test_load_transcript_missing_file_returns_empty_list(capsys): +def test_load_transcript_missing_file_returns_empty_list(capsys: CaptureFixture[str]): """Test that load_transcript handles missing files gracefully. 
This handles the race condition where a file exists when globbed but @@ -188,6 +190,6 @@ def test_load_transcript_missing_file_silent_mode(): if __name__ == "__main__": test_summary_type_support() test_queue_operation_type_support() - test_load_transcript_missing_file_returns_empty_list(None) # type: ignore + # test_load_transcript_missing_file_returns_empty_list requires pytest's capsys fixture test_load_transcript_missing_file_silent_mode() print("\n✅ All message type tests passed!") diff --git a/uv.lock b/uv.lock index df5825e1..ed571709 100644 --- a/uv.lock +++ b/uv.lock @@ -175,7 +175,7 @@ dev = [ { name = "pytest-xdist", extras = ["psutil"], specifier = ">=3.6.1" }, { name = "ruff", specifier = ">=0.11.2" }, { name = "syrupy", specifier = ">=5.0.0" }, - { name = "ty", specifier = ">=0.0.1a12" }, + { name = "ty", specifier = ">=0.0.11" }, { name = "vulture", specifier = ">=2.14" }, ] @@ -1252,27 +1252,27 @@ wheels = [ [[package]] name = "ty" -version = "0.0.1a29" +version = "0.0.11" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/e1/1a75c95fbb284954b2f6fbbf7fbf1d35f531f50ebe93b23cf53145d1bc1d/ty-0.0.1a29.tar.gz", hash = "sha256:43bb55fd467a057880d62ad4bbb048223fd4fba7d8e4d7d5372a0f4881da83fe", size = 4624122, upload-time = "2025-11-28T20:23:51.728Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bc/45/5ae578480168d4b3c08cf8e5eac3caf8eb7acdb1a06a9bed7519564bd9b4/ty-0.0.11.tar.gz", hash = "sha256:ebcbc7d646847cb6610de1da4ffc849d8b800e29fd1e9ebb81ba8f3fbac88c25", size = 4920340, upload-time = "2026-01-09T21:06:01.592Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/49/35034e045ef27ebf220de742c092b1982902740c3ca151ad2072035df77a/ty-0.0.1a29-py3-none-linux_armv6l.whl", hash = "sha256:0276e8e0779046d464dec8415c240cc76b22e22c8c22c227dec2d79395f037be", size = 9581368, upload-time = "2025-11-28T20:24:07.099Z" }, - { url = 
"https://files.pythonhosted.org/packages/c4/76/350ab2592984907a7ed4a887b4e041ce4afe002ca0dff796c81e06b66e1d/ty-0.0.1a29-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4a47afe9be12667ff521a0f2ed5a0873fb85c1f8330a6680e0e3366f016e3e42", size = 9371567, upload-time = "2025-11-28T20:23:49.634Z" }, - { url = "https://files.pythonhosted.org/packages/7d/f6/27977a0206c9914a2b2be5a96c155cd38cf976492388b58ad09e14c42050/ty-0.0.1a29-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c0041f1c36fac2099bc61aa8cdad18d890ceb15544ba33f522f9967372fb3b82", size = 8889114, upload-time = "2025-11-28T20:23:42.492Z" }, - { url = "https://files.pythonhosted.org/packages/34/1b/4f3c20ae1dac6cdc1c42f020a9fd37733f695bff13c4759ba4d84d1dcd51/ty-0.0.1a29-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46ed30fafdde93968f033bbdd3576f068ebe979c65fd2fcc166b1cff00097e5c", size = 9172880, upload-time = "2025-11-28T20:23:53.906Z" }, - { url = "https://files.pythonhosted.org/packages/fb/76/a671e3e560f37a3f82979637ec362d66363a94e5f23c99cf4f16a9fc737b/ty-0.0.1a29-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3814a7bd8b38d761ea621bf9ae7d1d38a7dde514d9f0e07fb2e70ea5aeeea0f5", size = 9377768, upload-time = "2025-11-28T20:24:11.365Z" }, - { url = "https://files.pythonhosted.org/packages/2b/f3/44b03bb0d96451c473a15bb2ab1dbd828b73f2f3c98f8991c8ac2f8a8083/ty-0.0.1a29-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:466f9eedee4fe17f6d1da352e5174374c935facf3dd4a6c6d301891864ac1797", size = 9756864, upload-time = "2025-11-28T20:24:03.523Z" }, - { url = "https://files.pythonhosted.org/packages/0f/75/056a750c4db3326825e0ba009b018892fff47b56efee8e648e01410f9199/ty-0.0.1a29-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:dc47493923c850ebc3a545b4d738da9924361cc9a9c67fbf4b49786462c5998e", size = 10390516, upload-time = "2025-11-28T20:23:44.891Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/1b/e2a764f84cf71ccb65671ce7678fa787d1a73bfffa4804f443c642c691aa/ty-0.0.1a29-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91acf7c35f655c1cb38b7029dc09330fccbc5d18fffb0ecbd7f218518d5fb8d3", size = 10135269, upload-time = "2025-11-28T20:23:59.584Z" }, - { url = "https://files.pythonhosted.org/packages/c9/46/6a7db14e584bd1b3da6b21a02190e218ddc3720a5b699b56039d142c4674/ty-0.0.1a29-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3260e3524d038c08234d77c0b8aecd8c12096ec6df153960591dedbd5688078", size = 10161544, upload-time = "2025-11-28T20:23:47.514Z" }, - { url = "https://files.pythonhosted.org/packages/52/9e/04dfd308788117fe04cc6fe85612ea2945d852c1c9c80150f5aae0d7fe0c/ty-0.0.1a29-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc361cc48f901727a5f7a05cede0102cd4d1eba4aebc6269cb8ec7db23e6c86", size = 9706468, upload-time = "2025-11-28T20:24:13.565Z" }, - { url = "https://files.pythonhosted.org/packages/3f/86/3c0e21b7d7a4f721f5eb35542ed672c790ef3c0570f5665ff26bad4f3c69/ty-0.0.1a29-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8ef39318e49f0cf7f7d1de79003c0939749dc0bb5e3a54c8c3a15d0c6950dc8f", size = 9142491, upload-time = "2025-11-28T20:24:01.311Z" }, - { url = "https://files.pythonhosted.org/packages/0e/a8/55ce8472174efe1d53a6f25c8e325894e121471ecf4332957c941a503cef/ty-0.0.1a29-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:c6f4288b96d37d714542f2e8475d3822be427ebc573a85fc374c1eda7f0381fc", size = 9405392, upload-time = "2025-11-28T20:23:56.368Z" }, - { url = "https://files.pythonhosted.org/packages/46/a6/2889a049257b0dd5c41ee0ca4c0081959b46184338ed378743f45c3d997d/ty-0.0.1a29-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e0fb272452129ba2cd1445a596a4a85c94ec52cb58fb800ed19a3056d8aa84d5", size = 9516865, upload-time = "2025-11-28T20:24:05.247Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/78/35e5bdce73d9f631a14cb838b024377a5c7fcc73a2254a993e9060247d52/ty-0.0.1a29-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:fb6d3ac94a95e86d6f3bc9e39b8e7a3e300be4224b1ac7984ccb3136dfa41d77", size = 9815887, upload-time = "2025-11-28T20:24:08.895Z" }, - { url = "https://files.pythonhosted.org/packages/95/f9/6bb402efa8ad252d5e6b39eeb2a920ef85792c9602617bf391df7c40313a/ty-0.0.1a29-py3-none-win32.whl", hash = "sha256:fb4df9f8bf401a42019526c0da72e26de1b9dab3188d1df59ec6ecbd15edce4a", size = 9029263, upload-time = "2025-11-28T20:24:18.057Z" }, - { url = "https://files.pythonhosted.org/packages/be/f0/3e314ee1a369eba776f3f8e9fac535b9703127097b7e52de5aba025d5c99/ty-0.0.1a29-py3-none-win_amd64.whl", hash = "sha256:3908a8b12616c52520bc7dc1a14732c3b86181125b1326444fc37049d2a20c37", size = 9875790, upload-time = "2025-11-28T20:24:15.884Z" }, - { url = "https://files.pythonhosted.org/packages/af/c2/37d81529242602cd486cd112a93312874948d276515e5fb0718b0f99758d/ty-0.0.1a29-py3-none-win_arm64.whl", hash = "sha256:999ae9077f153fd1804b840d21d904850b9047e361a6a67da8d06dadf94a189a", size = 9373509, upload-time = "2025-11-28T20:24:19.941Z" }, + { url = "https://files.pythonhosted.org/packages/0f/34/b1d05cdcd01589a8d2e63011e0a1e24dcefdc2a09d024fee3e27755963f6/ty-0.0.11-py3-none-linux_armv6l.whl", hash = "sha256:68f0b8d07b0a2ea7ec63a08ba2624f853e4f9fa1a06fce47fb453fa279dead5a", size = 9521748, upload-time = "2026-01-09T21:06:13.221Z" }, + { url = "https://files.pythonhosted.org/packages/43/21/f52d93f4b3784b91bfbcabd01b84dc82128f3a9de178536bcf82968f3367/ty-0.0.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cbf82d7ef0618e9ae3cc3c37c33abcfa302c9b3e3b8ff11d71076f98481cb1a8", size = 9454903, upload-time = "2026-01-09T21:06:42.363Z" }, + { url = "https://files.pythonhosted.org/packages/ad/01/3a563dba8b1255e474c35e1c3810b7589e81ae8c41df401b6a37c8e2cde9/ty-0.0.11-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:121987c906e02264c3b511b95cb9f8a3cdd66f3283b8bbab678ca3525652e304", size = 8823417, upload-time = "2026-01-09T21:06:26.315Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b1/99b87222c05d3a28fb7bbfb85df4efdde8cb6764a24c1b138f3a615283dd/ty-0.0.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:999390b6cc045fe5e1b3da1c2c9ae8e8c0def23b69455e7c9191ba9ffd747023", size = 9290785, upload-time = "2026-01-09T21:05:59.028Z" }, + { url = "https://files.pythonhosted.org/packages/3d/9f/598809a8fff2194f907ba6de07ac3d7b7788342592d8f8b98b1b50c2fb49/ty-0.0.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed504d78eb613c49be3c848f236b345b6c13dc6bcfc4b202790a60a97e1d8f35", size = 9359392, upload-time = "2026-01-09T21:06:37.459Z" }, + { url = "https://files.pythonhosted.org/packages/71/3e/aeea2a97b38f3dcd9f8224bf83609848efa4bc2f484085508165567daa7b/ty-0.0.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7fedc8b43cc8a9991e0034dd205f957a8380dd29bfce36f2a35b5d321636dfd9", size = 9852973, upload-time = "2026-01-09T21:06:21.245Z" }, + { url = "https://files.pythonhosted.org/packages/72/40/86173116995e38f954811a86339ac4c00a2d8058cc245d3e4903bc4a132c/ty-0.0.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:0808bdfb7efe09881bf70249b85b0498fb8b75fbb036ce251c496c20adb10075", size = 10796113, upload-time = "2026-01-09T21:06:16.034Z" }, + { url = "https://files.pythonhosted.org/packages/69/71/97c92c401dacae9baa3696163ebe8371635ebf34ba9fda781110d0124857/ty-0.0.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:07185b3e38b18c562056dfbc35fb51d866f872977ea1ebcd64ca24a001b5b4f1", size = 10432137, upload-time = "2026-01-09T21:06:07.498Z" }, + { url = "https://files.pythonhosted.org/packages/18/10/9ab43f3cfc5f7792f6bc97620f54d0a0a81ef700be84ea7f6be330936a99/ty-0.0.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:b5c72f1ada8eb5be984502a600f71d1a3099e12fb6f3c0607aaba2f86f0e9d80", size = 10240520, upload-time = "2026-01-09T21:06:34.823Z" }, + { url = "https://files.pythonhosted.org/packages/74/18/8dd4fe6df1fd66f3e83b4798eddb1d8482d9d9b105f25099b76703402ebb/ty-0.0.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25f88e8789072830348cb59b761d5ced70642ed5600673b4bf6a849af71eca8b", size = 9973340, upload-time = "2026-01-09T21:06:39.657Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0b/fb2301450cf8f2d7164944d6e1e659cac9ec7021556cc173d54947cf8ef4/ty-0.0.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f370e1047a62dcedcd06e2b27e1f0b16c7f8ea2361d9070fcbf0d0d69baaa192", size = 9262101, upload-time = "2026-01-09T21:06:28.989Z" }, + { url = "https://files.pythonhosted.org/packages/f7/8c/d6374af023541072dee1c8bcfe8242669363a670b7619e6fffcc7415a995/ty-0.0.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:52be34047ed6177bfcef9247459a767ec03d775714855e262bca1fb015895e8a", size = 9382756, upload-time = "2026-01-09T21:06:24.097Z" }, + { url = "https://files.pythonhosted.org/packages/0d/44/edd1e63ffa8d49d720c475c2c1c779084e5efe50493afdc261938705d10a/ty-0.0.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b9e5762ccb3778779378020b8d78f936b3f52ea83f18785319cceba3ae85d8e6", size = 9553944, upload-time = "2026-01-09T21:06:18.426Z" }, + { url = "https://files.pythonhosted.org/packages/35/cd/4afdb0d182d23d07ff287740c4954cc6dde5c3aed150ec3f2a1d72b00f71/ty-0.0.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e9334646ee3095e778e3dbc45fdb2bddfc16acc7804283830ad84991ece16dd7", size = 10060365, upload-time = "2026-01-09T21:06:45.083Z" }, + { url = "https://files.pythonhosted.org/packages/d1/94/a009ad9d8b359933cfea8721c689c0331189be28650d74dcc6add4d5bb09/ty-0.0.11-py3-none-win32.whl", hash = "sha256:44cfb7bb2d6784bd7ffe7b5d9ea90851d9c4723729c50b5f0732d4b9a2013cfc", size = 9040448, upload-time = "2026-01-09T21:06:32.241Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/04/5a5dfd0aec0ea99ead1e824ee6e347fb623c464da7886aa1e3660fb0f36c/ty-0.0.11-py3-none-win_amd64.whl", hash = "sha256:1bb205db92715d4a13343bfd5b0c59ce8c0ca0daa34fb220ec9120fc66ccbda7", size = 9780112, upload-time = "2026-01-09T21:06:04.69Z" }, + { url = "https://files.pythonhosted.org/packages/ad/07/47d4fccd7bcf5eea1c634d518d6cb233f535a85d0b63fcd66815759e2fa0/ty-0.0.11-py3-none-win_arm64.whl", hash = "sha256:4688bd87b2dc5c85da277bda78daba14af2e66f3dda4d98f3604e3de75519eba", size = 9194038, upload-time = "2026-01-09T21:06:10.152Z" }, ] [[package]] From 4ef769f16670c4dc7ddc2bf4bda351ea34efa542 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Tue, 13 Jan 2026 23:38:32 +0000 Subject: [PATCH 14/23] TUI Markdown viewer pagination and tweaks --- claude_code_log/tui.py | 164 ++++++++++++++++++++- test/test_tui.py | 326 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 486 insertions(+), 4 deletions(-) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index ca8580e7..4536e00c 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -4,7 +4,7 @@ import os import webbrowser from datetime import datetime -from pathlib import Path +from pathlib import Path, PurePath from typing import Any, ClassVar, List, Optional, cast from textual.app import App, ComposeResult @@ -465,9 +465,44 @@ def _handle_restore_project_confirm(self, confirmed: bool | None) -> None: self.populate_table() +class SafeMarkdownViewer(MarkdownViewer): + """MarkdownViewer that handles link clicks safely. + + Intercepts link clicks to prevent crashes from file/external links + while still allowing anchor navigation for ToC. + """ + + async def go(self, location: str | PurePath) -> None: + """Navigate to a new location - intercept non-anchor links. 
+ + Override parent's go() method to handle links appropriately: + - Anchor links (#section): allow default scrolling + - HTTP/HTTPS URLs: open in browser + - Relative file links: show warning (not supported) + """ + location_str = str(location) + + if location_str.startswith("#"): + # Anchor link - allow default scroll behaviour + await super().go(location) + elif location_str.startswith(("http://", "https://")): + # External URL - open in browser + webbrowser.open(location_str) + self.notify(f"Opening in browser: {location_str[:50]}...") + else: + # Relative file link - not supported in embedded viewer + self.notify( + "File links not supported in embedded viewer", + severity="warning", + ) + + class MarkdownViewerScreen(ModalScreen[None]): """Modal screen for viewing Markdown content with table of contents.""" + # Character-based pagination - ~50KB per page for good scroll performance + PAGE_SIZE_CHARS = 50_000 + CSS = """ MarkdownViewerScreen { align: center middle; @@ -498,6 +533,14 @@ class MarkdownViewerScreen(ModalScreen[None]): max-width: 60; } + #pagination-controls { + dock: top; + height: 1; + background: $warning; + color: $text; + text-align: center; + } + #md-footer { dock: bottom; height: 1; @@ -510,20 +553,89 @@ class MarkdownViewerScreen(ModalScreen[None]): BINDINGS: ClassVar[list[BindingType]] = [ Binding("escape", "dismiss", "Close", show=True), Binding("q", "dismiss", "Close", show=False), + Binding("t", "toggle_toc", "Toggle ToC"), + Binding("n", "next_page", "Next page"), + Binding("right", "next_page", "Next page", show=False), + Binding("p", "prev_page", "Prev page"), + Binding("left", "prev_page", "Prev page", show=False), ] def __init__(self, content: str, title: str = "Markdown Viewer") -> None: super().__init__() self.md_content = content self.md_title = title + self._pages = self._split_into_pages(content) + self._current_page = 0 + self._is_paginated = len(self._pages) > 1 + + def _split_into_pages(self, content: str) -> list[str]: + 
"""Split markdown content into pages by character count. + + Splits at section boundaries (## ) when possible to avoid + cutting mid-section, but will split within sections if + a single section exceeds PAGE_SIZE_CHARS. + """ + import re + + if len(content) <= self.PAGE_SIZE_CHARS: + return [content] + + pages: list[str] = [] + current_page = "" + + # Split by level 2 headings, keeping the delimiter + sections = re.split(r"(\n(?=## ))", content) + + for section in sections: + if not section: + continue + + # If adding this section exceeds page size + if len(current_page) + len(section) > self.PAGE_SIZE_CHARS: + # If current page has content, save it + if current_page.strip(): + pages.append(current_page) + current_page = "" + + # If section itself exceeds page size, split it by lines + if len(section) > self.PAGE_SIZE_CHARS: + lines = section.split("\n") + for line in lines: + if len(current_page) + len(line) + 1 > self.PAGE_SIZE_CHARS: + if current_page.strip(): + pages.append(current_page) + current_page = line + "\n" + else: + current_page += line + "\n" + else: + current_page = section + else: + current_page += section + + # Don't forget the last page + if current_page.strip(): + pages.append(current_page) + + return pages if pages else [content] def compose(self) -> ComposeResult: with Container(id="md-container"): yield Static(self.md_title, id="md-header") - yield MarkdownViewer( - self.md_content, id="md-viewer", show_table_of_contents=True + if self._is_paginated: + yield Static( + f"Page {self._current_page + 1}/{len(self._pages)} | " + "← or p: prev | → or n: next", + id="pagination-controls", + ) + yield SafeMarkdownViewer( + self._pages[self._current_page], + id="md-viewer", + show_table_of_contents=True, ) - yield Static("Press ESC or q to close | t: toggle ToC", id="md-footer") + footer_text = "Press ESC or q to close | t: toggle ToC" + if self._is_paginated: + footer_text += " | n/p: navigate pages" + yield Static(footer_text, id="md-footer") def 
on_mount(self) -> None: """Customize ToC tree after mount.""" @@ -586,6 +698,50 @@ def _clean_toc_labels(self, node: Any) -> None: async def action_dismiss(self, result: None = None) -> None: self.dismiss(result) + def action_toggle_toc(self) -> None: + """Toggle table of contents visibility.""" + viewer = self.query_one("#md-viewer", MarkdownViewer) + viewer.show_table_of_contents = not viewer.show_table_of_contents + + def action_next_page(self) -> None: + """Navigate to next page (if paginated).""" + if not self._is_paginated: + return + if self._current_page < len(self._pages) - 1: + self._current_page += 1 + if self.is_mounted: + self._update_viewer_content() + + def action_prev_page(self) -> None: + """Navigate to previous page (if paginated).""" + if not self._is_paginated: + return + if self._current_page > 0: + self._current_page -= 1 + if self.is_mounted: + self._update_viewer_content() + + def _update_viewer_content(self) -> None: + """Update the markdown viewer with current page content.""" + try: + # Update pagination controls + controls = self.query_one("#pagination-controls", Static) + controls.update( + f"Page {self._current_page + 1}/{len(self._pages)} | ← or p: prev | → or n: next" + ) + + # Update the markdown content directly + viewer = self.query_one("#md-viewer", SafeMarkdownViewer) + viewer.document.update(self._pages[self._current_page]) + + # Scroll to top of content + viewer.scroll_home(animate=False) + + # Re-customize ToC after content loads + self.call_later(self._customize_toc_tree) + except Exception as e: + self.notify(f"Error updating page: {e}", severity="error") + class ArchiveConfirmScreen(ModalScreen[bool]): """Modal screen for confirming session archiving (delete JSONL, keep cache).""" diff --git a/test/test_tui.py b/test/test_tui.py index 9009a490..0bb26dcc 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -1933,3 +1933,329 @@ async def test_restore_project_not_archived_shows_warning(self): # Try to restore - should show 
@pytest.mark.tui
class TestMarkdownViewerScreen:
    """Tests for the MarkdownViewerScreen modal."""

    @pytest.mark.asyncio
    async def test_toc_toggle_binding_exists(self):
        """The screen declares a 't' key binding for toggling the ToC."""
        from claude_code_log.tui import MarkdownViewerScreen

        keys = [
            b.key if hasattr(b, "key") else b[0] for b in MarkdownViewerScreen.BINDINGS
        ]
        assert "t" in keys, "Should have 't' binding for ToC toggle"

    @pytest.mark.asyncio
    async def test_toc_toggle_action_toggles_visibility(self):
        """Pressing 't' flips ToC visibility back and forth."""
        from claude_code_log.tui import MarkdownViewerScreen
        from textual.app import App
        from textual.widgets import MarkdownViewer

        content = "# Heading 1\n\nSome content\n\n## Heading 2\n\nMore content"
        screen = MarkdownViewerScreen(content, "Test Title")

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()
        async with app.run_test() as pilot:
            app.push_screen(screen)
            await pilot.pause(0.3)

            viewer = screen.query_one("#md-viewer", MarkdownViewer)

            # ToC starts visible, and each 't' press inverts it.
            assert viewer.show_table_of_contents is True

            await pilot.press("t")
            await pilot.pause(0.1)
            assert viewer.show_table_of_contents is False

            await pilot.press("t")
            await pilot.pause(0.1)
            assert viewer.show_table_of_contents is True

    @pytest.mark.asyncio
    async def test_safe_markdown_viewer_overrides_go(self):
        """SafeMarkdownViewer must provide its own `go`, distinct from the parent's."""
        from claude_code_log.tui import SafeMarkdownViewer
        from textual.widgets import MarkdownViewer

        assert "go" in SafeMarkdownViewer.__dict__, "Should override go method"
        assert SafeMarkdownViewer.go is not MarkdownViewer.go

    @pytest.mark.asyncio
    async def test_file_link_click_does_not_crash(self):
        """Clicking a file link notifies the user instead of crashing the screen."""
        from claude_code_log.tui import MarkdownViewerScreen, SafeMarkdownViewer
        from textual.app import App
        from textual.widgets.markdown import Markdown

        content = "# Test\n\n[Back to combined](combined_transcripts.md)"
        screen = MarkdownViewerScreen(content, "Link Test")

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()
        notifications = []

        async with app.run_test() as pilot:
            app.push_screen(screen)
            await pilot.pause(0.3)

            # Wrap notify on the viewer (where the notification originates)
            # so we can record every message it shows.
            viewer = screen.query_one("#md-viewer", SafeMarkdownViewer)
            original_notify = viewer.notify

            def tracking_notify(
                message: str,
                *,
                title: str = "",
                severity: str = "information",
                timeout: float | None = None,
                markup: bool = True,
            ) -> None:
                notifications.append(str(message))
                original_notify(
                    message,
                    title=title,
                    severity=severity,  # type: ignore[arg-type]
                    timeout=timeout,
                    markup=markup,
                )

            viewer.notify = tracking_notify  # type: ignore[method-assign]

            # Simulate the link click by posting the event directly.
            markdown_widget = viewer.query_one(Markdown)
            markdown_widget.post_message(
                Markdown.LinkClicked(markdown_widget, "combined_transcripts.md")
            )
            await pilot.pause(0.2)

            # Screen survived and the user was told the link isn't supported.
            assert screen.is_mounted
            assert len(notifications) > 0
            assert any("not supported" in n.lower() for n in notifications)

    @pytest.mark.asyncio
    async def test_http_link_opens_browser(self):
        """HTTP(S) links are handed off to the system browser."""
        from claude_code_log.tui import MarkdownViewerScreen, SafeMarkdownViewer
        from textual.app import App
        from textual.widgets.markdown import Markdown

        content = "# Test\n\n[Example](https://example.com)"
        screen = MarkdownViewerScreen(content, "Link Test")

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()

        with patch("claude_code_log.tui.webbrowser.open") as mock_open:
            async with app.run_test() as pilot:
                app.push_screen(screen)
                await pilot.pause(0.3)

                viewer = screen.query_one("#md-viewer", SafeMarkdownViewer)
                markdown_widget = viewer.query_one(Markdown)
                markdown_widget.post_message(
                    Markdown.LinkClicked(markdown_widget, "https://example.com")
                )
                await pilot.pause(0.2)

            # At least one call expected (event propagation may trigger two).
            mock_open.assert_called_with("https://example.com")
            assert mock_open.call_count >= 1


@pytest.mark.tui
class TestMarkdownViewerPagination:
    """Tests for pagination in MarkdownViewerScreen."""

    @staticmethod
    def _build_large_content() -> str:
        """Build markdown roughly 2.5x PAGE_SIZE_CHARS so pagination triggers.

        Uses line breaks so the pagination algorithm can split cleanly.
        """
        from claude_code_log.tui import MarkdownViewerScreen

        filler = "Content line with some text here.\n"
        repeat = int(MarkdownViewerScreen.PAGE_SIZE_CHARS * 2.5 / len(filler))
        return "# Large Session\n\n" + (filler * repeat)

    @pytest.mark.asyncio
    async def test_pagination_constants_defined(self):
        """The screen exposes a positive PAGE_SIZE_CHARS constant."""
        from claude_code_log.tui import MarkdownViewerScreen

        assert hasattr(MarkdownViewerScreen, "PAGE_SIZE_CHARS"), (
            "Should have PAGE_SIZE_CHARS constant"
        )
        assert MarkdownViewerScreen.PAGE_SIZE_CHARS > 0

    @pytest.mark.asyncio
    async def test_small_content_no_pagination(self):
        """Small content mounts without any pagination controls."""
        from claude_code_log.tui import MarkdownViewerScreen
        from textual.app import App

        screen = MarkdownViewerScreen("# Small\n\nJust a bit of content.", "Small Test")

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()
        async with app.run_test() as pilot:
            app.push_screen(screen)
            await pilot.pause(0.3)

            try:
                screen.query_one("#pagination-controls")
                assert False, "Small content should not show pagination controls"
            except NoMatches:
                pass  # Expected - no pagination for small content

    @pytest.mark.asyncio
    async def test_large_content_shows_pagination(self):
        """Large content is paginated and renders pagination controls."""
        from claude_code_log.tui import MarkdownViewerScreen
        from textual.app import App

        screen = MarkdownViewerScreen(self._build_large_content(), "Large Test")

        # Pagination state is computed at construction (checked without UI for speed).
        assert screen._is_paginated
        assert len(screen._pages) >= 2

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()
        async with app.run_test() as pilot:
            app.push_screen(screen)
            await pilot.pause(0.5)

            controls = screen.query_one("#pagination-controls")
            assert controls is not None

    @pytest.mark.asyncio
    async def test_pagination_bindings_exist(self):
        """n/p and arrow keys are bound for page navigation."""
        from claude_code_log.tui import MarkdownViewerScreen

        keys = [
            b.key if hasattr(b, "key") else b[0] for b in MarkdownViewerScreen.BINDINGS
        ]
        assert "n" in keys, "Should have 'n' binding for next page"
        assert "p" in keys, "Should have 'p' binding for previous page"
        assert "right" in keys, "Should have 'right' arrow binding for next page"
        assert "left" in keys, "Should have 'left' arrow binding for prev page"

    @pytest.mark.asyncio
    async def test_next_page_action_updates_state(self):
        """action_next_page advances the internal page counter."""
        from claude_code_log.tui import MarkdownViewerScreen

        screen = MarkdownViewerScreen(self._build_large_content(), "Pagination Test")

        assert screen._current_page == 0
        assert screen._is_paginated
        assert len(screen._pages) >= 3, f"Expected 3+ pages, got {len(screen._pages)}"

        # Drive the action directly (no UI needed).
        screen.action_next_page()
        assert screen._current_page == 1

        screen.action_next_page()
        assert screen._current_page == 2

    @pytest.mark.asyncio
    async def test_prev_page_action_updates_state(self):
        """action_prev_page steps back through previously visited pages."""
        from claude_code_log.tui import MarkdownViewerScreen

        screen = MarkdownViewerScreen(self._build_large_content(), "Pagination Test")
        assert len(screen._pages) >= 3, f"Expected 3+ pages, got {len(screen._pages)}"

        screen.action_next_page()
        screen.action_next_page()
        assert screen._current_page == 2

        screen.action_prev_page()
        assert screen._current_page == 1

        screen.action_prev_page()
        assert screen._current_page == 0

    @pytest.mark.asyncio
    async def test_page_boundaries_respected(self):
        """Navigation clamps at the first and last page."""
        from claude_code_log.tui import MarkdownViewerScreen

        screen = MarkdownViewerScreen(self._build_large_content(), "Pagination Test")

        # Prev on the first page is a no-op.
        assert screen._current_page == 0
        screen.action_prev_page()
        assert screen._current_page == 0

        # Overshoot past the end; position must clamp to the last page.
        total_pages = len(screen._pages)
        for _ in range(total_pages + 5):
            screen.action_next_page()
        assert screen._current_page == total_pages - 1

        screen.action_next_page()
        assert screen._current_page == total_pages - 1
(ValueError, AttributeError): return "Unknown" diff --git a/test/test_tui.py b/test/test_tui.py index 0bb26dcc..dc45ba24 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -560,9 +560,15 @@ def test_format_timestamp(self, temp_project_dir): """Test timestamp formatting.""" app = SessionBrowser(temp_project_dir) - # Test valid timestamp + # Test valid timestamp (default long format includes year) formatted = app.format_timestamp("2025-01-01T10:00:00Z") - assert formatted == "01-01 10:00" + assert formatted == "2025-01-01 10:00" + + # Test short format (no year) + formatted_short = app.format_timestamp( + "2025-01-01T10:00:00Z", short_format=True + ) + assert formatted_short == "01-01 10:00" # Test date only formatted_date = app.format_timestamp("2025-01-01T10:00:00Z", date_only=True) @@ -1691,11 +1697,15 @@ async def test_enter_key_selects_project(self): with tempfile.TemporaryDirectory() as temp_dir: project1 = Path(temp_dir) / "project1" project1.mkdir() - (project1 / "session-1.jsonl").write_text('{"type":"user"}\n') + (project1 / "session-1.jsonl").write_text( + '{"type":"user"}\n', encoding="utf-8" + ) project2 = Path(temp_dir) / "project2" project2.mkdir() - (project2 / "session-2.jsonl").write_text('{"type":"user"}\n') + (project2 / "session-2.jsonl").write_text( + '{"type":"user"}\n', encoding="utf-8" + ) app = ProjectSelector( projects=[project1, project2], @@ -1738,8 +1748,8 @@ async def test_archive_project_action(self): project_path.mkdir() jsonl1 = project_path / "session-1.jsonl" jsonl2 = project_path / "session-2.jsonl" - jsonl1.write_text('{"type":"user"}\n') - jsonl2.write_text('{"type":"user"}\n') + jsonl1.write_text('{"type":"user"}\n', encoding="utf-8") + jsonl2.write_text('{"type":"user"}\n', encoding="utf-8") app = ProjectSelector( projects=[project_path], @@ -1800,7 +1810,7 @@ async def test_delete_project_cache_only(self): project_path = Path(temp_dir) / "project1" project_path.mkdir() jsonl = project_path / "session-1.jsonl" - 
jsonl.write_text('{"type":"user"}\n') + jsonl.write_text('{"type":"user"}\n', encoding="utf-8") app = ProjectSelector( projects=[project_path], @@ -1831,7 +1841,7 @@ async def test_delete_project_both(self): project_path = Path(temp_dir) / "project1" project_path.mkdir() jsonl = project_path / "session-1.jsonl" - jsonl.write_text('{"type":"user"}\n') + jsonl.write_text('{"type":"user"}\n', encoding="utf-8") app = ProjectSelector( projects=[project_path], @@ -1916,7 +1926,9 @@ async def test_restore_project_not_archived_shows_warning(self): with tempfile.TemporaryDirectory() as temp_dir: project_path = Path(temp_dir) / "project1" project_path.mkdir() - (project_path / "session-1.jsonl").write_text('{"type":"user"}\n') + (project_path / "session-1.jsonl").write_text( + '{"type":"user"}\n', encoding="utf-8" + ) app = ProjectSelector( projects=[project_path], From 2d071c69b4b9783b56c199398f70ae3e8fccc369 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 19 Jan 2026 00:28:42 +0000 Subject: [PATCH 16/23] Last little fixes --- claude_code_log/cache.py | 40 +++++++++- claude_code_log/converter.py | 12 ++- test/test_cache.py | 143 +++++++++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+), 2 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index ebc4363a..924121cf 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -4,6 +4,7 @@ import json import logging import os +import re import sqlite3 import zlib from contextlib import contextmanager @@ -294,13 +295,50 @@ def _update_last_updated(self, conn: sqlite3.Connection) -> None: (datetime.now().isoformat(), self._project_id), ) + def _normalize_timestamp(self, timestamp: Optional[str]) -> Optional[str]: + """Normalize timestamp to consistent format for reliable string comparison. 
+ + Converts various ISO 8601 formats to a canonical form: + - Strips fractional seconds (e.g., '.875368') + - Normalizes timezone to 'Z' suffix + + This ensures lexicographic string comparison works correctly in SQL queries. + Without normalization, '2023-01-01T10:00:00.5Z' < '2023-01-01T10:00:00Z' + because '.' < 'Z' in ASCII, even though the first is 500ms later. + + Args: + timestamp: ISO 8601 timestamp string, or None + + Returns: + Normalized timestamp in 'YYYY-MM-DDTHH:MM:SSZ' format, or None + """ + if timestamp is None: + return None + + # Pattern matches: YYYY-MM-DDTHH:MM:SS followed by optional fractional seconds + # and timezone (Z or +HH:MM or +HH or +HHMM) + match = re.match( + r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})" # Base datetime + r"(?:\.\d+)?" # Optional fractional seconds (discard) + r"(?:Z|[+-]\d{2}:?\d{0,2})?$", # Optional timezone + timestamp, + ) + + if match: + # Return just the base datetime with Z suffix + return match.group(1) + "Z" + + # If pattern doesn't match, return original (shouldn't happen with valid data) + return timestamp + def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, Any]: """Convert TranscriptEntry to dict for SQLite insertion.""" + raw_timestamp = getattr(entry, "timestamp", None) base: Dict[str, Any] = { "project_id": self._project_id, "file_id": file_id, "type": entry.type, - "timestamp": getattr(entry, "timestamp", None), + "timestamp": self._normalize_timestamp(raw_timestamp), "session_id": getattr(entry, "sessionId", None), "_uuid": getattr(entry, "uuid", None), "_parent_uuid": getattr(entry, "parentUuid", None), diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 464163b5..b22aa7bf 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1027,7 +1027,17 @@ def convert_jsonl_to( assert cache_manager is not None # Ensured by use_pagination condition # Use cached session data if available, otherwise build from messages if cached_data is 
not None: - session_data = cached_data.sessions + warmup_session_ids = get_warmup_session_ids(messages) + current_session_ids: set[str] = set() + for message in messages: + session_id = getattr(message, "sessionId", "") + if session_id and session_id not in warmup_session_ids: + current_session_ids.add(session_id) + session_data = { + session_id: session_cache + for session_id, session_cache in cached_data.sessions.items() + if session_id in current_session_ids + } else: session_data = _build_session_data_from_messages(messages) output_path = _generate_paginated_html( diff --git a/test/test_cache.py b/test/test_cache.py index ee843ba6..71a57b34 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -327,6 +327,149 @@ def test_filtered_loading_with_z_suffix_boundary( ) assert "End of day message" in str(user_messages[0].message.content) + def test_filtered_loading_with_mixed_timestamp_formats( + self, cache_manager, temp_project_dir + ): + """Test filtering with mixed timestamp formats (with/without fractional seconds). + + This tests the bug where timestamps like '2023-01-01T10:00:00.875368Z' + were incorrectly compared against filter bounds like '2023-01-01T10:00:00Z'. + String comparison fails because '.' < 'Z' alphabetically, causing the + timestamp with microseconds to be incorrectly excluded even though it's + actually 875ms AFTER the filter bound. 
+ """ + entries = [ + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user1", + timestamp="2023-01-01T10:00:00Z", # No fractional seconds + type="user", + message=UserMessageModel( + role="user", + content=[ + TextContent(type="text", text="Message without microseconds") + ], + ), + ), + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user2", + timestamp="2023-01-01T10:00:00.875368Z", # With microseconds - same second + type="user", + message=UserMessageModel( + role="user", + content=[ + TextContent(type="text", text="Message with microseconds") + ], + ), + ), + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user3", + timestamp="2023-01-01T10:00:01.123456Z", # Next second with microseconds + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="Message next second")], + ), + ), + ] + + jsonl_path = temp_project_dir / "test.jsonl" + jsonl_path.write_text("dummy content", encoding="utf-8") + + cache_manager.save_cached_entries(jsonl_path, entries) + + # Filter with from_date at exactly 10:00:00 - should include ALL messages + # The bug would cause the microsecond messages to be excluded because + # '2023-01-01T10:00:00.875368Z' < '2023-01-01T10:00:00Z' in string comparison + filtered = cache_manager.load_cached_entries_filtered( + jsonl_path, "2023-01-01 10:00:00", "2023-01-01 10:00:01" + ) + + assert filtered is not None + user_messages = [entry for entry in filtered if entry.type == "user"] + + # All 3 messages should be included + assert len(user_messages) == 3, ( + f"Expected 3 messages, got {len(user_messages)}. " + "Messages with fractional seconds may have been incorrectly excluded " + "due to string comparison where '.' 
< 'Z'." + ) + + def test_timestamp_ordering_with_mixed_formats( + self, cache_manager, temp_project_dir + ): + """Test that timestamps are correctly ordered regardless of format. + + Without normalization, ORDER BY timestamp would sort: + - '2023-01-01T10:00:00.5Z' BEFORE '2023-01-01T10:00:00Z' + because '.' < 'Z' in ASCII, even though .5 seconds is AFTER 0 seconds. + """ + entries = [ + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user1", + timestamp="2023-01-01T10:00:00.500000Z", # 500ms into the second + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="Second message (500ms)")], + ), + ), + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user2", + timestamp="2023-01-01T10:00:00Z", # Start of the second + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="First message (0ms)")], + ), + ), + ] + + jsonl_path = temp_project_dir / "test.jsonl" + jsonl_path.write_text("dummy content", encoding="utf-8") + + cache_manager.save_cached_entries(jsonl_path, entries) + + # Load all entries - they should be in timestamp order + loaded = cache_manager.load_cached_entries(jsonl_path) + + assert loaded is not None + user_messages = [entry for entry in loaded if entry.type == "user"] + + # With normalization to second precision, both messages have the same + # normalized timestamp, so order may vary. The key thing is that the + # filtering works correctly - ordering within the same second is less critical. 
+ assert len(user_messages) == 2 + def test_clear_cache(self, cache_manager, temp_project_dir, sample_entries): """Test cache clearing functionality.""" jsonl_path = temp_project_dir / "test.jsonl" From 99312262528cc62529a333c1bfa4f046df04ac70 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Mon, 19 Jan 2026 23:54:41 +0100 Subject: [PATCH 17/23] Fix 'h' action with relative --projects-dir path Resolve project_path to absolute at SessionBrowser init so file:// URLs work correctly in the browser regardless of how the path was specified. Co-Authored-By: Claude Opus 4.5 --- claude_code_log/tui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 79c9e1b6..8e9cb718 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -1212,9 +1212,9 @@ def __init__(self, project_path: Path, is_archived: bool = False): """Initialize the session browser with a project path.""" super().__init__() self.theme = "gruvbox" - self.project_path = project_path + self.project_path = project_path.resolve() self.is_archived_project = is_archived - self.cache_manager = CacheManager(project_path, get_library_version()) + self.cache_manager = CacheManager(self.project_path, get_library_version()) self.sessions = {} self.archived_sessions = {} From 6878843098d96799d33302cc8528c6639e32df29 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Tue, 20 Jan 2026 23:09:08 +0100 Subject: [PATCH 18/23] Fix Markdown viewer focus/scroll behavior with TOC - Preserve scroll position when toggling TOC visibility - Tab/Shift+Tab switch focus between TOC and content without scrolling - Hide TOC: focus moves to content, position preserved - Show TOC: focus moves to outline tree - Use on_key handler to intercept Tab before child widgets consume it - Focus Tree widget inside TOC and document inside viewer (not containers) Co-Authored-By: Claude Opus 4.5 --- claude_code_log/tui.py | 63 ++++++++++++++++++++++++++++++++++++++++-- 1 
file changed, 60 insertions(+), 3 deletions(-) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 8e9cb718..8c0387f5 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -554,6 +554,8 @@ class MarkdownViewerScreen(ModalScreen[None]): Binding("escape", "dismiss", "Close", show=True), Binding("q", "dismiss", "Close", show=False), Binding("t", "toggle_toc", "Toggle ToC"), + Binding("tab", "switch_focus", "Switch focus", show=False, priority=True), + Binding("shift+tab", "switch_focus", "Switch focus", show=False, priority=True), Binding("n", "next_page", "Next page"), Binding("right", "next_page", "Next page", show=False), Binding("p", "prev_page", "Prev page"), @@ -698,10 +700,65 @@ def _clean_toc_labels(self, node: Any) -> None: async def action_dismiss(self, result: None = None) -> None: self.dismiss(result) + def on_key(self, event: Any) -> None: + """Intercept Tab keys to handle focus switching without scroll.""" + if event.key in ("tab", "shift+tab"): + event.prevent_default() + event.stop() + self.action_switch_focus() + + def _focus_viewer_content(self, viewer: SafeMarkdownViewer) -> None: + """Focus the viewer's document content without scrolling.""" + # MarkdownViewer is a container; focus its document widget + try: + viewer.document.focus(scroll_visible=False) + except Exception: + viewer.focus(scroll_visible=False) + def action_toggle_toc(self) -> None: - """Toggle table of contents visibility.""" - viewer = self.query_one("#md-viewer", MarkdownViewer) - viewer.show_table_of_contents = not viewer.show_table_of_contents + """Toggle table of contents visibility, preserving scroll position.""" + viewer = self.query_one("#md-viewer", SafeMarkdownViewer) + scroll_y = viewer.scroll_y + will_show_toc = not viewer.show_table_of_contents + viewer.show_table_of_contents = will_show_toc + + def restore_and_focus() -> None: + viewer.scroll_to(y=scroll_y, animate=False) + if will_show_toc: + # Focus the Tree inside TOC when showing + try: 
+ toc = viewer.table_of_contents + tree = cast("Tree[Any]", toc.query_one(Tree)) + tree.focus(scroll_visible=False) + except Exception: + pass + else: + # Focus the document content when hiding TOC + self._focus_viewer_content(viewer) + + self.call_later(restore_and_focus) + + def action_switch_focus(self) -> None: + """Switch focus between TOC and content without scrolling.""" + viewer = self.query_one("#md-viewer", SafeMarkdownViewer) + if not viewer.show_table_of_contents: + # TOC hidden, just focus the document + self._focus_viewer_content(viewer) + return + + try: + toc = viewer.table_of_contents + # Get the Tree widget inside the TOC + tree = cast("Tree[Any]", toc.query_one(Tree)) + if tree.has_focus: + # Currently in TOC tree, switch to document + self._focus_viewer_content(viewer) + else: + # Currently in document, switch to TOC tree + tree.focus(scroll_visible=False) + except Exception as e: + self.notify(f"Focus switch error: {e}", severity="warning") + self._focus_viewer_content(viewer) def action_next_page(self) -> None: """Navigate to next page (if paginated).""" From 2aa677406941620e2bc79d37b5374f54f84a6878 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Tue, 20 Jan 2026 23:26:54 +0100 Subject: [PATCH 19/23] Fix test_init assertion for Windows short paths Compare resolved paths since SessionBrowser.project_path is now resolved. Fixes Windows CI where temp paths use 8.3 short names (RUNNER~1). 
Co-Authored-By: Claude Opus 4.5 --- test/test_tui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_tui.py b/test/test_tui.py index dc45ba24..8e78f6cb 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -115,7 +115,8 @@ class TestSessionBrowser: def test_init(self, temp_project_dir): """Test SessionBrowser initialization.""" app = SessionBrowser(temp_project_dir) - assert app.project_path == temp_project_dir + # SessionBrowser resolves path, so compare resolved paths + assert app.project_path == temp_project_dir.resolve() assert isinstance(app.cache_manager, CacheManager) assert app.sessions == {} assert app.selected_session_id is None From afb2f8090a378743e8c3bd8d80eb424ca1de1a4c Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Tue, 20 Jan 2026 23:57:36 +0100 Subject: [PATCH 20/23] Fix Markdown viewer maximize and focus behavior - Override action_maximize to maximize entire MarkdownViewer, not children - Set ALLOW_MAXIMIZE=True on SafeMarkdownViewer - Make document focusable for proper keyboard navigation - Fix test_init for Windows short path resolution Co-Authored-By: Claude Opus 4.5 --- claude_code_log/tui.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 8c0387f5..1eb1e233 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -472,6 +472,14 @@ class SafeMarkdownViewer(MarkdownViewer): while still allowing anchor navigation for ToC. """ + # Allow maximizing the viewer (screen will redirect children to this) + ALLOW_MAXIMIZE = True + + def on_mount(self) -> None: + """Configure document for proper keyboard navigation.""" + # Enable focus on the document so keys work after focus changes + self.document.can_focus = True + async def go(self, location: str | PurePath) -> None: """Navigate to a new location - intercept non-anchor links. 
@@ -700,6 +708,14 @@ def _clean_toc_labels(self, node: Any) -> None: async def action_dismiss(self, result: None = None) -> None: self.dismiss(result) + def action_maximize(self) -> None: + """Maximize the MarkdownViewer (not individual children).""" + try: + viewer = self.query_one("#md-viewer", SafeMarkdownViewer) + self.maximize(viewer) + except Exception: + pass + def on_key(self, event: Any) -> None: """Intercept Tab keys to handle focus switching without scroll.""" if event.key in ("tab", "shift+tab"): From ed0dc10611b19b68eac3265efa6319234ccc371d Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Wed, 21 Jan 2026 00:57:42 +0100 Subject: [PATCH 21/23] Fix test_export_action_with_selection for Windows short paths Use resolved path when checking browser call since SessionBrowser resolves project_path internally. Co-Authored-By: Claude Opus 4.5 --- test/test_tui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_tui.py b/test/test_tui.py index 8e78f6cb..6e37f2b8 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -410,7 +410,8 @@ async def test_export_action_with_selection(self, temp_project_dir): app.action_export_selected() # Check that browser was opened with the session HTML file - expected_file = temp_project_dir / "session-session-123.html" + # Use resolved path since SessionBrowser resolves project_path + expected_file = temp_project_dir.resolve() / "session-session-123.html" mock_browser.assert_called_once_with(f"file://{expected_file}") @pytest.mark.asyncio From 449bae40e6c5047088eae9b03c310305d50b3e69 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 19 Jan 2026 14:37:42 +0000 Subject: [PATCH 22/23] Fix confusing get_page_size_config method --- claude_code_log/cache.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 924121cf..67109652 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -1123,7 +1123,10 @@ def 
load_session_entries(self, session_id: str) -> List[TranscriptEntry]: # ========== Page Cache Methods (Pagination) ========== def get_page_size_config(self) -> Optional[int]: - """Get the configured page size from the most recent page, if any.""" + """Get the configured page size, if any pages exist. + + All pages in a project share the same page_size_config value. + """ if self._project_id is None: return None @@ -1131,7 +1134,6 @@ def get_page_size_config(self) -> Optional[int]: row = conn.execute( """SELECT page_size_config FROM html_pages WHERE project_id = ? - ORDER BY page_number ASC LIMIT 1""", (self._project_id,), ).fetchone() From 2b04a14dc2e0b870b1f4c5493f19eafeccbe545d Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Wed, 21 Jan 2026 23:36:23 +0000 Subject: [PATCH 23/23] Ignore progress updates + prevent massive lines overwhelming the terminal buffer --- claude_code_log/converter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index b22aa7bf..d05791a6 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -212,13 +212,15 @@ def load_transcript( entry_type in [ "file-history-snapshot", # Internal Claude Code file backup metadata + "progress", # Real-time progress updates (hook_progress, bash_progress) ] ): # Silently skip internal message types we don't render pass else: + display_line = line[:1000] + "..." if len(line) > 1000 else line print( - f"Line {line_no} of {jsonl_path} is not a recognised message type: {line}" + f"Line {line_no} of {jsonl_path} is not a recognised message type: {display_line}" ) except json.JSONDecodeError as e: print(