From 3a411a92a03e31962dd273b102d14d2125b255e9 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sat, 3 Jan 2026 21:39:45 +0000 Subject: [PATCH 01/23] SQLite cache with incremental HTML regeneration and pagination - Migrate from JSON cache to SQLite for better performance and integrity - Add database migrations system for schema versioning - Implement incremental HTML regeneration (only rebuild changed sessions) - Add pagination support for large projects - Improve working directories handling - Use explicit UTF-8 encoding for Windows compatibility --- claude_code_log/cache.py | 1340 +++++++++++++---- claude_code_log/cli.py | 51 +- claude_code_log/converter.py | 709 ++++++++- .../templates/components/page_nav_styles.css | 74 + .../html/templates/transcript.html | 27 + .../migrations/001_initial_schema.sql | 114 ++ claude_code_log/migrations/002_html_cache.sql | 18 + .../migrations/003_html_pagination.sql | 39 + claude_code_log/migrations/__init__.py | 5 + claude_code_log/migrations/runner.py | 163 ++ claude_code_log/renderer.py | 15 + claude_code_log/tui.py | 6 +- claude_code_log/utils.py | 12 +- test/__snapshots__/test_snapshot_html.ambr | 308 +++- test/test_cache.py | 116 +- test/test_cache_integration.py | 109 +- test/test_cache_sqlite_integrity.py | 908 +++++++++++ test/test_html_regeneration.py | 194 ++- test/test_integration_realistic.py | 96 +- test/test_pagination.py | 550 +++++++ test/test_performance.py | 2 +- test/test_project_display_name.py | 33 +- test/test_project_matching.py | 33 +- test/test_sidechain_agents.py | 51 +- test/test_tui.py | 4 +- 25 files changed, 4353 insertions(+), 624 deletions(-) create mode 100644 claude_code_log/html/templates/components/page_nav_styles.css create mode 100644 claude_code_log/migrations/001_initial_schema.sql create mode 100644 claude_code_log/migrations/002_html_cache.sql create mode 100644 claude_code_log/migrations/003_html_pagination.sql create mode 100644 claude_code_log/migrations/__init__.py create mode 
100644 claude_code_log/migrations/runner.py create mode 100644 test/test_cache_sqlite_integrity.py create mode 100644 test/test_pagination.py diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 3f5d43b7..81522d80 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -1,14 +1,29 @@ #!/usr/bin/env python3 -"""Cache management for Claude Code Log to improve performance.""" +"""SQLite-based cache management for Claude Code Log.""" import json -from pathlib import Path -from typing import Any, Optional, cast +import sqlite3 +from contextlib import contextmanager from datetime import datetime -from pydantic import BaseModel +from pathlib import Path +from typing import Any, Dict, Generator, List, Optional + from packaging import version +from pydantic import BaseModel + +from .migrations.runner import run_migrations +from .models import ( + AssistantTranscriptEntry, + QueueOperationTranscriptEntry, + SummaryTranscriptEntry, + SystemTranscriptEntry, + TranscriptEntry, + UserTranscriptEntry, + parse_transcript_entry, +) -from .models import TranscriptEntry + +# ========== Data Models ========== class CachedFileInfo(BaseModel): @@ -37,6 +52,38 @@ class SessionCacheData(BaseModel): total_cache_read_tokens: int = 0 +class HtmlCacheEntry(BaseModel): + """Information about a generated HTML file.""" + + html_path: str # e.g., "session-abc123.html" or "combined_transcripts.html" + generated_at: str # ISO timestamp when HTML was generated + source_session_id: Optional[str] = ( + None # session_id for individual files, None for combined + ) + message_count: int = 0 # for sanity checking + library_version: str # which version generated it + + +class PageCacheData(BaseModel): + """Information about a paginated combined transcript page.""" + + page_number: int + html_path: str # e.g., "combined_transcripts.html" or "combined_transcripts_2.html" + page_size_config: int # the --page-size value used + message_count: int # total messages on this page + 
session_ids: List[str] # sessions on this page, in order + first_session_id: str + last_session_id: str + first_timestamp: Optional[str] = None + last_timestamp: Optional[str] = None + total_input_tokens: int = 0 + total_output_tokens: int = 0 + total_cache_creation_tokens: int = 0 + total_cache_read_tokens: int = 0 + generated_at: str # ISO timestamp when page was generated + library_version: str + + class ProjectCache(BaseModel): """Project-level cache index structure for index.json.""" @@ -66,11 +113,63 @@ class ProjectCache(BaseModel): latest_timestamp: str = "" +# ========== Helper Functions ========== + + +def get_library_version() -> str: + """Get the current library version from package metadata or pyproject.toml.""" + # First try to get version from installed package metadata + try: + from importlib.metadata import version as get_version + + return get_version("claude-code-log") + except Exception: + # Package not installed or other error, continue to file-based detection + pass + + # Second approach: Use importlib.resources for more robust package location detection + try: + from importlib import resources + import toml + + # Get the package directory and navigate to parent for pyproject.toml + package_files = resources.files("claude_code_log") + # Convert to Path to access parent reliably + package_root = Path(str(package_files)).parent + pyproject_path = package_root / "pyproject.toml" + + if pyproject_path.exists(): + with open(pyproject_path, "r", encoding="utf-8") as f: + pyproject_data = toml.load(f) + return pyproject_data.get("project", {}).get("version", "unknown") + except Exception: + pass + + # Final fallback: Try to read from pyproject.toml using file-relative path + try: + import toml + + project_root = Path(__file__).parent.parent + pyproject_path = project_root / "pyproject.toml" + + if pyproject_path.exists(): + with open(pyproject_path, "r", encoding="utf-8") as f: + pyproject_data = toml.load(f) + return pyproject_data.get("project", 
{}).get("version", "unknown") + except Exception: + pass + + return "unknown" + + +# ========== Cache Manager ========== + + class CacheManager: - """Manages cache operations for a project directory.""" + """SQLite-based cache manager for Claude Code Log.""" def __init__(self, project_path: Path, library_version: str): - """Initialize cache manager for a project. + """Initialise cache manager for a project. Args: project_path: Path to the project directory containing JSONL files @@ -78,263 +177,363 @@ def __init__(self, project_path: Path, library_version: str): """ self.project_path = project_path self.library_version = library_version - self.cache_dir = project_path / "cache" - self.index_file = self.cache_dir / "index.json" - - # Ensure cache directory exists - self.cache_dir.mkdir(exist_ok=True) - - # Load existing cache index if available - self._project_cache: Optional[ProjectCache] = None - self._load_project_cache() - - def _load_project_cache(self) -> None: - """Load the project cache index from disk.""" - if self.index_file.exists(): - try: - with open(self.index_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - self._project_cache = ProjectCache.model_validate(cache_data) - - # Check if cache version is compatible with current library version - if not self._is_cache_version_compatible(self._project_cache.version): + + # Database at parent level (projects_dir/cache.db) + self.db_path = project_path.parent / "cache.db" + + # Initialise database and ensure project exists + self._init_database() + self._project_id: Optional[int] = None + self._ensure_project_exists() + + @contextmanager + def _get_connection(self) -> Generator[sqlite3.Connection, None, None]: + """Get a database connection with proper settings.""" + conn = sqlite3.connect(self.db_path, timeout=30.0) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + conn.execute("PRAGMA journal_mode = WAL") + try: + yield conn + finally: + conn.close() + + def 
_init_database(self) -> None: + """Create schema if needed using migration runner.""" + # Run any pending migrations + run_migrations(self.db_path) + + def _ensure_project_exists(self) -> None: + """Ensure project record exists and get its ID.""" + project_path_str = str(self.project_path) + + with self._get_connection() as conn: + row = conn.execute( + "SELECT id, version FROM projects WHERE project_path = ?", + (project_path_str,), + ).fetchone() + + if row: + self._project_id = row["id"] + cached_version = row["version"] + + # Check version compatibility + if not self._is_cache_version_compatible(cached_version): print( - f"Cache version incompatible: {self._project_cache.version} -> {self.library_version}, invalidating cache" + f"Cache version incompatible: {cached_version} -> {self.library_version}, invalidating cache" ) - self.clear_cache() - self._project_cache = None - except Exception as e: - print(f"Warning: Failed to load cache index, will rebuild: {e}") - self._project_cache = None - - # Initialize empty cache if none exists - if self._project_cache is None: - self._project_cache = ProjectCache( - version=self.library_version, - cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(self.project_path), - cached_files={}, - sessions={}, - ) + self._clear_project_data(conn) + self._project_id = self._create_project(conn) + else: + self._project_id = self._create_project(conn) + + conn.commit() + + def _create_project(self, conn: sqlite3.Connection) -> int: + """Create a new project record.""" + now = datetime.now().isoformat() + cursor = conn.execute( + """ + INSERT INTO projects (project_path, version, cache_created, last_updated) + VALUES (?, ?, ?, ?) 
+ """, + (str(self.project_path), self.library_version, now, now), + ) + return cursor.lastrowid or 0 - def _save_project_cache(self) -> None: - """Save the project cache index to disk.""" - if self._project_cache is None: + def _clear_project_data(self, conn: sqlite3.Connection) -> None: + """Clear all data for the current project.""" + if self._project_id is None: return - self._project_cache.last_updated = datetime.now().isoformat() + # Cascade delete will handle messages and files + conn.execute("DELETE FROM projects WHERE id = ?", (self._project_id,)) - with open(self.index_file, "w", encoding="utf-8") as f: - json.dump(self._project_cache.model_dump(), f, indent=2) + def _update_last_updated(self, conn: sqlite3.Connection) -> None: + """Update the last_updated timestamp for the project.""" + if self._project_id is None: + return - def _get_cache_file_path(self, jsonl_path: Path) -> Path: - """Get the cache file path for a given JSONL file.""" - return self.cache_dir / f"{jsonl_path.stem}.json" + conn.execute( + "UPDATE projects SET last_updated = ? 
WHERE id = ?", + (datetime.now().isoformat(), self._project_id), + ) + + def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, Any]: + """Convert TranscriptEntry to dict for SQLite insertion.""" + base: Dict[str, Any] = { + "project_id": self._project_id, + "file_id": file_id, + "type": entry.type, + "timestamp": getattr(entry, "timestamp", None), + "session_id": getattr(entry, "sessionId", None), + "_uuid": getattr(entry, "uuid", None), + "_parent_uuid": getattr(entry, "parentUuid", None), + "_is_sidechain": 1 if getattr(entry, "isSidechain", False) else 0, + "_user_type": getattr(entry, "userType", None), + "_cwd": getattr(entry, "cwd", None), + "_version": getattr(entry, "version", None), + "_is_meta": ( + 1 + if getattr(entry, "isMeta", None) is True + else (0 if getattr(entry, "isMeta", None) is False else None) + ), + "_agent_id": getattr(entry, "agentId", None), + "_request_id": None, + "input_tokens": None, + "output_tokens": None, + "cache_creation_tokens": None, + "cache_read_tokens": None, + "_leaf_uuid": None, + "_level": None, + "_operation": None, + "content": json.dumps(entry.model_dump()), + } + + # Extract flattened usage for assistant messages + if isinstance(entry, AssistantTranscriptEntry): + base["_request_id"] = entry.requestId + if entry.message and entry.message.usage: + usage = entry.message.usage + base["input_tokens"] = usage.input_tokens + base["output_tokens"] = usage.output_tokens + base["cache_creation_tokens"] = usage.cache_creation_input_tokens + base["cache_read_tokens"] = usage.cache_read_input_tokens + + # User entry specific + if isinstance(entry, UserTranscriptEntry): + if entry.agentId: + base["_agent_id"] = entry.agentId + + # Summary specific + if isinstance(entry, SummaryTranscriptEntry): + base["_leaf_uuid"] = entry.leafUuid + + # System specific + if isinstance(entry, SystemTranscriptEntry): + base["_level"] = entry.level + + # Queue-operation specific + if isinstance(entry, 
QueueOperationTranscriptEntry): + base["_operation"] = entry.operation + + return base + + def _deserialize_entry(self, row: sqlite3.Row) -> TranscriptEntry: + """Convert SQLite row back to TranscriptEntry.""" + content_dict = json.loads(row["content"]) + return parse_transcript_entry(content_dict) + + def _get_file_id(self, jsonl_path: Path) -> Optional[int]: + """Get the file ID for a JSONL file.""" + if self._project_id is None: + return None + + with self._get_connection() as conn: + row = conn.execute( + "SELECT id FROM cached_files WHERE project_id = ? AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() + + return row["id"] if row else None def is_file_cached(self, jsonl_path: Path) -> bool: """Check if a JSONL file has a valid cache entry.""" - if self._project_cache is None: + if self._project_id is None: return False - file_key = jsonl_path.name - if file_key not in self._project_cache.cached_files: + if not jsonl_path.exists(): return False - # Check if source file exists and modification time matches - if not jsonl_path.exists(): + with self._get_connection() as conn: + row = conn.execute( + "SELECT source_mtime FROM cached_files WHERE project_id = ? 
AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() + + if not row: return False - cached_info = self._project_cache.cached_files[file_key] source_mtime = jsonl_path.stat().st_mtime + cached_mtime = row["source_mtime"] - # Cache is valid if modification times match and cache file exists - cache_file = self._get_cache_file_path(jsonl_path) - return ( - abs(source_mtime - cached_info.source_mtime) < 1.0 and cache_file.exists() - ) + # Cache is valid if modification times match (within 1 second tolerance) + return abs(source_mtime - cached_mtime) < 1.0 - def load_cached_entries(self, jsonl_path: Path) -> Optional[list[TranscriptEntry]]: + def load_cached_entries(self, jsonl_path: Path) -> Optional[List[TranscriptEntry]]: """Load cached transcript entries for a JSONL file.""" if not self.is_file_cached(jsonl_path): return None - cache_file = self._get_cache_file_path(jsonl_path) - try: - with open(cache_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - - # Expect timestamp-keyed format - flatten all entries - entries_data: list[dict[str, Any]] = [] - for timestamp_entries in cache_data.values(): - if isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - entries_data.extend(cast(list[dict[str, Any]], timestamp_entries)) - - # Deserialize back to TranscriptEntry objects - from .factories import create_transcript_entry - - entries = [ - create_transcript_entry(entry_dict) for entry_dict in entries_data - ] - return entries - except Exception as e: - print(f"Warning: Failed to load cached entries from {cache_file}: {e}") + file_id = self._get_file_id(jsonl_path) + if file_id is None: return None + with self._get_connection() as conn: + rows = conn.execute( + "SELECT content FROM messages WHERE file_id = ? 
ORDER BY timestamp NULLS LAST", + (file_id,), + ).fetchall() + + return [self._deserialize_entry(row) for row in rows] + def load_cached_entries_filtered( self, jsonl_path: Path, from_date: Optional[str], to_date: Optional[str] - ) -> Optional[list[TranscriptEntry]]: - """Load cached entries with efficient timestamp-based filtering.""" + ) -> Optional[List[TranscriptEntry]]: + """Load cached entries with SQL-based timestamp filtering.""" if not self.is_file_cached(jsonl_path): return None - cache_file = self._get_cache_file_path(jsonl_path) - try: - with open(cache_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - - # If no date filtering needed, fall back to regular loading - if not from_date and not to_date: - return self.load_cached_entries(jsonl_path) - - # Parse date filters - from .parser import parse_timestamp - import dateparser - - from_dt = None - to_dt = None - - if from_date: - from_dt = dateparser.parse(from_date) - if from_dt and ( - from_date in ["today", "yesterday"] or "days ago" in from_date - ): - from_dt = from_dt.replace(hour=0, minute=0, second=0, microsecond=0) - - if to_date: - to_dt = dateparser.parse(to_date) - if to_dt: - if to_date in ["today", "yesterday"] or "days ago" in to_date: - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) - else: - # For simple date strings like "2023-01-01", set to end of day - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) - - # Filter entries by timestamp - filtered_entries_data: list[dict[str, Any]] = [] - - for timestamp_key, timestamp_entries in cache_data.items(): - if timestamp_key == "_no_timestamp": - # Always include entries without timestamps (like summaries) - if isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - filtered_entries_data.extend( - cast(list[dict[str, Any]], timestamp_entries) - ) - else: - # Check if timestamp falls within range - message_dt = 
parse_timestamp(timestamp_key) - if message_dt: - # Convert to naive datetime for comparison - if message_dt.tzinfo: - message_dt = message_dt.replace(tzinfo=None) - - # Apply date filtering - if from_dt and message_dt < from_dt: - continue - if to_dt and message_dt > to_dt: - continue - - if isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - filtered_entries_data.extend( - cast(list[dict[str, Any]], timestamp_entries) - ) - - # Deserialize filtered entries - from .factories import create_transcript_entry - - entries = [ - create_transcript_entry(entry_dict) - for entry_dict in filtered_entries_data - ] - return entries - except Exception as e: - print( - f"Warning: Failed to load filtered cached entries from {cache_file}: {e}" - ) + # If no date filtering needed, fall back to regular loading + if not from_date and not to_date: + return self.load_cached_entries(jsonl_path) + + file_id = self._get_file_id(jsonl_path) + if file_id is None: return None + # Parse dates + import dateparser + + from_dt = None + to_dt = None + + if from_date: + from_dt = dateparser.parse(from_date) + if from_dt and ( + from_date in ["today", "yesterday"] or "days ago" in from_date + ): + from_dt = from_dt.replace(hour=0, minute=0, second=0, microsecond=0) + + if to_date: + to_dt = dateparser.parse(to_date) + if to_dt: + if to_date in ["today", "yesterday"] or "days ago" in to_date: + to_dt = to_dt.replace( + hour=23, minute=59, second=59, microsecond=999999 + ) + else: + to_dt = to_dt.replace( + hour=23, minute=59, second=59, microsecond=999999 + ) + + # Build query with SQL-based filtering + sql = "SELECT content FROM messages WHERE file_id = ?" 
+ params: List[Any] = [file_id] + + if from_dt: + # Include entries with NULL timestamp (like summaries) OR within date range + sql += " AND (timestamp IS NULL OR timestamp >= ?)" + params.append(from_dt.isoformat()) + + if to_dt: + sql += " AND (timestamp IS NULL OR timestamp <= ?)" + params.append(to_dt.isoformat()) + + sql += " ORDER BY timestamp NULLS LAST" + + with self._get_connection() as conn: + rows = conn.execute(sql, params).fetchall() + + return [self._deserialize_entry(row) for row in rows] + def save_cached_entries( - self, jsonl_path: Path, entries: list[TranscriptEntry] + self, jsonl_path: Path, entries: List[TranscriptEntry] ) -> None: - """Save parsed transcript entries to cache with timestamp-based structure.""" - cache_file = self._get_cache_file_path(jsonl_path) + """Save parsed transcript entries to cache.""" + if self._project_id is None: + return - try: - # Create timestamp-keyed cache structure for efficient date filtering - cache_data: dict[str, Any] = {} + source_mtime = jsonl_path.stat().st_mtime + cached_mtime = datetime.now().timestamp() + + with self._get_connection() as conn: + # Insert or update file record + conn.execute( + """ + INSERT OR REPLACE INTO cached_files + (project_id, file_name, file_path, source_mtime, cached_mtime, message_count) + VALUES (?, ?, ?, ?, ?, ?) + """, + ( + self._project_id, + jsonl_path.name, + str(jsonl_path), + source_mtime, + cached_mtime, + len(entries), + ), + ) + + # Get the file ID + row = conn.execute( + "SELECT id FROM cached_files WHERE project_id = ? 
AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() + file_id = row["id"] + # Delete existing messages for this file + conn.execute("DELETE FROM messages WHERE file_id = ?", (file_id,)) + + # Insert all entries in a batch for entry in entries: - # Get timestamp - use empty string as fallback for entries without timestamps - timestamp = ( - getattr(entry, "timestamp", "") - if hasattr(entry, "timestamp") - else "" - ) - if not timestamp: - # Use a special key for entries without timestamps (like summaries) - timestamp = "_no_timestamp" - - # Store entry data under timestamp - if timestamp not in cache_data: - cache_data[timestamp] = [] - - cache_data[timestamp].append(entry.model_dump()) - - with open(cache_file, "w", encoding="utf-8") as f: - json.dump(cache_data, f, indent=2) - - # Update cache index - if self._project_cache is not None: - source_mtime = jsonl_path.stat().st_mtime - cached_mtime = cache_file.stat().st_mtime - - # Extract session IDs from entries - session_ids: list[str] = [] - for entry in entries: - if hasattr(entry, "sessionId"): - session_id = getattr(entry, "sessionId", "") - if session_id: - session_ids.append(session_id) - session_ids = list(set(session_ids)) # Remove duplicates - - self._project_cache.cached_files[jsonl_path.name] = CachedFileInfo( - file_path=str(jsonl_path), - source_mtime=source_mtime, - cached_mtime=cached_mtime, - message_count=len(entries), - session_ids=session_ids, + serialized = self._serialize_entry(entry, file_id) + conn.execute( + """ + INSERT INTO messages ( + project_id, file_id, type, timestamp, session_id, + _uuid, _parent_uuid, _is_sidechain, _user_type, _cwd, _version, + _is_meta, _agent_id, _request_id, + input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, + _leaf_uuid, _level, _operation, content + ) VALUES ( + :project_id, :file_id, :type, :timestamp, :session_id, + :_uuid, :_parent_uuid, :_is_sidechain, :_user_type, :_cwd, :_version, + :_is_meta, :_agent_id, 
:_request_id, + :input_tokens, :output_tokens, :cache_creation_tokens, :cache_read_tokens, + :_leaf_uuid, :_level, :_operation, :content + ) + """, + serialized, ) - self._save_project_cache() - except Exception as e: - print(f"Warning: Failed to save cached entries to {cache_file}: {e}") + self._update_last_updated(conn) + conn.commit() - def update_session_cache(self, session_data: dict[str, SessionCacheData]) -> None: + def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> None: """Update cached session information.""" - if self._project_cache is None: + if self._project_id is None: return - self._project_cache.sessions.update( - {session_id: data for session_id, data in session_data.items()} - ) - self._save_project_cache() + with self._get_connection() as conn: + for session_id, data in session_data.items(): + conn.execute( + """ + INSERT OR REPLACE INTO sessions ( + project_id, session_id, summary, first_timestamp, last_timestamp, + message_count, first_user_message, cwd, + total_input_tokens, total_output_tokens, + total_cache_creation_tokens, total_cache_read_tokens + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, + ( + self._project_id, + session_id, + data.summary, + data.first_timestamp, + data.last_timestamp, + data.message_count, + data.first_user_message, + data.cwd, + data.total_input_tokens, + data.total_output_tokens, + data.total_cache_creation_tokens, + data.total_cache_read_tokens, + ), + ) + + self._update_last_updated(conn) + conn.commit() def update_project_aggregates( self, @@ -347,165 +546,650 @@ def update_project_aggregates( latest_timestamp: str, ) -> None: """Update project-level aggregate information.""" - if self._project_cache is None: + if self._project_id is None: return - self._project_cache.total_message_count = total_message_count - self._project_cache.total_input_tokens = total_input_tokens - self._project_cache.total_output_tokens = total_output_tokens - self._project_cache.total_cache_creation_tokens = total_cache_creation_tokens - self._project_cache.total_cache_read_tokens = total_cache_read_tokens - self._project_cache.earliest_timestamp = earliest_timestamp - self._project_cache.latest_timestamp = latest_timestamp + with self._get_connection() as conn: + conn.execute( + """ + UPDATE projects SET + total_message_count = ?, + total_input_tokens = ?, + total_output_tokens = ?, + total_cache_creation_tokens = ?, + total_cache_read_tokens = ?, + earliest_timestamp = ?, + latest_timestamp = ?, + last_updated = ? + WHERE id = ? + """, + ( + total_message_count, + total_input_tokens, + total_output_tokens, + total_cache_creation_tokens, + total_cache_read_tokens, + earliest_timestamp, + latest_timestamp, + datetime.now().isoformat(), + self._project_id, + ), + ) + conn.commit() - self._save_project_cache() + def get_working_directories(self) -> List[str]: + """Get list of working directories associated with this project. 
- def update_working_directories(self, working_directories: list[str]) -> None: - """Update the list of working directories associated with this project.""" - if self._project_cache is None: - return + Queries distinct cwd values from sessions table. + """ + if self._project_id is None: + return [] - self._project_cache.working_directories = working_directories - self._save_project_cache() + with self._get_connection() as conn: + rows = conn.execute( + "SELECT DISTINCT cwd FROM sessions WHERE project_id = ? AND cwd IS NOT NULL", + (self._project_id,), + ).fetchall() - def get_modified_files(self, jsonl_files: list[Path]) -> list[Path]: - """Get list of JSONL files that need to be reprocessed.""" - modified_files: list[Path] = [] + return [row["cwd"] for row in rows] - for jsonl_file in jsonl_files: - if not self.is_file_cached(jsonl_file): - modified_files.append(jsonl_file) - - return modified_files + def get_modified_files(self, jsonl_files: List[Path]) -> List[Path]: + """Get list of JSONL files that need to be reprocessed.""" + return [ + jsonl_file + for jsonl_file in jsonl_files + if not self.is_file_cached(jsonl_file) + ] def get_cached_project_data(self) -> Optional[ProjectCache]: """Get the cached project data if available.""" - return self._project_cache + if self._project_id is None: + return None - def clear_cache(self) -> None: - """Clear all cache files and reset the project cache.""" - if self.cache_dir.exists(): - for cache_file in self.cache_dir.glob("*.json"): - if cache_file.name != "index.json": # Don't delete the index file here - try: - cache_file.unlink() - except Exception as e: - print(f"Warning: Failed to delete cache file {cache_file}: {e}") - - if self.index_file.exists(): - try: - self.index_file.unlink() - except Exception as e: - print(f"Warning: Failed to delete cache index {self.index_file}: {e}") - - # Reset the project cache - self._project_cache = ProjectCache( - version=self.library_version, - 
cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(self.project_path), - cached_files={}, - sessions={}, + with self._get_connection() as conn: + # Get project data + project_row = conn.execute( + "SELECT * FROM projects WHERE id = ?", (self._project_id,) + ).fetchone() + + if not project_row: + return None + + # Get cached files + file_rows = conn.execute( + "SELECT * FROM cached_files WHERE project_id = ?", (self._project_id,) + ).fetchall() + + cached_files: Dict[str, CachedFileInfo] = {} + for row in file_rows: + # Get session IDs for this file from messages + session_rows = conn.execute( + "SELECT DISTINCT session_id FROM messages WHERE file_id = ? AND session_id IS NOT NULL", + (row["id"],), + ).fetchall() + session_ids = [r["session_id"] for r in session_rows] + + cached_files[row["file_name"]] = CachedFileInfo( + file_path=row["file_path"], + source_mtime=row["source_mtime"], + cached_mtime=row["cached_mtime"], + message_count=row["message_count"], + session_ids=session_ids, + ) + + # Get sessions + session_rows = conn.execute( + "SELECT * FROM sessions WHERE project_id = ?", (self._project_id,) + ).fetchall() + + sessions: Dict[str, SessionCacheData] = {} + for row in session_rows: + sessions[row["session_id"]] = SessionCacheData( + session_id=row["session_id"], + summary=row["summary"], + first_timestamp=row["first_timestamp"], + last_timestamp=row["last_timestamp"], + message_count=row["message_count"], + first_user_message=row["first_user_message"], + cwd=row["cwd"], + total_input_tokens=row["total_input_tokens"], + total_output_tokens=row["total_output_tokens"], + total_cache_creation_tokens=row["total_cache_creation_tokens"], + total_cache_read_tokens=row["total_cache_read_tokens"], + ) + + return ProjectCache( + version=project_row["version"], + cache_created=project_row["cache_created"], + last_updated=project_row["last_updated"], + project_path=project_row["project_path"], + 
cached_files=cached_files, + total_message_count=project_row["total_message_count"], + total_input_tokens=project_row["total_input_tokens"], + total_output_tokens=project_row["total_output_tokens"], + total_cache_creation_tokens=project_row["total_cache_creation_tokens"], + total_cache_read_tokens=project_row["total_cache_read_tokens"], + sessions=sessions, + working_directories=self.get_working_directories(), + earliest_timestamp=project_row["earliest_timestamp"], + latest_timestamp=project_row["latest_timestamp"], ) - def _is_cache_version_compatible(self, cache_version: str) -> bool: - """Check if a cache version is compatible with the current library version. + def clear_cache(self) -> None: + """Clear all cache data for this project.""" + if self._project_id is None: + return - This uses a compatibility matrix to determine if cache invalidation is needed. - Only breaking changes require cache invalidation, not every version bump. - """ + with self._get_connection() as conn: + self._clear_project_data(conn) + self._project_id = self._create_project(conn) + conn.commit() + + def _is_cache_version_compatible(self, cache_version: str) -> bool: + """Check if a cache version is compatible with the current library version.""" if cache_version == self.library_version: return True # Define compatibility rules - # Format: "cache_version": "minimum_library_version_required" - # If cache version is older than the minimum required, it needs invalidation breaking_changes: dict[str, str] = { - # 0.9.0 introduced _compact_ide_tags_for_preview() which transforms - # first_user_message to use emoji indicators instead of raw IDE tags - "0.8.0": "0.9.0", + # Example: "0.3.3": "0.3.4" means cache from 0.3.3 needs invalidation if lib is >= 0.3.4 } cache_ver = version.parse(cache_version) current_ver = version.parse(self.library_version) - # Check if cache version requires invalidation due to breaking changes for breaking_version_pattern, min_required in breaking_changes.items(): 
min_required_ver = version.parse(min_required) - # If current version is at or above the minimum required for this breaking change if current_ver >= min_required_ver: - # Check if cache version is affected by this breaking change if breaking_version_pattern.endswith(".x"): - # Pattern like "0.2.x" matches any 0.2.* version major_minor = breaking_version_pattern[:-2] if str(cache_ver).startswith(major_minor): return False else: - # Exact version or version comparison breaking_ver = version.parse(breaking_version_pattern) if cache_ver <= breaking_ver: return False - # If no breaking changes affect this cache version, it's compatible return True - def get_cache_stats(self) -> dict[str, Any]: + def get_cache_stats(self) -> Dict[str, Any]: """Get cache statistics for reporting.""" - if self._project_cache is None: + if self._project_id is None: + return {"cache_enabled": False} + + with self._get_connection() as conn: + project_row = conn.execute( + "SELECT * FROM projects WHERE id = ?", (self._project_id,) + ).fetchone() + + file_count = conn.execute( + "SELECT COUNT(*) as cnt FROM cached_files WHERE project_id = ?", + (self._project_id,), + ).fetchone() + + session_count = conn.execute( + "SELECT COUNT(*) as cnt FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchone() + + if not project_row: return {"cache_enabled": False} return { "cache_enabled": True, - "cached_files_count": len(self._project_cache.cached_files), - "total_cached_messages": self._project_cache.total_message_count, - "total_sessions": len(self._project_cache.sessions), - "cache_created": self._project_cache.cache_created, - "last_updated": self._project_cache.last_updated, + "cached_files_count": file_count["cnt"] if file_count else 0, + "total_cached_messages": project_row["total_message_count"], + "total_sessions": session_count["cnt"] if session_count else 0, + "cache_created": project_row["cache_created"], + "last_updated": project_row["last_updated"], } + # ========== HTML 
Cache Methods ========== -def get_library_version() -> str: - """Get the current library version from package metadata or pyproject.toml.""" - # First try to get version from installed package metadata - try: - from importlib.metadata import version + def get_html_cache(self, html_path: str) -> Optional[HtmlCacheEntry]: + """Get HTML cache entry for a given path.""" + if self._project_id is None: + return None - return version("claude-code-log") - except Exception: - # Package not installed or other error, continue to file-based detection - pass + with self._get_connection() as conn: + row = conn.execute( + """SELECT html_path, generated_at, source_session_id, message_count, library_version + FROM html_cache + WHERE project_id = ? AND html_path = ?""", + (self._project_id, html_path), + ).fetchone() - # Second approach: Use importlib.resources for more robust package location detection - try: - from importlib import resources - import toml + if not row: + return None - # Get the package directory and navigate to parent for pyproject.toml - package_files = resources.files("claude_code_log") - # Convert to Path to access parent reliably - package_root = Path(str(package_files)).parent - pyproject_path = package_root / "pyproject.toml" + return HtmlCacheEntry( + html_path=row["html_path"], + generated_at=row["generated_at"], + source_session_id=row["source_session_id"], + message_count=row["message_count"] or 0, + library_version=row["library_version"], + ) - if pyproject_path.exists(): - with open(pyproject_path, "r", encoding="utf-8") as f: - pyproject_data = toml.load(f) - return pyproject_data.get("project", {}).get("version", "unknown") - except Exception: - pass + def update_html_cache( + self, + html_path: str, + session_id: Optional[str], + message_count: int, + ) -> None: + """Update or insert HTML cache entry.""" + if self._project_id is None: + return - # Final fallback: Try to read from pyproject.toml using file-relative path - try: - import toml + with 
self._get_connection() as conn: + conn.execute( + """INSERT INTO html_cache + (project_id, html_path, generated_at, source_session_id, message_count, library_version) + VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT(project_id, html_path) + DO UPDATE SET + generated_at = excluded.generated_at, + source_session_id = excluded.source_session_id, + message_count = excluded.message_count, + library_version = excluded.library_version""", + ( + self._project_id, + html_path, + datetime.now().isoformat(), + session_id, + message_count, + self.library_version, + ), + ) + conn.commit() - project_root = Path(__file__).parent.parent - pyproject_path = project_root / "pyproject.toml" + def is_html_stale( + self, html_path: str, session_id: Optional[str] = None + ) -> tuple[bool, str]: + """Check if HTML file needs regeneration. - if pyproject_path.exists(): - with open(pyproject_path, "r", encoding="utf-8") as f: - pyproject_data = toml.load(f) - return pyproject_data.get("project", {}).get("version", "unknown") - except Exception: - pass + Args: + html_path: Path to HTML file (e.g., "session-abc123.html") + session_id: Session ID for individual session files, None for combined - return "unknown" + Returns: + Tuple of (is_stale: bool, reason: str) + """ + from .renderer import is_html_outdated + + if self._project_id is None: + return True, "no_cache" + + # Get existing HTML cache entry + html_cache = self.get_html_cache(html_path) + if html_cache is None: + return True, "not_cached" + + # Check library version in cache + if html_cache.library_version != self.library_version: + return True, "version_mismatch" + + # Check if file exists and has correct version + actual_file = self.project_path / html_path + if not actual_file.exists(): + return True, "file_missing" + if is_html_outdated(actual_file): + return True, "file_version_mismatch" + + with self._get_connection() as conn: + if session_id is not None: + # For individual session HTML: check if session message count changed + row = 
conn.execute( + """SELECT message_count FROM sessions + WHERE project_id = ? AND session_id = ?""", + (self._project_id, session_id), + ).fetchone() + + if not row: + return True, "session_not_found" + + # Compare message counts + if row["message_count"] != html_cache.message_count: + return True, "session_updated" + else: + # For combined transcript: check if total message count changed + # This is more reliable than timestamp comparison, which can + # trigger false positives when cache metadata is updated + row = conn.execute( + """SELECT total_message_count FROM projects + WHERE id = ?""", + (self._project_id,), + ).fetchone() + + if row and row["total_message_count"] != html_cache.message_count: + return True, "project_updated" + + return False, "up_to_date" + + def get_stale_sessions(self) -> List[tuple[str, str]]: + """Get list of sessions that need HTML regeneration. + + Returns: + List of (session_id, reason) tuples for sessions needing regeneration + """ + if self._project_id is None: + return [] + + stale_sessions: List[tuple[str, str]] = [] + + with self._get_connection() as conn: + # Get all sessions + session_rows = conn.execute( + """SELECT session_id, last_timestamp FROM sessions + WHERE project_id = ?""", + (self._project_id,), + ).fetchall() + + for row in session_rows: + session_id = row["session_id"] + html_path = f"session-{session_id}.html" + + is_stale, reason = self.is_html_stale(html_path, session_id) + if is_stale: + stale_sessions.append((session_id, reason)) + + return stale_sessions + + # ========== Page Cache Methods (Pagination) ========== + + def get_page_size_config(self) -> Optional[int]: + """Get the configured page size from the most recent page, if any.""" + if self._project_id is None: + return None + + with self._get_connection() as conn: + row = conn.execute( + """SELECT page_size_config FROM html_pages + WHERE project_id = ? 
+ ORDER BY page_number ASC + LIMIT 1""", + (self._project_id,), + ).fetchone() + + return row["page_size_config"] if row else None + + def get_page_data(self, page_number: int) -> Optional[PageCacheData]: + """Get cache data for a specific page.""" + if self._project_id is None: + return None + + with self._get_connection() as conn: + # Get page info + page_row = conn.execute( + """SELECT * FROM html_pages + WHERE project_id = ? AND page_number = ?""", + (self._project_id, page_number), + ).fetchone() + + if not page_row: + return None + + # Get sessions for this page + session_rows = conn.execute( + """SELECT session_id FROM page_sessions + WHERE page_id = ? + ORDER BY session_order ASC""", + (page_row["id"],), + ).fetchall() + + session_ids = [row["session_id"] for row in session_rows] + + return PageCacheData( + page_number=page_row["page_number"], + html_path=page_row["html_path"], + page_size_config=page_row["page_size_config"], + message_count=page_row["message_count"], + session_ids=session_ids, + first_session_id=page_row["first_session_id"], + last_session_id=page_row["last_session_id"], + first_timestamp=page_row["first_timestamp"], + last_timestamp=page_row["last_timestamp"], + total_input_tokens=page_row["total_input_tokens"] or 0, + total_output_tokens=page_row["total_output_tokens"] or 0, + total_cache_creation_tokens=page_row["total_cache_creation_tokens"] or 0, + total_cache_read_tokens=page_row["total_cache_read_tokens"] or 0, + generated_at=page_row["generated_at"], + library_version=page_row["library_version"], + ) + + def get_all_pages(self) -> List[PageCacheData]: + """Get all cached pages for this project.""" + if self._project_id is None: + return [] + + pages: List[PageCacheData] = [] + with self._get_connection() as conn: + page_rows = conn.execute( + """SELECT * FROM html_pages + WHERE project_id = ? 
+ ORDER BY page_number ASC""", + (self._project_id,), + ).fetchall() + + for page_row in page_rows: + session_rows = conn.execute( + """SELECT session_id FROM page_sessions + WHERE page_id = ? + ORDER BY session_order ASC""", + (page_row["id"],), + ).fetchall() + + session_ids = [row["session_id"] for row in session_rows] + + pages.append( + PageCacheData( + page_number=page_row["page_number"], + html_path=page_row["html_path"], + page_size_config=page_row["page_size_config"], + message_count=page_row["message_count"], + session_ids=session_ids, + first_session_id=page_row["first_session_id"], + last_session_id=page_row["last_session_id"], + first_timestamp=page_row["first_timestamp"], + last_timestamp=page_row["last_timestamp"], + total_input_tokens=page_row["total_input_tokens"] or 0, + total_output_tokens=page_row["total_output_tokens"] or 0, + total_cache_creation_tokens=page_row[ + "total_cache_creation_tokens" + ] + or 0, + total_cache_read_tokens=page_row["total_cache_read_tokens"] + or 0, + generated_at=page_row["generated_at"], + library_version=page_row["library_version"], + ) + ) + + return pages + + def update_page_cache( + self, + page_number: int, + html_path: str, + page_size_config: int, + session_ids: List[str], + message_count: int, + first_timestamp: Optional[str], + last_timestamp: Optional[str], + total_input_tokens: int, + total_output_tokens: int, + total_cache_creation_tokens: int, + total_cache_read_tokens: int, + ) -> None: + """Update or insert page cache entry.""" + if self._project_id is None or not session_ids: + return + + with self._get_connection() as conn: + # Insert or update page + conn.execute( + """INSERT INTO html_pages + (project_id, page_number, html_path, page_size_config, message_count, + first_session_id, last_session_id, first_timestamp, last_timestamp, + total_input_tokens, total_output_tokens, + total_cache_creation_tokens, total_cache_read_tokens, + generated_at, library_version) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 
?, ?, ?, ?, ?, ?) + ON CONFLICT(project_id, page_number) + DO UPDATE SET + html_path = excluded.html_path, + page_size_config = excluded.page_size_config, + message_count = excluded.message_count, + first_session_id = excluded.first_session_id, + last_session_id = excluded.last_session_id, + first_timestamp = excluded.first_timestamp, + last_timestamp = excluded.last_timestamp, + total_input_tokens = excluded.total_input_tokens, + total_output_tokens = excluded.total_output_tokens, + total_cache_creation_tokens = excluded.total_cache_creation_tokens, + total_cache_read_tokens = excluded.total_cache_read_tokens, + generated_at = excluded.generated_at, + library_version = excluded.library_version""", + ( + self._project_id, + page_number, + html_path, + page_size_config, + message_count, + session_ids[0], + session_ids[-1], + first_timestamp, + last_timestamp, + total_input_tokens, + total_output_tokens, + total_cache_creation_tokens, + total_cache_read_tokens, + datetime.now().isoformat(), + self.library_version, + ), + ) + + # Get the page ID + row = conn.execute( + """SELECT id FROM html_pages + WHERE project_id = ? AND page_number = ?""", + (self._project_id, page_number), + ).fetchone() + page_id = row["id"] + + # Delete existing session mappings + conn.execute("DELETE FROM page_sessions WHERE page_id = ?", (page_id,)) + + # Insert session mappings + for order, session_id in enumerate(session_ids): + conn.execute( + """INSERT INTO page_sessions (page_id, session_id, session_order) + VALUES (?, ?, ?)""", + (page_id, session_id, order), + ) + + conn.commit() + + def is_page_stale( + self, page_number: int, page_size_config: int + ) -> tuple[bool, str]: + """Check if a page needs regeneration. 
+ + Args: + page_number: The page number to check + page_size_config: The current page size configuration + + Returns: + Tuple of (is_stale: bool, reason: str) + """ + from .renderer import is_html_outdated + + if self._project_id is None: + return True, "no_cache" + + page_data = self.get_page_data(page_number) + if page_data is None: + return True, "not_cached" + + # Check if page size config changed + if page_data.page_size_config != page_size_config: + return True, "page_size_changed" + + # Check library version + if page_data.library_version != self.library_version: + return True, "version_mismatch" + + # Check if HTML file exists and has correct version + actual_file = self.project_path / page_data.html_path + if not actual_file.exists(): + return True, "file_missing" + if is_html_outdated(actual_file): + return True, "file_version_mismatch" + + # Check if any session on this page has changed + with self._get_connection() as conn: + for session_id in page_data.session_ids: + row = conn.execute( + """SELECT message_count FROM sessions + WHERE project_id = ? AND session_id = ?""", + (self._project_id, session_id), + ).fetchone() + + if not row: + return True, "session_missing" + + # We need to check if session content changed + # For now, just check if session exists + + return False, "up_to_date" + + def invalidate_all_pages(self) -> List[str]: + """Delete all page cache entries for this project. 
+ + Returns: + List of HTML file paths that were invalidated (for cleanup) + """ + if self._project_id is None: + return [] + + html_paths: List[str] = [] + + with self._get_connection() as conn: + # Get all page paths before deleting + rows = conn.execute( + """SELECT html_path FROM html_pages WHERE project_id = ?""", + (self._project_id,), + ).fetchall() + html_paths = [row["html_path"] for row in rows] + + # Delete all pages (cascade deletes page_sessions) + conn.execute( + "DELETE FROM html_pages WHERE project_id = ?", (self._project_id,) + ) + conn.commit() + + return html_paths + + def get_page_count(self) -> int: + """Get the number of cached pages for this project.""" + if self._project_id is None: + return 0 + + with self._get_connection() as conn: + row = conn.execute( + """SELECT COUNT(*) as cnt FROM html_pages WHERE project_id = ?""", + (self._project_id,), + ).fetchone() + + return row["cnt"] if row else 0 + + +__all__ = [ + "CacheManager", + "CachedFileInfo", + "HtmlCacheEntry", + "PageCacheData", + "ProjectCache", + "SessionCacheData", + "get_library_version", +] diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index cef23725..90f0e4b3 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -193,24 +193,23 @@ def _find_relative_matches( try: # Load cache to check for working directories cache_manager = CacheManager(project_dir, get_library_version()) - project_cache = cache_manager.get_cached_project_data() + working_directories = cache_manager.get_working_directories() # Build cache if needed - if not project_cache or not project_cache.working_directories: + if not working_directories: jsonl_files = list(project_dir.glob("*.jsonl")) if jsonl_files: try: convert_jsonl_to_html(project_dir, silent=True) - project_cache = cache_manager.get_cached_project_data() + working_directories = cache_manager.get_working_directories() except Exception as e: logging.warning( f"Failed to build cache for project {project_dir.name}: {e}" ) - 
project_cache = None - if project_cache and project_cache.working_directories: + if working_directories: # Check for relative matches - for cwd in project_cache.working_directories: + for cwd in working_directories: cwd_path = Path(cwd).resolve() if current_cwd_path.is_relative_to(cwd_path): relative_matches.append(project_dir) @@ -263,6 +262,17 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: if all_projects: # Clear cache for all project directories click.echo("Clearing caches for all projects...") + + # Delete the shared SQLite cache database + cache_db = input_path / "cache.db" + if cache_db.exists(): + try: + cache_db.unlink() + click.echo(f" Deleted SQLite cache database: {cache_db}") + except Exception as e: + click.echo(f" Warning: Failed to delete cache database: {e}") + + # Also clean up old JSON cache directories (migration cleanup) project_dirs = [ d for d in input_path.iterdir() @@ -271,12 +281,16 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: for project_dir in project_dirs: try: - cache_manager = CacheManager(project_dir, library_version) - cache_manager.clear_cache() - click.echo(f" Cleared cache for {project_dir.name}") + # Clean up old JSON cache directory if it exists + old_cache_dir = project_dir / "cache" + if old_cache_dir.exists(): + import shutil + + shutil.rmtree(old_cache_dir) + click.echo(f" Cleared old JSON cache for {project_dir.name}") except Exception as e: click.echo( - f" Warning: Failed to clear cache for {project_dir.name}: {e}" + f" Warning: Failed to clear old cache for {project_dir.name}: {e}" ) elif input_path.is_dir(): @@ -284,6 +298,14 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: click.echo(f"Clearing cache for {input_path}...") cache_manager = CacheManager(input_path, library_version) cache_manager.clear_cache() + + # Also clean up old JSON cache directory if it exists + old_cache_dir = input_path / "cache" + if old_cache_dir.exists(): + import shutil + + 
shutil.rmtree(old_cache_dir) + click.echo(" Cleared old JSON cache directory") else: # Single file - no cache to clear click.echo("Cache clearing not applicable for single files.") @@ -434,6 +456,12 @@ def _clear_output_files(input_path: Path, all_projects: bool, file_ext: str) -> default=None, help="Image export mode: placeholder (mark position), embedded (base64), referenced (PNG files). Default: embedded for HTML, referenced for Markdown.", ) +@click.option( + "--page-size", + type=int, + default=2000, + help="Maximum messages per page for combined transcript (default: 2000). Sessions are never split across pages.", +) @click.option( "--debug", is_flag=True, @@ -455,6 +483,7 @@ def main( projects_dir: Optional[Path], output_format: str, image_export_mode: Optional[str], + page_size: int, debug: bool, ) -> None: """Convert Claude transcript JSONL files to HTML or Markdown. @@ -595,6 +624,7 @@ def main( not no_individual_sessions, output_format, image_export_mode, + page_size=page_size, ) # Count processed projects @@ -646,6 +676,7 @@ def main( not no_individual_sessions, not no_cache, image_export_mode=image_export_mode, + page_size=page_size, ) if input_path.is_file(): click.echo(f"Successfully converted {input_path} to {output_path}") diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index b6175992..b56db8ed 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -3,6 +3,7 @@ import json import re +from dataclasses import dataclass, field from pathlib import Path import traceback from typing import Optional, Any, TYPE_CHECKING @@ -17,7 +18,6 @@ get_project_display_name, should_use_as_session_starter, create_session_preview, - extract_working_directories, get_warmup_session_ids, ) from .cache import CacheManager, SessionCacheData, get_library_version @@ -403,6 +403,306 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr return deduplicated +@dataclass +class GenerationStats: + 
"""Track statistics for HTML generation across a project.""" + + # Cache statistics + files_loaded_from_cache: int = 0 + files_updated: int = 0 + + # HTML generation statistics + sessions_total: int = 0 + sessions_regenerated: int = 0 + combined_regenerated: bool = False + + # Timing (seconds) + cache_time: float = 0.0 + render_time: float = 0.0 + total_time: float = 0.0 + + # Errors/warnings collected during processing + warnings: List[str] = field(default_factory=lambda: []) + errors: List[str] = field(default_factory=lambda: []) + + def add_warning(self, msg: str) -> None: + """Add a warning message.""" + self.warnings.append(msg) + + def add_error(self, msg: str) -> None: + """Add an error message.""" + self.errors.append(msg) + + def summary(self, project_name: str) -> str: + """Generate a concise summary line for this project.""" + parts: List[str] = [f"Project: {project_name}"] + + # Cache info + cache_parts: List[str] = [] + if self.files_loaded_from_cache > 0: + cache_parts.append(f"{self.files_loaded_from_cache} cached") + if self.files_updated > 0: + cache_parts.append(f"{self.files_updated} updated") + if cache_parts: + parts.append(f" Cache: {', '.join(cache_parts)}") + + # HTML info + html_parts: List[str] = [] + if self.sessions_total > 0: + html_parts.append( + f"{self.sessions_regenerated}/{self.sessions_total} sessions" + ) + if self.combined_regenerated: + html_parts.append("combined") + if html_parts: + parts.append(f" HTML: {', '.join(html_parts)} regenerated") + elif self.sessions_total > 0: + parts.append(" HTML: up to date") + + # Timing + if self.total_time > 0: + time_str = f" Time: {self.total_time:.1f}s" + if self.cache_time > 0 or self.render_time > 0: + time_str += ( + f" (cache: {self.cache_time:.1f}s, render: {self.render_time:.1f}s)" + ) + parts.append(time_str) + + return "\n".join(parts) + + +def _get_page_html_path(page_number: int) -> str: + """Get the HTML filename for a given page number. 
+ + Page 1 is combined_transcripts.html, page 2+ are combined_transcripts_N.html + """ + if page_number == 1: + return "combined_transcripts.html" + return f"combined_transcripts_{page_number}.html" + + +def _assign_sessions_to_pages( + sessions: Dict[str, SessionCacheData], page_size: int +) -> List[List[str]]: + """Assign sessions to pages, never splitting sessions across pages. + + Args: + sessions: Dict mapping session_id to SessionCacheData + page_size: Maximum messages per page (overflow allowed to keep sessions intact) + + Returns: + List of pages, each containing a list of session_ids + """ + pages: List[List[str]] = [] + current_page: List[str] = [] + current_count = 0 + + # Sort sessions chronologically by first_timestamp + sorted_sessions = sorted(sessions.values(), key=lambda s: s.first_timestamp or "") + + for session in sorted_sessions: + # Add session to current page (never split sessions) + current_page.append(session.session_id) + current_count += session.message_count + + # If page now exceeds limit, close it and start fresh + if current_count > page_size: + pages.append(current_page) + current_page = [] + current_count = 0 + + # Don't forget the last page + if current_page: + pages.append(current_page) + + return pages + + +def _generate_paginated_html( + messages: List[TranscriptEntry], + output_dir: Path, + title: str, + page_size: int, + cache_manager: "CacheManager", + session_data: Dict[str, SessionCacheData], + working_directories: List[str], + silent: bool = False, +) -> Path: + """Generate paginated HTML files for combined transcript. 
+ + Args: + messages: All messages (deduplicated) + output_dir: Directory to write HTML files + title: Base title for the pages + page_size: Maximum messages per page + cache_manager: Cache manager for the project + session_data: Session metadata from cache + working_directories: Working directories for project display name + silent: Suppress verbose output + + Returns: + Path to the first page (combined_transcripts.html) + """ + from .renderer import generate_html, format_timestamp + + # Check if page size changed - if so, invalidate all pages + cached_page_size = cache_manager.get_page_size_config() + if cached_page_size is not None and cached_page_size != page_size: + if not silent: + print( + f"Page size changed from {cached_page_size} to {page_size}, regenerating all pages" + ) + old_paths = cache_manager.invalidate_all_pages() + # Delete old page files + for html_path in old_paths: + page_file = output_dir / html_path + if page_file.exists(): + page_file.unlink() + + # Assign sessions to pages + pages: List[List[str]] = _assign_sessions_to_pages(session_data, page_size) + + if not pages: + # No sessions, generate empty page + pages = [[]] + + # Clean up orphan pages if page count decreased + old_page_count = cache_manager.get_page_count() + new_page_count = len(pages) + if old_page_count > new_page_count: + for orphan_page_num in range(new_page_count + 1, old_page_count + 1): + orphan_path = output_dir / _get_page_html_path(orphan_page_num) + if orphan_path.exists(): + orphan_path.unlink() + + # Group messages by session for fast lookup + messages_by_session: Dict[str, List[TranscriptEntry]] = {} + for msg in messages: + session_id = getattr(msg, "sessionId", None) + if session_id: + if session_id not in messages_by_session: + messages_by_session[session_id] = [] + messages_by_session[session_id].append(msg) + + first_page_path = output_dir / _get_page_html_path(1) + + # Generate each page + for page_num, page_session_ids in enumerate(pages, start=1): + 
html_path = _get_page_html_path(page_num) + page_file = output_dir / html_path + + # Check if page is stale + is_stale, reason = cache_manager.is_page_stale(page_num, page_size) + + if not is_stale and page_file.exists(): + if not silent: + print(f"Page {page_num} is current, skipping regeneration") + continue + + if not silent: + print(f"Generating page {page_num} ({reason})...") + + # Collect messages for this page + page_messages: List[TranscriptEntry] = [] + for session_id in page_session_ids: + if session_id in messages_by_session: + page_messages.extend(messages_by_session[session_id]) + + # Calculate page stats + page_message_count = len(page_messages) + first_timestamp = None + last_timestamp = None + total_input_tokens = 0 + total_output_tokens = 0 + total_cache_creation_tokens = 0 + total_cache_read_tokens = 0 + + for session_id in page_session_ids: + if session_id in session_data: + s = session_data[session_id] + if s.first_timestamp and ( + first_timestamp is None or s.first_timestamp < first_timestamp + ): + first_timestamp = s.first_timestamp + if s.last_timestamp and ( + last_timestamp is None or s.last_timestamp > last_timestamp + ): + last_timestamp = s.last_timestamp + total_input_tokens += s.total_input_tokens + total_output_tokens += s.total_output_tokens + total_cache_creation_tokens += s.total_cache_creation_tokens + total_cache_read_tokens += s.total_cache_read_tokens + + # Build page_info for navigation + has_prev = page_num > 1 + # Pre-enable next link if this page exceeds threshold (anticipating future pages) + # or if there are more pages + page_exceeds_threshold = page_message_count > page_size + has_next = page_num < len(pages) or page_exceeds_threshold + + page_info = { + "page_number": page_num, + "prev_link": _get_page_html_path(page_num - 1) if has_prev else None, + "next_link": _get_page_html_path(page_num + 1) if has_next else None, + } + + # Build page_stats + date_range = "" + if first_timestamp and last_timestamp: + first_fmt = 
format_timestamp(first_timestamp) + last_fmt = format_timestamp(last_timestamp) + if first_fmt == last_fmt: + date_range = first_fmt + else: + date_range = f"{first_fmt} - {last_fmt}" + elif first_timestamp: + date_range = format_timestamp(first_timestamp) + + token_parts: List[str] = [] + if total_input_tokens: + token_parts.append(f"Input: {total_input_tokens:,}") + if total_output_tokens: + token_parts.append(f"Output: {total_output_tokens:,}") + if total_cache_creation_tokens: + token_parts.append(f"Cache Create: {total_cache_creation_tokens:,}") + if total_cache_read_tokens: + token_parts.append(f"Cache Read: {total_cache_read_tokens:,}") + token_summary = " | ".join(token_parts) if token_parts else None + + page_stats = { + "message_count": page_message_count, + "date_range": date_range, + "token_summary": token_summary, + } + + # Generate HTML for this page + page_title = f"{title} - Page {page_num}" if page_num > 1 else title + html_content = generate_html( + page_messages, + page_title, + page_info=page_info, + page_stats=page_stats, + ) + page_file.write_text(html_content, encoding="utf-8") + + # Update cache + cache_manager.update_page_cache( + page_number=page_num, + html_path=html_path, + page_size_config=page_size, + session_ids=page_session_ids, + message_count=page_message_count, + first_timestamp=first_timestamp, + last_timestamp=last_timestamp, + total_input_tokens=total_input_tokens, + total_output_tokens=total_output_tokens, + total_cache_creation_tokens=total_cache_creation_tokens, + total_cache_read_tokens=total_cache_read_tokens, + ) + + return first_page_path + + def convert_jsonl_to_html( input_path: Path, output_path: Optional[Path] = None, @@ -411,6 +711,7 @@ def convert_jsonl_to_html( generate_individual_sessions: bool = True, use_cache: bool = True, silent: bool = False, + page_size: int = 2000, ) -> Path: """Convert JSONL transcript(s) to HTML file(s). 
@@ -466,6 +767,10 @@ def convert_jsonl_to( print(f"Warning: Failed to initialize cache manager: {e}") ext = get_file_extension(format) + + # Initialize working_directories for both branches (used by pagination in directory mode) + working_directories: List[str] = [] + if input_path.is_file(): # Single file mode - cache only available for directory mode if output_path is None: @@ -483,13 +788,37 @@ def convert_jsonl_to( input_path, cache_manager, from_date, to_date, silent ) + # Phase 1b: Early exit if nothing needs regeneration + # Skip expensive message loading if all HTML is up to date + if ( + cache_manager is not None + and not cache_was_updated + and from_date is None + and to_date is None + ): + # Check if combined HTML is stale + combined_stale, _ = cache_manager.is_html_stale(output_path.name, None) + if not combined_stale and not is_html_outdated(output_path): + # Check if any session HTML is stale + stale_sessions = cache_manager.get_stale_sessions() + if not stale_sessions or not generate_individual_sessions: + # Nothing needs regeneration - skip loading + if not silent: + print( + f"All HTML files are current for {input_path.name}, " + "skipping regeneration" + ) + return output_path + # Phase 2: Load messages (will use fresh cache when available) messages = load_directory_transcripts( input_path, cache_manager, from_date, to_date, silent ) - # Extract working directories directly from parsed messages - working_directories = extract_working_directories(messages) + # Get working directories from cache + working_directories = ( + cache_manager.get_working_directories() if cache_manager else [] + ) project_title = get_project_display_name(input_path.name, working_directories) title = f"Claude Transcripts - {project_title}" @@ -513,26 +842,77 @@ def convert_jsonl_to( # Generate combined output file (check if regeneration needed) assert output_path is not None renderer = get_renderer(format, image_export_mode) - should_regenerate = ( - 
renderer.is_outdated(output_path) - or from_date is not None - or to_date is not None - or not output_path.exists() - or ( - input_path.is_dir() and cache_was_updated - ) # Regenerate if JSONL files changed - ) - if should_regenerate: - # For referenced images, pass the output directory - output_dir = output_path.parent - content = renderer.generate(messages, title, output_dir=output_dir) - assert content is not None - output_path.write_text(content, encoding="utf-8") - else: - print( - f"{format.upper()} file {output_path.name} is current, skipping regeneration" + # Decide whether to use pagination (HTML only, directory mode, no date filter) + use_pagination = False + cached_data = cache_manager.get_cached_project_data() if cache_manager else None + total_message_count = ( + cached_data.total_message_count if cached_data else len(messages) + ) + existing_page_count = cache_manager.get_page_count() if cache_manager else 0 + + if ( + format == "html" + and cache_manager is not None + and input_path.is_dir() + and from_date is None + and to_date is None + ): + # Use pagination if total messages exceed page_size or there are existing pages + use_pagination = total_message_count > page_size or existing_page_count > 1 + + if use_pagination: + # Use paginated HTML generation + assert cache_manager is not None # Ensured by use_pagination condition + session_data = cached_data.sessions if cached_data else {} + output_path = _generate_paginated_html( + messages, + input_path, + title, + page_size, + cache_manager, + session_data, + working_directories, + silent=silent, ) + else: + # Use single-file generation for small projects or filtered views + # Use incremental regeneration via html_cache when available + if cache_manager is not None and input_path.is_dir(): + is_stale, _reason = cache_manager.is_html_stale(output_path.name, None) + should_regenerate = ( + is_stale + or renderer.is_outdated(output_path) + or from_date is not None + or to_date is not None + or not 
output_path.exists() + ) + else: + # Fallback: old logic for single file mode or no cache + should_regenerate = ( + renderer.is_outdated(output_path) + or from_date is not None + or to_date is not None + or not output_path.exists() + or (input_path.is_dir() and cache_was_updated) + ) + + if should_regenerate: + # For referenced images, pass the output directory + output_dir = output_path.parent + content = renderer.generate(messages, title, output_dir=output_dir) + assert content is not None + output_path.write_text(content, encoding="utf-8") + + # Update html_cache for combined transcript (HTML only) + if format == "html" and cache_manager is not None: + cache_manager.update_html_cache( + output_path.name, None, total_message_count + ) + elif not silent: + print( + f"{format.upper()} file {output_path.name} is current, skipping regeneration" + ) # Generate individual session files if requested and in directory mode if generate_individual_sessions and input_path.is_dir(): @@ -545,11 +925,45 @@ def convert_jsonl_to( cache_manager, cache_was_updated, image_export_mode, + silent=silent, ) return output_path +def has_cache_changes( + project_dir: Path, + cache_manager: Optional[CacheManager], + from_date: Optional[str] = None, + to_date: Optional[str] = None, +) -> bool: + """Check if cache needs updating (fast mtime comparison only). + + Returns True if there are modified files or cache is stale. + Does NOT load any messages - that's deferred to ensure_fresh_cache. 
+ """ + if cache_manager is None: + return True # No cache means we need to process + + jsonl_files = list(project_dir.glob("*.jsonl")) + if not jsonl_files: + return False + + # Get cached project data + cached_project_data = cache_manager.get_cached_project_data() + + # Check various invalidation conditions + modified_files = cache_manager.get_modified_files(jsonl_files) + + return ( + cached_project_data is None + or from_date is not None + or to_date is not None + or bool(modified_files) + or (cached_project_data.total_message_count == 0 and bool(jsonl_files)) + ) + + def ensure_fresh_cache( project_dir: Path, cache_manager: Optional[CacheManager], @@ -557,7 +971,11 @@ def ensure_fresh_cache( to_date: Optional[str] = None, silent: bool = False, ) -> bool: - """Ensure cache is fresh and populated. Returns True if cache was updated.""" + """Ensure cache is fresh and populated. Returns True if cache was updated. + + This does the heavy lifting of loading and parsing files. + Call has_cache_changes() first for a fast check. + """ if cache_manager is None: return False @@ -744,11 +1162,6 @@ def _update_cache_with_session_data( # Update cache with filtered session data cache_manager.update_session_cache(sessions_cache_data) - # Update cache with working directories (from filtered sessions) - cache_manager.update_working_directories( - extract_working_directories(list(sessions_cache_data.values())) - ) - # Update cache with project aggregates cache_manager.update_project_aggregates( total_message_count=total_message_count, @@ -874,8 +1287,13 @@ def _generate_individual_session_files( cache_manager: Optional["CacheManager"] = None, cache_was_updated: bool = False, image_export_mode: Optional[str] = None, -) -> None: - """Generate individual files for each session in the specified format.""" + silent: bool = False, +) -> int: + """Generate individual files for each session in the specified format. 
+ + Returns: + Number of sessions regenerated + """ ext = get_file_extension(format) # Pre-compute warmup sessions to exclude them warmup_session_ids = get_warmup_session_ids(messages) @@ -890,19 +1308,23 @@ def _generate_individual_session_files( # Get session data from cache for better titles session_data: dict[str, Any] = {} - working_directories = None + working_directories: list[str] = [] if cache_manager is not None: project_cache = cache_manager.get_cached_project_data() if project_cache: session_data = {s.session_id: s for s in project_cache.sessions.values()} - # Get working directories for project title - if project_cache.working_directories: - working_directories = project_cache.working_directories + # Get working directories for project title + working_directories = cache_manager.get_working_directories() + + # Only generate HTML for sessions that are tracked in the sessions table + # (filters out warmup-only and sessions without user messages) + session_ids = session_ids & set(session_data.keys()) project_title = get_project_display_name(output_dir.name, working_directories) # Get renderer once outside the loop renderer = get_renderer(format, image_export_mode) + regenerated_count = 0 # Generate HTML file for each session for session_id in session_ids: @@ -937,15 +1359,29 @@ def _generate_individual_session_files( # Check if session file needs regeneration session_file_path = output_dir / f"session-{session_id}.{ext}" + session_file_name = f"session-{session_id}.{ext}" - # Only regenerate if outdated, doesn't exist, or date filtering is active - should_regenerate_session = ( - renderer.is_outdated(session_file_path) - or from_date is not None - or to_date is not None - or not session_file_path.exists() - or cache_was_updated # Regenerate if JSONL files changed - ) + # Use incremental regeneration: check per-session staleness via html_cache + if cache_manager is not None and format == "html": + is_stale, _reason = cache_manager.is_html_stale( + 
session_file_name, session_id + ) + should_regenerate_session = ( + is_stale + or renderer.is_outdated(session_file_path) + or from_date is not None + or to_date is not None + or not session_file_path.exists() + ) + else: + # Fallback without cache or non-HTML formats + should_regenerate_session = ( + renderer.is_outdated(session_file_path) + or from_date is not None + or to_date is not None + or not session_file_path.exists() + or cache_was_updated + ) if should_regenerate_session: # Generate session content @@ -955,11 +1391,32 @@ def _generate_individual_session_files( assert session_content is not None # Write session file session_file_path.write_text(session_content, encoding="utf-8") - else: + regenerated_count += 1 + + # Update html_cache to track this generation (HTML only) + if cache_manager is not None and format == "html": + # Use message count from cache (pre-deduplication) to match + # the count used in is_html_stale() + if session_id in session_data: + session_message_count = session_data[session_id].message_count + else: + # Fallback: count from messages list (less accurate due to dedup) + session_message_count = sum( + 1 + for m in messages + if hasattr(m, "sessionId") + and getattr(m, "sessionId") == session_id + ) + cache_manager.update_html_cache( + session_file_name, session_id, session_message_count + ) + elif not silent: print( f"Session file {session_file_path.name} is current, skipping regeneration" ) + return regenerated_count + def process_projects_hierarchy( projects_path: Path, @@ -969,8 +1426,26 @@ def process_projects_hierarchy( generate_individual_sessions: bool = True, output_format: str = "html", image_export_mode: Optional[str] = None, + silent: bool = True, + page_size: int = 2000, ) -> Path: - """Process the entire ~/.claude/projects/ hierarchy and create linked output files.""" + """Process the entire ~/.claude/projects/ hierarchy and create linked output files. 
+ + Args: + projects_path: Path to the projects directory + from_date: Optional date filter start + to_date: Optional date filter end + use_cache: Whether to use SQLite cache + generate_individual_sessions: Whether to generate per-session HTML files + output_format: Output format (html, md, markdown) + image_export_mode: Image export mode for markdown + silent: If True, suppress verbose per-file logging (show summary only) + page_size: Maximum messages per page for combined transcript pagination + """ + import time + + start_time = time.time() + if not projects_path.exists(): raise FileNotFoundError(f"Projects path not found: {projects_path}") @@ -991,7 +1466,19 @@ def process_projects_hierarchy( # Process each project directory project_summaries: list[dict[str, Any]] = [] any_cache_updated = False # Track if any project had cache updates + + # Aggregated stats + total_projects = len(project_dirs) + projects_with_updates = 0 + total_sessions = 0 + + # Per-project stats for summary output + project_stats: List[tuple[str, GenerationStats]] = [] + for project_dir in sorted(project_dirs): + project_start_time = time.time() + stats = GenerationStats() + try: # Initialize cache manager for this project cache_manager = None @@ -999,27 +1486,81 @@ def process_projects_hierarchy( try: cache_manager = CacheManager(project_dir, library_version) except Exception as e: - print(f"Warning: Failed to initialize cache for {project_dir}: {e}") + stats.add_warning(f"Failed to initialize cache: {e}") - # Phase 1: Ensure cache is fresh and populated - cache_was_updated = ensure_fresh_cache( - project_dir, cache_manager, from_date, to_date + # Phase 1: Fast check if anything needs updating (mtime comparison only) + jsonl_files = list(project_dir.glob("*.jsonl")) + modified_files = ( + cache_manager.get_modified_files(jsonl_files) if cache_manager else [] ) - if cache_was_updated: - any_cache_updated = True - - # Phase 2: Generate output for this project (optionally individual session 
files) - output_path = convert_jsonl_to( - output_format, - project_dir, - None, - from_date, - to_date, - generate_individual_sessions, - use_cache, - image_export_mode=image_export_mode, + stale_sessions = cache_manager.get_stale_sessions() if cache_manager else [] + output_path = project_dir / "combined_transcripts.html" + # Check combined_stale using the appropriate cache: + # - Paginated projects store data in html_pages table (via save_page_cache) + # - Non-paginated projects store data in html_cache table (via update_html_cache) + if cache_manager is not None: + existing_page_count = cache_manager.get_page_count() + if existing_page_count > 0: + # Paginated project: check page 1 staleness + combined_stale = cache_manager.is_page_stale(1, page_size)[0] + else: + # Non-paginated project: check html_cache + combined_stale = cache_manager.is_html_stale( + output_path.name, None + )[0] + else: + combined_stale = True + + # Determine if we need to do any work + needs_work = ( + bool(modified_files) + or bool(stale_sessions) + or combined_stale + or not output_path.exists() ) + if not needs_work: + # Fast path: nothing to do, just collect stats for index + stats.files_loaded_from_cache = len(jsonl_files) + stats.total_time = time.time() - project_start_time + # Show progress + print(f" {project_dir.name}: cached ({stats.total_time:.1f}s)") + else: + # Slow path: update cache and regenerate output + stats.files_updated = len(modified_files) if modified_files else 0 + stats.files_loaded_from_cache = len(jsonl_files) - stats.files_updated + stats.sessions_regenerated = len(stale_sessions) + + # Track if cache was updated (for index regeneration) + if modified_files: + any_cache_updated = True + projects_with_updates += 1 + + # Generate output for this project (handles cache updates internally) + output_path = convert_jsonl_to( + output_format, + project_dir, + None, + from_date, + to_date, + generate_individual_sessions, + use_cache, + silent=silent, + 
image_export_mode=image_export_mode, + page_size=page_size, + ) + + # Track timing + stats.total_time = time.time() - project_start_time + # Show progress + progress_parts: List[str] = [] + if stats.files_updated > 0: + progress_parts.append(f"{stats.files_updated} files updated") + if stats.sessions_regenerated > 0: + progress_parts.append(f"{stats.sessions_regenerated} sessions") + detail = ", ".join(progress_parts) if progress_parts else "regenerated" + print(f" {project_dir.name}: {detail} ({stats.total_time:.1f}s)") + # Get project info for index - use cached data if available # Exclude agent files (they are loaded via session references) jsonl_files = [ @@ -1036,6 +1577,8 @@ def process_projects_hierarchy( if cache_manager is not None: cached_project_data = cache_manager.get_cached_project_data() if cached_project_data is not None: + # Track total sessions for stats + stats.sessions_total = len(cached_project_data.sessions) # Use cached aggregation data project_summaries.append( { @@ -1051,7 +1594,7 @@ def process_projects_hierarchy( "total_cache_read_tokens": cached_project_data.total_cache_read_tokens, "latest_timestamp": cached_project_data.latest_timestamp, "earliest_timestamp": cached_project_data.earliest_timestamp, - "working_directories": cached_project_data.working_directories, + "working_directories": cache_manager.get_working_directories(), "sessions": [ { "id": session_data.session_id, @@ -1073,6 +1616,8 @@ def process_projects_hierarchy( ], } ) + # Add project stats + project_stats.append((project_dir.name, stats)) continue # Fallback for when cache is not available (should be rare) @@ -1080,8 +1625,11 @@ def process_projects_hierarchy( f"Warning: No cached data available for {project_dir.name}, using fallback processing" ) messages = load_directory_transcripts( - project_dir, cache_manager, from_date, to_date + project_dir, cache_manager, from_date, to_date, silent=silent ) + # Ensure cache is populated with session data (including working 
directories) + if cache_manager: + _update_cache_with_session_data(cache_manager, messages) if from_date or to_date: messages = filter_messages_by_date(messages, from_date, to_date) @@ -1153,12 +1701,20 @@ def process_projects_hierarchy( "total_cache_read_tokens": total_cache_read_tokens, "latest_timestamp": latest_timestamp, "earliest_timestamp": earliest_timestamp, - "working_directories": extract_working_directories(messages), + "working_directories": cache_manager.get_working_directories() + if cache_manager + else [], "sessions": sessions_data, } ) + # Track session count in stats for fallback path + stats.sessions_total = len(sessions_data) + project_stats.append((project_dir.name, stats)) + except Exception as e: prev_project = project_summaries[-1] if project_summaries else "(none)" + stats.add_error(str(e)) + project_stats.append((project_dir.name, stats)) print( f"Warning: Failed to process {project_dir}: {e}\n" f"Previous (in alphabetical order) project before error: {prev_project}" @@ -1170,13 +1726,38 @@ def process_projects_hierarchy( ext = get_file_extension(output_format) index_path = projects_path / f"index.{ext}" renderer = get_renderer(output_format, image_export_mode) + index_regenerated = False if renderer.is_outdated(index_path) or from_date or to_date or any_cache_updated: index_content = renderer.generate_projects_index( project_summaries, from_date, to_date ) assert index_content is not None index_path.write_text(index_content, encoding="utf-8") - else: + index_regenerated = True + elif not silent: print(f"Index {ext.upper()} is current, skipping regeneration") + # Count total sessions from project summaries + for summary in project_summaries: + total_sessions += len(summary.get("sessions", [])) + + # Print summary + elapsed = time.time() - start_time + + # Print any errors/warnings that occurred + for project_name, stats in project_stats: + for warning in stats.warnings: + print(f" Warning ({project_name}): {warning}") + for error in 
stats.errors: + print(f" Error ({project_name}): {error}") + + # Global summary + summary_parts: List[str] = [] + summary_parts.append(f"Processed {total_projects} projects in {elapsed:.1f}s") + if projects_with_updates > 0: + summary_parts.append(f" {projects_with_updates} projects updated") + if index_regenerated: + summary_parts.append(" Index regenerated") + print("\n".join(summary_parts)) + return index_path diff --git a/claude_code_log/html/templates/components/page_nav_styles.css b/claude_code_log/html/templates/components/page_nav_styles.css new file mode 100644 index 00000000..d0254878 --- /dev/null +++ b/claude_code_log/html/templates/components/page_nav_styles.css @@ -0,0 +1,74 @@ +/* Page navigation styles for paginated combined transcripts */ + +.page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; +} + +.page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; +} + +.page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); +} + +.page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; +} + +.page-stats .stat { + display: flex; + align-items: center; + gap: 5px; +} + +.page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); +} + +.page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; +} + +.page-nav-link:hover { + background-color: var(--session-bg-dimmed); + 
transform: translateY(-1px); +} + +.page-nav-link.prev::before { + content: ''; +} + +.page-nav-link.next::after { + content: ''; +} diff --git a/claude_code_log/html/templates/transcript.html b/claude_code_log/html/templates/transcript.html index ef27d002..fed0bb53 100644 --- a/claude_code_log/html/templates/transcript.html +++ b/claude_code_log/html/templates/transcript.html @@ -17,12 +17,39 @@ {% include 'components/search_styles.css' %} {% include 'components/edit_diff_styles.css' %} {% include 'components/pygments_styles.css' %} +{% include 'components/page_nav_styles.css' %}

{{ title }}

+ {% if page_info %} + + + {% endif %} + {% include 'components/timeline.html' %} diff --git a/claude_code_log/migrations/001_initial_schema.sql b/claude_code_log/migrations/001_initial_schema.sql new file mode 100644 index 00000000..f7c5946e --- /dev/null +++ b/claude_code_log/migrations/001_initial_schema.sql @@ -0,0 +1,114 @@ +-- Initial schema for SQLite cache +-- Migration: 001 +-- Description: Creates all tables and indexes for the cache system + +-- Project metadata +CREATE TABLE IF NOT EXISTS projects ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_path TEXT UNIQUE NOT NULL, + version TEXT NOT NULL, + cache_created TEXT NOT NULL, + last_updated TEXT NOT NULL, + total_message_count INTEGER DEFAULT 0, + total_input_tokens INTEGER DEFAULT 0, + total_output_tokens INTEGER DEFAULT 0, + total_cache_creation_tokens INTEGER DEFAULT 0, + total_cache_read_tokens INTEGER DEFAULT 0, + earliest_timestamp TEXT DEFAULT '', + latest_timestamp TEXT DEFAULT '' +); + +CREATE INDEX IF NOT EXISTS idx_projects_path ON projects(project_path); + +-- File tracking for invalidation +CREATE TABLE IF NOT EXISTS cached_files ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + file_name TEXT NOT NULL, + file_path TEXT NOT NULL, + source_mtime REAL NOT NULL, + cached_mtime REAL NOT NULL, + message_count INTEGER DEFAULT 0, + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + UNIQUE(project_id, file_name) +); + +CREATE INDEX IF NOT EXISTS idx_cached_files_project ON cached_files(project_id); +CREATE INDEX IF NOT EXISTS idx_cached_files_name ON cached_files(file_name); + +-- Session aggregates +CREATE TABLE IF NOT EXISTS sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + session_id TEXT NOT NULL, + summary TEXT, + first_timestamp TEXT NOT NULL DEFAULT '', + last_timestamp TEXT NOT NULL DEFAULT '', + message_count INTEGER DEFAULT 0, + first_user_message TEXT DEFAULT '', + cwd TEXT, + total_input_tokens INTEGER 
DEFAULT 0, + total_output_tokens INTEGER DEFAULT 0, + total_cache_creation_tokens INTEGER DEFAULT 0, + total_cache_read_tokens INTEGER DEFAULT 0, + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + UNIQUE(project_id, session_id) +); + +CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_id); +CREATE INDEX IF NOT EXISTS idx_sessions_session_id ON sessions(session_id); +CREATE INDEX IF NOT EXISTS idx_sessions_first_timestamp ON sessions(first_timestamp); +CREATE INDEX IF NOT EXISTS idx_sessions_cwd ON sessions(cwd); + +-- Fully normalised messages +CREATE TABLE IF NOT EXISTS messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + file_id INTEGER NOT NULL, + + -- Core fields + type TEXT NOT NULL, + timestamp TEXT, + session_id TEXT, + + -- BaseTranscriptEntry fields (prefixed) + _uuid TEXT, + _parent_uuid TEXT, + _is_sidechain INTEGER DEFAULT 0, + _user_type TEXT, + _cwd TEXT, + _version TEXT, + _is_meta INTEGER, + _agent_id TEXT, + + -- AssistantTranscriptEntry + _request_id TEXT, + + -- Flattened usage tokens + input_tokens INTEGER, + output_tokens INTEGER, + cache_creation_tokens INTEGER, + cache_read_tokens INTEGER, + + -- SummaryTranscriptEntry + _leaf_uuid TEXT, + + -- SystemTranscriptEntry + _level TEXT, + + -- QueueOperationTranscriptEntry + _operation TEXT, + + -- Message content as JSON + content JSON NOT NULL, + + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + FOREIGN KEY (file_id) REFERENCES cached_files(id) ON DELETE CASCADE +); + +-- Indexes for performance +CREATE INDEX IF NOT EXISTS idx_messages_timestamp ON messages(timestamp); +CREATE INDEX IF NOT EXISTS idx_messages_project_timestamp ON messages(project_id, timestamp); +CREATE INDEX IF NOT EXISTS idx_messages_file ON messages(file_id); +CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id); +CREATE INDEX IF NOT EXISTS idx_messages_uuid ON messages(_uuid); diff --git 
a/claude_code_log/migrations/002_html_cache.sql b/claude_code_log/migrations/002_html_cache.sql new file mode 100644 index 00000000..00db64ec --- /dev/null +++ b/claude_code_log/migrations/002_html_cache.sql @@ -0,0 +1,18 @@ +-- HTML cache for incremental regeneration +-- Migration: 002 +-- Description: Tracks when HTML files were generated to enable incremental regeneration + +CREATE TABLE IF NOT EXISTS html_cache ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + html_path TEXT NOT NULL, -- e.g., "session-abc123.html" or "combined_transcripts.html" + generated_at TEXT NOT NULL, -- ISO timestamp when HTML was generated + source_session_id TEXT, -- session_id for individual files, NULL for combined + message_count INTEGER, -- for sanity checking + library_version TEXT NOT NULL, -- which version generated it + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + UNIQUE(project_id, html_path) +); + +CREATE INDEX IF NOT EXISTS idx_html_cache_project ON html_cache(project_id); +CREATE INDEX IF NOT EXISTS idx_html_cache_session ON html_cache(source_session_id); diff --git a/claude_code_log/migrations/003_html_pagination.sql b/claude_code_log/migrations/003_html_pagination.sql new file mode 100644 index 00000000..61bd6f9c --- /dev/null +++ b/claude_code_log/migrations/003_html_pagination.sql @@ -0,0 +1,39 @@ +-- HTML pagination for combined transcripts +-- Migration: 003 +-- Description: Tracks page assignments for paginated combined transcript HTML files + +-- Pages table: tracks each generated page file +CREATE TABLE IF NOT EXISTS html_pages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL, + page_number INTEGER NOT NULL, + html_path TEXT NOT NULL, -- e.g., "combined_transcripts.html" or "combined_transcripts_2.html" + page_size_config INTEGER NOT NULL, -- the --page-size value used + message_count INTEGER NOT NULL, -- total messages on this page + first_session_id TEXT NOT NULL, + last_session_id TEXT NOT 
NULL, + first_timestamp TEXT, + last_timestamp TEXT, + total_input_tokens INTEGER DEFAULT 0, + total_output_tokens INTEGER DEFAULT 0, + total_cache_creation_tokens INTEGER DEFAULT 0, + total_cache_read_tokens INTEGER DEFAULT 0, + generated_at TEXT NOT NULL, -- ISO timestamp when page was generated + library_version TEXT NOT NULL, + FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, + UNIQUE(project_id, page_number) +); + +-- Page-session mapping: tracks which sessions are on which page +CREATE TABLE IF NOT EXISTS page_sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + page_id INTEGER NOT NULL, + session_id TEXT NOT NULL, + session_order INTEGER NOT NULL, -- order of session within the page + FOREIGN KEY (page_id) REFERENCES html_pages(id) ON DELETE CASCADE, + UNIQUE(page_id, session_id) +); + +CREATE INDEX IF NOT EXISTS idx_html_pages_project ON html_pages(project_id); +CREATE INDEX IF NOT EXISTS idx_page_sessions_page ON page_sessions(page_id); +CREATE INDEX IF NOT EXISTS idx_page_sessions_session ON page_sessions(session_id); diff --git a/claude_code_log/migrations/__init__.py b/claude_code_log/migrations/__init__.py new file mode 100644 index 00000000..db9bb5bb --- /dev/null +++ b/claude_code_log/migrations/__init__.py @@ -0,0 +1,5 @@ +"""Database migrations for Claude Code Log cache.""" + +from .runner import run_migrations + +__all__ = ["run_migrations"] diff --git a/claude_code_log/migrations/runner.py b/claude_code_log/migrations/runner.py new file mode 100644 index 00000000..40e2b29d --- /dev/null +++ b/claude_code_log/migrations/runner.py @@ -0,0 +1,163 @@ +"""Migration runner for SQLite cache database.""" + +import hashlib +import re +import sqlite3 +from datetime import datetime +from pathlib import Path +from typing import List, Tuple + + +def _get_migrations_dir() -> Path: + """Get the migrations directory path.""" + return Path(__file__).parent + + +def _compute_checksum(content: str) -> str: + """Compute SHA256 checksum of 
migration content."""
+    return hashlib.sha256(content.encode("utf-8")).hexdigest()
+
+
+def _parse_migration_number(filename: str) -> int:
+    """Extract migration number from filename (e.g., '001_initial.sql' -> 1)."""
+    match = re.match(r"^(\d+)_", filename)
+    if match:
+        return int(match.group(1))
+    raise ValueError(f"Invalid migration filename: {filename}")
+
+
+def _ensure_schema_version_table(conn: sqlite3.Connection) -> None:
+    """Create _schema_version table if it doesn't exist or upgrade from old format."""
+    # Check if table exists
+    row = conn.execute(
+        "SELECT name FROM sqlite_master WHERE type='table' AND name='_schema_version'"
+    ).fetchone()
+
+    if row:
+        # Check if it has the new schema (with checksum column)
+        columns = conn.execute("PRAGMA table_info(_schema_version)").fetchall()
+        column_names = {col[1] for col in columns}
+
+        if "checksum" not in column_names:
+            # Old format table - drop it and recreate
+            # This triggers a fresh start as per migration plan
+            conn.execute("DROP TABLE _schema_version")
+            conn.commit()
+
+    # Create table with new schema
+    conn.execute("""
+        CREATE TABLE IF NOT EXISTS _schema_version (
+            version INTEGER PRIMARY KEY,
+            filename TEXT NOT NULL,
+            applied_at TEXT NOT NULL,
+            checksum TEXT NOT NULL
+        )
+    """)
+    conn.commit()
+
+
+def get_applied_migrations(conn: sqlite3.Connection) -> List[Tuple[int, str]]:
+    """Get list of applied migrations as (version, checksum) tuples."""
+    _ensure_schema_version_table(conn)
+    rows = conn.execute(
+        "SELECT version, checksum FROM _schema_version ORDER BY version"
+    ).fetchall()
+    return [(row[0], row[1]) for row in rows]
+
+
+def get_available_migrations() -> List[Tuple[int, Path]]:
+    """Get list of available migration files as (version, path) tuples."""
+    migrations_dir = _get_migrations_dir()
+    sql_files = sorted(migrations_dir.glob("*.sql"))
+
+    migrations: List[Tuple[int, Path]] = []
+    for sql_file in sql_files:
+        try:
+            version = _parse_migration_number(sql_file.name)
+
migrations.append((version, sql_file)) + except ValueError: + # Skip files that don't match the naming convention + continue + + return migrations + + +def get_pending_migrations(conn: sqlite3.Connection) -> List[Tuple[int, Path]]: + """Get list of migrations that haven't been applied yet.""" + applied = {v for v, _ in get_applied_migrations(conn)} + available = get_available_migrations() + return [(v, p) for v, p in available if v not in applied] + + +def apply_migration( + conn: sqlite3.Connection, version: int, migration_path: Path +) -> None: + """Apply a single migration and record it in _schema_version.""" + content = migration_path.read_text(encoding="utf-8") + checksum = _compute_checksum(content) + + # Execute the migration SQL + conn.executescript(content) + + # Record the migration + conn.execute( + """ + INSERT INTO _schema_version (version, filename, applied_at, checksum) + VALUES (?, ?, ?, ?) + """, + (version, migration_path.name, datetime.now().isoformat(), checksum), + ) + conn.commit() + + +def verify_migrations(conn: sqlite3.Connection) -> List[str]: + """Verify applied migrations match their checksums. + + Returns list of warnings for any mismatches. + """ + warnings: List[str] = [] + applied = get_applied_migrations(conn) + available = {v: p for v, p in get_available_migrations()} + + for version, stored_checksum in applied: + if version in available: + current_content = available[version].read_text(encoding="utf-8") + current_checksum = _compute_checksum(current_content) + if current_checksum != stored_checksum: + warnings.append( + f"Migration {version} ({available[version].name}) has been modified " + f"since it was applied. This may indicate database inconsistency." + ) + + return warnings + + +def run_migrations(db_path: Path) -> int: + """Apply all pending migrations to the database. 
+ + Args: + db_path: Path to the SQLite database file + + Returns: + Number of migrations applied + """ + conn = sqlite3.connect(db_path, timeout=30.0) + conn.execute("PRAGMA foreign_keys = ON") + + try: + _ensure_schema_version_table(conn) + pending = get_pending_migrations(conn) + + for version, migration_path in sorted(pending): + apply_migration(conn, version, migration_path) + + return len(pending) + finally: + conn.close() + + +def get_current_version(conn: sqlite3.Connection) -> int: + """Get the current schema version (highest applied migration number).""" + _ensure_schema_version_table(conn) + row = conn.execute("SELECT MAX(version) FROM _schema_version").fetchone() + return row[0] if row[0] is not None else 0 diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 78e9c5f7..6eb2f1af 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -2303,3 +2303,18 @@ def get_renderer(format: str, image_export_mode: Optional[str] = None) -> Render mode = image_export_mode or "referenced" return MarkdownRenderer(image_export_mode=mode) raise ValueError(f"Unsupported format: {format}") + + +def is_html_outdated(html_file_path: Path) -> bool: + """Check if an HTML file is outdated based on its version comment. + + This is a convenience function that uses the HtmlRenderer's is_outdated method. + + Returns: + True if the file should be regenerated (missing version, different version, or file doesn't exist). + False if the file is current. 
+ """ + from .html.renderer import HtmlRenderer + + renderer = HtmlRenderer() + return renderer.is_outdated(html_file_path) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 760dd3d3..146a709c 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -521,11 +521,9 @@ def update_stats(self) -> None: ) # Get project name using shared logic - working_directories = None + working_directories: List[str] = [] try: - project_cache = self.cache_manager.get_cached_project_data() - if project_cache and project_cache.working_directories: - working_directories = project_cache.working_directories + working_directories = self.cache_manager.get_working_directories() except Exception: # Fall back to directory name if cache fails pass diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index 9fe494c3..0456c868 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -6,7 +6,6 @@ from pathlib import Path from typing import Optional -from claude_code_log.cache import SessionCacheData from .models import ContentItem, TextContent, TranscriptEntry, UserTranscriptEntry from .factories import ( IDE_DIAGNOSTICS_PATTERN, @@ -201,6 +200,17 @@ def extract_working_directories( return [path for path, _ in sorted_dirs] +# IDE tag patterns for compact preview rendering (same as renderer.py) +IDE_OPENED_FILE_PATTERN = re.compile( + r"(.*?)", re.DOTALL +) +IDE_SELECTION_PATTERN = re.compile(r"(.*?)", re.DOTALL) +IDE_DIAGNOSTICS_PATTERN = re.compile( + r"\s*(.*?)\s*", + re.DOTALL, +) + + def _compact_ide_tags_for_preview(text_content: str) -> str: """Replace verbose IDE/system tags with compact emoji indicators for previews. 
diff --git a/test/__snapshots__/test_snapshot_html.ambr b/test/__snapshots__/test_snapshot_html.ambr index bb6c3651..863b29ec 100644 --- a/test/__snapshots__/test_snapshot_html.ambr +++ b/test/__snapshots__/test_snapshot_html.ambr @@ -4287,12 +4287,88 @@ .highlight .vi { color: #19177C } /* Name.Variable.Instance */ .highlight .vm { color: #19177C } /* Name.Variable.Magic */ .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ + /* Page navigation styles for paginated combined transcripts */ + + .page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; + } + + .page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; + } + + .page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); + } + + .page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; + } + + .page-stats .stat { + display: flex; + align-items: center; + gap: 5px; + } + + .page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); + } + + .page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; + } + + .page-nav-link:hover { + background-color: var(--session-bg-dimmed); + transform: translateY(-1px); + } + + .page-nav-link.prev::before { + content: ''; + } + + .page-nav-link.next::after { + content: ''; + }

Test Session

+ + @@ -9088,12 +9164,88 @@ .highlight .vi { color: #19177C } /* Name.Variable.Instance */ .highlight .vm { color: #19177C } /* Name.Variable.Magic */ .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ + /* Page navigation styles for paginated combined transcripts */ + + .page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; + } + + .page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; + } + + .page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); + } + + .page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; + } + + .page-stats .stat { + display: flex; + align-items: center; + gap: 5px; + } + + .page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); + } + + .page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; + } + + .page-nav-link:hover { + background-color: var(--session-bg-dimmed); + transform: translateY(-1px); + } + + .page-nav-link.prev::before { + content: ''; + } + + .page-nav-link.next::after { + content: ''; + }

Edge Cases

+ + @@ -11556,7 +11708,7 @@ - Claude Transcripts - tmp + Claude Transcripts - test_multi_session_html0 -

Claude Transcripts - tmp

+

Claude Transcripts - test_multi_session_html0

+ + @@ -18923,12 +19151,88 @@ .highlight .vi { color: #19177C } /* Name.Variable.Instance */ .highlight .vm { color: #19177C } /* Name.Variable.Magic */ .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ + /* Page navigation styles for paginated combined transcripts */ + + .page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; + } + + .page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; + } + + .page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); + } + + .page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; + } + + .page-stats .stat { + display: flex; + align-items: center; + gap: 5px; + } + + .page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); + } + + .page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; + } + + .page-nav-link:hover { + background-color: var(--session-bg-dimmed); + transform: translateY(-1px); + } + + .page-nav-link.prev::before { + content: ''; + } + + .page-nav-link.next::after { + content: ''; + }

Test Transcript

+ + diff --git a/test/test_cache.py b/test/test_cache.py index 4cb4a23f..8bb4302d 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -1,10 +1,8 @@ #!/usr/bin/env python3 """Tests for caching functionality.""" -import json import tempfile from pathlib import Path -from datetime import datetime from unittest.mock import patch import pytest @@ -12,7 +10,6 @@ from claude_code_log.cache import ( CacheManager, get_library_version, - ProjectCache, SessionCacheData, ) from claude_code_log.models import ( @@ -30,7 +27,10 @@ def temp_project_dir(): """Create a temporary project directory for testing.""" with tempfile.TemporaryDirectory() as temp_dir: - yield Path(temp_dir) + # Create project subdirectory so db_path (parent/cache.db) is unique per test + project_dir = Path(temp_dir) / "project" + project_dir.mkdir() + yield project_dir @pytest.fixture @@ -101,16 +101,16 @@ def test_initialization(self, temp_project_dir, mock_version): assert cache_manager.project_path == temp_project_dir assert cache_manager.library_version == mock_version - assert cache_manager.cache_dir == temp_project_dir / "cache" - assert cache_manager.cache_dir.exists() + # SQLite database should be created at parent level + assert cache_manager.db_path == temp_project_dir.parent / "cache.db" + assert cache_manager.db_path.exists() - def test_cache_file_path(self, cache_manager, temp_project_dir): - """Test cache file path generation.""" - jsonl_path = temp_project_dir / "test.jsonl" - cache_path = cache_manager._get_cache_file_path(jsonl_path) - - expected = temp_project_dir / "cache" / "test.json" - assert cache_path == expected + def test_database_path(self, cache_manager, temp_project_dir): + """Test that SQLite database is created at the correct location.""" + # Database should be at parent level (projects_dir/cache.db) + expected_db = temp_project_dir.parent / "cache.db" + assert cache_manager.db_path == expected_db + assert expected_db.exists() def test_save_and_load_entries( self, 
cache_manager, temp_project_dir, sample_entries @@ -122,9 +122,8 @@ def test_save_and_load_entries( # Save entries to cache cache_manager.save_cached_entries(jsonl_path, sample_entries) - # Verify cache file exists - cache_file = cache_manager._get_cache_file_path(jsonl_path) - assert cache_file.exists() + # Verify file is cached + assert cache_manager.is_file_cached(jsonl_path) # Load entries from cache loaded_entries = cache_manager.load_cached_entries(jsonl_path) @@ -136,30 +135,36 @@ def test_save_and_load_entries( assert loaded_entries[1].type == "assistant" assert loaded_entries[2].type == "summary" - def test_timestamp_based_cache_structure( + def test_message_storage_with_timestamps( self, cache_manager, temp_project_dir, sample_entries ): - """Test that cache uses timestamp-based structure.""" + """Test that messages are stored with correct timestamps in SQLite.""" + import sqlite3 + jsonl_path = temp_project_dir / "test.jsonl" jsonl_path.write_text("dummy content", encoding="utf-8") cache_manager.save_cached_entries(jsonl_path, sample_entries) - # Read raw cache file - cache_file = cache_manager._get_cache_file_path(jsonl_path) - with open(cache_file, "r") as f: - cache_data = json.load(f) - - # Verify timestamp-based structure - assert isinstance(cache_data, dict) - assert "2023-01-01T10:00:00Z" in cache_data - assert "2023-01-01T10:01:00Z" in cache_data - assert "_no_timestamp" in cache_data # Summary entry - - # Verify entry grouping - assert len(cache_data["2023-01-01T10:00:00Z"]) == 1 - assert len(cache_data["2023-01-01T10:01:00Z"]) == 1 - assert len(cache_data["_no_timestamp"]) == 1 + # Query the SQLite database directly to verify structure + # Filter by project_id since database is shared between tests + conn = sqlite3.connect(cache_manager.db_path) + conn.row_factory = sqlite3.Row + cursor = conn.execute( + "SELECT timestamp, type FROM messages WHERE project_id = ? 
ORDER BY timestamp NULLS LAST", + (cache_manager._project_id,), + ) + rows = cursor.fetchall() + conn.close() + + # Verify entries are stored with timestamps + assert len(rows) == 3 + assert rows[0]["timestamp"] == "2023-01-01T10:00:00Z" + assert rows[0]["type"] == "user" + assert rows[1]["timestamp"] == "2023-01-01T10:01:00Z" + assert rows[1]["type"] == "assistant" + assert rows[2]["timestamp"] is None # Summary has no timestamp + assert rows[2]["type"] == "summary" def test_cache_invalidation_file_modification( self, cache_manager, temp_project_dir, sample_entries @@ -186,17 +191,10 @@ def test_cache_invalidation_version_mismatch(self, temp_project_dir): # Create cache with version 1.0.0 with patch("claude_code_log.cache.get_library_version", return_value="1.0.0"): cache_manager_v1 = CacheManager(temp_project_dir, "1.0.0") - # Create some cache data - index_data = ProjectCache( - version="1.0.0", - cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(temp_project_dir), - cached_files={}, - sessions={}, - ) - with open(cache_manager_v1.index_file, "w") as f: - json.dump(index_data.model_dump(), f) + # Verify project was created with version 1.0.0 + cached_data = cache_manager_v1.get_cached_project_data() + assert cached_data is not None + assert cached_data.version == "1.0.0" # Create new cache manager with different version with patch("claude_code_log.cache.get_library_version", return_value="2.0.0"): @@ -269,16 +267,22 @@ def test_clear_cache(self, cache_manager, temp_project_dir, sample_entries): # Create cache cache_manager.save_cached_entries(jsonl_path, sample_entries) - cache_file = cache_manager._get_cache_file_path(jsonl_path) - assert cache_file.exists() - assert cache_manager.index_file.exists() + assert cache_manager.is_file_cached(jsonl_path) + + # Verify data exists before clearing + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert 
len(cached_data.cached_files) > 0 # Clear cache cache_manager.clear_cache() - # Verify files are deleted - assert not cache_file.exists() - assert not cache_manager.index_file.exists() + # Verify cache is cleared (no more files or sessions) + assert not cache_manager.is_file_cached(jsonl_path) + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) == 0 + assert len(cached_data.sessions) == 0 def test_session_cache_updates(self, cache_manager): """Test updating session cache data.""" @@ -586,17 +590,15 @@ def test_breaking_changes_0_8_0(self, temp_project_dir): class TestCacheErrorHandling: """Test cache error handling and edge cases.""" - def test_corrupted_cache_file(self, cache_manager, temp_project_dir): - """Test handling of corrupted cache files.""" + def test_missing_cache_entry(self, cache_manager, temp_project_dir): + """Test handling when cache entry doesn't exist.""" jsonl_path = temp_project_dir / "test.jsonl" jsonl_path.write_text("dummy content", encoding="utf-8") - # Create corrupted cache file - cache_file = cache_manager._get_cache_file_path(jsonl_path) - cache_file.parent.mkdir(exist_ok=True) - cache_file.write_text("invalid json content", encoding="utf-8") + # File exists but not cached + assert not cache_manager.is_file_cached(jsonl_path) - # Should handle gracefully + # Should return None when not cached result = cache_manager.load_cached_entries(jsonl_path) assert result is None diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index 25ffcc7b..2bce66df 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -5,7 +5,6 @@ import tempfile from pathlib import Path from unittest.mock import patch -from datetime import datetime import pytest from click.testing import CliRunner @@ -96,9 +95,9 @@ def test_cli_no_cache_flag(self, setup_test_project): result1 = runner.invoke(main, [str(project_dir)]) assert result1.exit_code == 
0 - # Check if cache was created - cache_dir = project_dir / "cache" - assert cache_dir.exists() + # Check if SQLite cache was created at parent level + cache_db = project_dir.parent / "cache.db" + assert cache_db.exists() # Clear the cache runner.invoke(main, [str(project_dir), "--clear-cache"]) @@ -107,12 +106,14 @@ def test_cli_no_cache_flag(self, setup_test_project): result2 = runner.invoke(main, [str(project_dir), "--no-cache"]) assert result2.exit_code == 0 - # Cache should not be created - cache_files = list(cache_dir.glob("*.json")) if cache_dir.exists() else [] - assert len(cache_files) == 0 + # Cache should be empty (project should not be populated) + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert cached_data.total_message_count == 0 def test_cli_clear_cache_flag(self, setup_test_project): - """Test --clear-cache flag removes cache files.""" + """Test --clear-cache flag clears cache data.""" project_dir = setup_test_project runner = CliRunner() @@ -121,19 +122,21 @@ def test_cli_clear_cache_flag(self, setup_test_project): result1 = runner.invoke(main, [str(project_dir)]) assert result1.exit_code == 0 - # Verify cache exists - cache_dir = project_dir / "cache" - assert cache_dir.exists() - cache_files = list(cache_dir.glob("*.json")) - assert len(cache_files) > 0 + # Verify cache exists with data + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert cached_data.total_message_count > 0 # Clear cache result2 = runner.invoke(main, [str(project_dir), "--clear-cache"]) assert result2.exit_code == 0 - # Verify cache is cleared - cache_files = list(cache_dir.glob("*.json")) if cache_dir.exists() else [] - assert len(cache_files) == 0 + # Verify cache is cleared (no files or sessions) + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = 
cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) == 0 def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): """Test caching with --all-projects flag.""" @@ -143,7 +146,7 @@ def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): project_dir.mkdir() jsonl_file = project_dir / f"session-{i}.jsonl" - with open(jsonl_file, "w") as f: + with open(jsonl_file, "w", encoding="utf-8") as f: for entry in sample_jsonl_data: # Modify session ID for each project entry_copy = entry.copy() @@ -157,14 +160,17 @@ def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): result = runner.invoke(main, [str(temp_projects_dir), "--all-projects"]) assert result.exit_code == 0 - # Verify cache created for each project + # Verify SQLite cache database created at projects level + cache_db = temp_projects_dir / "cache.db" + assert cache_db.exists() + + # Verify cache data exists for each project for i in range(3): project_dir = temp_projects_dir / f"project-{i}" - cache_dir = project_dir / "cache" - assert cache_dir.exists() - - cache_files = list(cache_dir.glob("*.json")) - assert len(cache_files) >= 1 # At least index.json + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) >= 1 def test_cli_date_filtering_with_cache(self, setup_test_project): """Test date filtering works correctly with caching.""" @@ -195,11 +201,15 @@ def test_convert_jsonl_to_html_with_cache(self, setup_test_project): output1 = convert_jsonl_to_html(input_path=project_dir, use_cache=True) assert output1.exists() - # Verify cache was created - cache_dir = project_dir / "cache" - assert cache_dir.exists() - cache_files = list(cache_dir.glob("*.json")) - assert len(cache_files) >= 1 + # Verify SQLite cache was created + cache_db = project_dir.parent / "cache.db" + 
assert cache_db.exists() + + # Verify cache has data + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) >= 1 # Second conversion (should use cache) output2 = convert_jsonl_to_html(input_path=project_dir, use_cache=True) @@ -213,11 +223,11 @@ def test_convert_jsonl_to_html_no_cache(self, setup_test_project): output = convert_jsonl_to_html(input_path=project_dir, use_cache=False) assert output.exists() - # Cache should not be created - cache_dir = project_dir / "cache" - if cache_dir.exists(): - cache_files = list(cache_dir.glob("*.json")) - assert len(cache_files) == 0 + # SQLite db may still exist from fixture setup, but project data should be empty + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert cached_data.total_message_count == 0 def test_process_projects_hierarchy_with_cache( self, temp_projects_dir, sample_jsonl_data @@ -229,7 +239,7 @@ def test_process_projects_hierarchy_with_cache( project_dir.mkdir() jsonl_file = project_dir / f"session-{i}.jsonl" - with open(jsonl_file, "w") as f: + with open(jsonl_file, "w", encoding="utf-8") as f: for entry in sample_jsonl_data: entry_copy = entry.copy() if "sessionId" in entry_copy: @@ -242,11 +252,17 @@ def test_process_projects_hierarchy_with_cache( ) assert output1.exists() - # Verify caches were created + # Verify SQLite cache database was created + cache_db = temp_projects_dir / "cache.db" + assert cache_db.exists() + + # Verify cache data exists for each project for i in range(2): project_dir = temp_projects_dir / f"project-{i}" - cache_dir = project_dir / "cache" - assert cache_dir.exists() + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) >= 1 # Second 
processing (should use cache) output2 = process_projects_hierarchy( @@ -306,7 +322,7 @@ def test_cache_performance_with_large_project(self, temp_projects_dir): ) jsonl_file = project_dir / "large-session.jsonl" - with open(jsonl_file, "w") as f: + with open(jsonl_file, "w", encoding="utf-8") as f: for entry in large_jsonl_data: f.write(json.dumps(entry) + "\n") @@ -415,19 +431,10 @@ def test_cache_version_upgrade_scenario(self, setup_test_project): # Create cache with old version with patch("claude_code_log.cache.get_library_version", return_value="1.0.0"): cache_manager_old = CacheManager(project_dir, "1.0.0") - # Create some dummy cache data - from claude_code_log.cache import ProjectCache - - old_cache = ProjectCache( - version="1.0.0", - cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(project_dir), - cached_files={}, - sessions={}, - ) - with open(cache_manager_old.index_file, "w") as f: - json.dump(old_cache.model_dump(), f) + # Verify project was created in SQLite database + cached_data = cache_manager_old.get_cached_project_data() + assert cached_data is not None + assert cached_data.version == "1.0.0" # Process with new version (should handle version mismatch) with patch("claude_code_log.cache.get_library_version", return_value="2.0.0"): diff --git a/test/test_cache_sqlite_integrity.py b/test/test_cache_sqlite_integrity.py new file mode 100644 index 00000000..eaa14a9b --- /dev/null +++ b/test/test_cache_sqlite_integrity.py @@ -0,0 +1,908 @@ +#!/usr/bin/env python3 +"""Comprehensive SQL-level integrity tests for SQLite cache.""" + +import json +import sqlite3 +import tempfile +import threading +import time +from pathlib import Path + +import pytest + +from claude_code_log.cache import CacheManager, SessionCacheData +from claude_code_log.models import ( + AssistantMessage, + AssistantTranscriptEntry, + TextContent, + ThinkingContent, + ToolResultContent, + ToolUseContent, + UsageInfo, + UserMessage, + 
UserTranscriptEntry, +) + + +@pytest.fixture +def temp_project_dir(): + """Create a temporary project directory.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_dir = Path(temp_dir) / "test-project" + project_dir.mkdir() + yield project_dir + + +@pytest.fixture +def cache_manager(temp_project_dir): + """Create a cache manager for testing.""" + return CacheManager(temp_project_dir, "1.0.0") + + +@pytest.fixture +def sample_user_entry(): + """Create a sample user transcript entry.""" + return UserTranscriptEntry( + type="user", + uuid="user-123", + timestamp="2024-01-01T10:00:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test/path", + message=UserMessage(role="user", content="Hello, world!"), + ) + + +@pytest.fixture +def sample_assistant_entry(): + """Create a sample assistant transcript entry with token usage.""" + return AssistantTranscriptEntry( + type="assistant", + uuid="assistant-123", + timestamp="2024-01-01T10:01:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid="user-123", + isSidechain=False, + userType="assistant", + cwd="/test/path", + requestId="req-123", + message=AssistantMessage( + id="msg-123", + type="message", + role="assistant", + model="claude-3", + content=[TextContent(type="text", text="Hi there!")], + usage=UsageInfo( + input_tokens=100, + output_tokens=50, + cache_creation_input_tokens=10, + cache_read_input_tokens=5, + ), + ), + ) + + +class TestCascadeDelete: + """Tests for cascade delete behaviour.""" + + def test_cascade_delete_project_removes_all_nested_records( + self, temp_project_dir, sample_user_entry, sample_assistant_entry + ): + """Deleting project cascades to files, messages, sessions.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create a JSONL file with entries + jsonl_file = temp_project_dir / "test.jsonl" + jsonl_file.write_text( + json.dumps(sample_user_entry.model_dump()) + + "\n" + + 
json.dumps(sample_assistant_entry.model_dump()) + + "\n", + encoding="utf-8", + ) + + # Save entries to cache + cache_manager.save_cached_entries( + jsonl_file, [sample_user_entry, sample_assistant_entry] + ) + + # Update session cache + cache_manager.update_session_cache( + { + "session-1": SessionCacheData( + session_id="session-1", + summary="Test session", + first_timestamp="2024-01-01T10:00:00Z", + last_timestamp="2024-01-01T10:01:00Z", + message_count=2, + first_user_message="Hello, world!", + cwd="/test/path", + total_input_tokens=100, + total_output_tokens=50, + ) + } + ) + + # Get project ID + project_id = cache_manager._project_id + + # Verify data exists + with cache_manager._get_connection() as conn: + files = conn.execute( + "SELECT COUNT(*) FROM cached_files WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + messages = conn.execute( + "SELECT COUNT(*) FROM messages WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + sessions = conn.execute( + "SELECT COUNT(*) FROM sessions WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + + assert files > 0 + assert messages > 0 + assert sessions > 0 + + # Delete the project + with cache_manager._get_connection() as conn: + conn.execute("DELETE FROM projects WHERE id = ?", (project_id,)) + conn.commit() + + # Verify cascade delete removed all nested records + with cache_manager._get_connection() as conn: + files = conn.execute( + "SELECT COUNT(*) FROM cached_files WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + messages = conn.execute( + "SELECT COUNT(*) FROM messages WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + sessions = conn.execute( + "SELECT COUNT(*) FROM sessions WHERE project_id = ?", + (project_id,), + ).fetchone()[0] + + assert files == 0 + assert messages == 0 + assert sessions == 0 + + +class TestTokenSumVerification: + """Tests for token sum calculations.""" + + def test_session_token_totals_match_message_sums( + self, temp_project_dir, 
sample_assistant_entry + ): + """Session token totals equal sum of message tokens.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create multiple assistant entries with known token values + entries = [] + total_input = 0 + total_output = 0 + + for i in range(5): + entry = AssistantTranscriptEntry( + type="assistant", + uuid=f"assistant-{i}", + timestamp=f"2024-01-01T10:{i:02d}:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="assistant", + cwd="/test/path", + requestId=f"req-{i}", + message=AssistantMessage( + id=f"msg-{i}", + type="message", + role="assistant", + model="claude-3", + content=[TextContent(type="text", text=f"Response {i}")], + usage=UsageInfo( + input_tokens=100 + i * 10, + output_tokens=50 + i * 5, + ), + ), + ) + entries.append(entry) + total_input += 100 + i * 10 + total_output += 50 + i * 5 + + # Save entries + jsonl_file = temp_project_dir / "test.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Query actual sums from database + with cache_manager._get_connection() as conn: + row = conn.execute( + """ + SELECT + COALESCE(SUM(input_tokens), 0) as total_input, + COALESCE(SUM(output_tokens), 0) as total_output + FROM messages + WHERE project_id = ? 
AND session_id = 'session-1' + """, + (cache_manager._project_id,), + ).fetchone() + + assert row["total_input"] == total_input + assert row["total_output"] == total_output + + +class TestForeignKeyConstraints: + """Tests for foreign key constraint enforcement.""" + + def test_cannot_insert_message_without_valid_file_id(self, cache_manager): + """Foreign key prevents orphaned messages.""" + with cache_manager._get_connection() as conn: + # Attempt to insert message with non-existent file_id + with pytest.raises(sqlite3.IntegrityError): + conn.execute( + """ + INSERT INTO messages (project_id, file_id, type, content) + VALUES (?, 99999, 'user', '{}') + """, + (cache_manager._project_id,), + ) + + def test_cannot_insert_message_without_valid_project_id(self, cache_manager): + """Foreign key prevents messages with invalid project.""" + with cache_manager._get_connection() as conn: + # First create a valid file + conn.execute( + """ + INSERT INTO cached_files (project_id, file_name, file_path, source_mtime, cached_mtime) + VALUES (?, 'test.jsonl', '/test/test.jsonl', 0, 0) + """, + (cache_manager._project_id,), + ) + file_id = conn.execute( + "SELECT id FROM cached_files WHERE file_name = 'test.jsonl'" + ).fetchone()[0] + + # Attempt to insert message with non-existent project_id + with pytest.raises(sqlite3.IntegrityError): + conn.execute( + """ + INSERT INTO messages (project_id, file_id, type, content) + VALUES (99999, ?, 'user', '{}') + """, + (file_id,), + ) + + +class TestSerializationRoundTrip: + """Tests for message serialization/deserialization.""" + + def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): + """Tool use, images, thinking content survive JSON serialization.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create entries with complex content types + entries = [ + # Tool use + AssistantTranscriptEntry( + type="assistant", + uuid="tool-use-msg", + timestamp="2024-01-01T10:00:00Z", + sessionId="session-1", + 
version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="assistant", + cwd="/test", + requestId="req-1", + message=AssistantMessage( + id="msg-tool", + type="message", + role="assistant", + model="claude-3", + content=[ + ToolUseContent( + type="tool_use", + id="tool-123", + name="read_file", + input={"path": "/test/file.txt"}, + ) + ], + ), + ), + # Tool result + UserTranscriptEntry( + type="user", + uuid="tool-result-msg", + timestamp="2024-01-01T10:01:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid="tool-use-msg", + isSidechain=False, + userType="tool_result", + cwd="/test", + message=UserMessage( + role="user", + content=[ + ToolResultContent( + type="tool_result", + tool_use_id="tool-123", + content="File contents here", + ) + ], + ), + ), + # Thinking content + AssistantTranscriptEntry( + type="assistant", + uuid="thinking-msg", + timestamp="2024-01-01T10:02:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="assistant", + cwd="/test", + requestId="req-2", + message=AssistantMessage( + id="msg-thinking", + type="message", + role="assistant", + model="claude-3", + content=[ + ThinkingContent( + type="thinking", + thinking="Let me think about this...", + ), + TextContent(type="text", text="Here's my answer"), + ], + ), + ), + ] + + # Save entries + jsonl_file = temp_project_dir / "complex.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Load and compare + loaded = cache_manager.load_cached_entries(jsonl_file) + assert loaded is not None + assert len(loaded) == len(entries) + + for original, loaded_entry in zip(entries, loaded): + # Compare key fields - exact serialization may differ due to default values + assert original.type == loaded_entry.type + assert original.uuid == loaded_entry.uuid + assert original.timestamp == loaded_entry.timestamp + assert 
original.sessionId == loaded_entry.sessionId + + # For assistant entries, verify message content types are preserved + if hasattr(original, "message") and hasattr(original.message, "content"): + orig_content = original.message.content + loaded_content = loaded_entry.message.content + assert len(orig_content) == len(loaded_content) + for orig_item, loaded_item in zip(orig_content, loaded_content): + assert orig_item.type == loaded_item.type + + +class TestIndexUniquenessConstraints: + """Tests for UNIQUE constraints on indexes.""" + + def test_duplicate_file_name_in_project_fails(self, cache_manager): + """UNIQUE(project_id, file_name) enforced.""" + with cache_manager._get_connection() as conn: + # Insert first file + conn.execute( + """ + INSERT INTO cached_files (project_id, file_name, file_path, source_mtime, cached_mtime) + VALUES (?, 'duplicate.jsonl', '/path1', 0, 0) + """, + (cache_manager._project_id,), + ) + conn.commit() + + # Attempt to insert duplicate file name + with pytest.raises(sqlite3.IntegrityError): + conn.execute( + """ + INSERT INTO cached_files (project_id, file_name, file_path, source_mtime, cached_mtime) + VALUES (?, 'duplicate.jsonl', '/path2', 0, 0) + """, + (cache_manager._project_id,), + ) + + def test_duplicate_session_id_in_project_fails(self, cache_manager): + """UNIQUE(project_id, session_id) enforced.""" + with cache_manager._get_connection() as conn: + # Insert first session + conn.execute( + """ + INSERT INTO sessions (project_id, session_id, first_timestamp, last_timestamp) + VALUES (?, 'dup-session', '2024-01-01', '2024-01-01') + """, + (cache_manager._project_id,), + ) + conn.commit() + + # Attempt to insert duplicate session_id + with pytest.raises(sqlite3.IntegrityError): + conn.execute( + """ + INSERT INTO sessions (project_id, session_id, first_timestamp, last_timestamp) + VALUES (?, 'dup-session', '2024-01-02', '2024-01-02') + """, + (cache_manager._project_id,), + ) + + +class TestTimestampOrdering: + """Tests for 
message timestamp ordering.""" + + def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry): + """Messages retrieved in timestamp order.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create entries with out-of-order timestamps + entries = [] + timestamps = [ + "2024-01-01T10:05:00Z", + "2024-01-01T10:01:00Z", + "2024-01-01T10:03:00Z", + "2024-01-01T10:02:00Z", + "2024-01-01T10:04:00Z", + ] + + for i, ts in enumerate(timestamps): + entry = UserTranscriptEntry( + type="user", + uuid=f"user-{i}", + timestamp=ts, + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage(role="user", content=f"Message {i}"), + ) + entries.append(entry) + + jsonl_file = temp_project_dir / "order.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Load and verify order + loaded = cache_manager.load_cached_entries(jsonl_file) + assert loaded is not None + + loaded_timestamps = [ + ts for e in loaded if (ts := getattr(e, "timestamp", None)) is not None + ] + assert loaded_timestamps == sorted(loaded_timestamps) + + +class TestNullTokenHandling: + """Tests for NULL token value handling.""" + + def test_null_tokens_handled_in_aggregates(self, temp_project_dir): + """NULL token values don't corrupt sums.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create mix of entries with and without tokens + entries = [ + # Entry with tokens + AssistantTranscriptEntry( + type="assistant", + uuid="with-tokens", + timestamp="2024-01-01T10:00:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="assistant", + cwd="/test", + requestId="req-1", + message=AssistantMessage( + id="msg-1", + type="message", + role="assistant", + model="claude-3", + content=[TextContent(type="text", text="With 
tokens")], + usage=UsageInfo(input_tokens=100, output_tokens=50), + ), + ), + # Entry without usage (NULL tokens) + UserTranscriptEntry( + type="user", + uuid="without-tokens", + timestamp="2024-01-01T10:01:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage(role="user", content="No tokens"), + ), + ] + + jsonl_file = temp_project_dir / "mixed.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Query sums - COALESCE should handle NULLs + with cache_manager._get_connection() as conn: + row = conn.execute( + """ + SELECT + COALESCE(SUM(input_tokens), 0) as total_input, + COALESCE(SUM(output_tokens), 0) as total_output + FROM messages + WHERE project_id = ? + """, + (cache_manager._project_id,), + ).fetchone() + + # Should only count the entry with tokens + assert row["total_input"] == 100 + assert row["total_output"] == 50 + + +class TestMessageFileRelationship: + """Tests for message-file relationships.""" + + def test_cached_file_message_count_matches_actual( + self, temp_project_dir, sample_user_entry, sample_assistant_entry + ): + """message_count column matches COUNT(*) FROM messages.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + entries = [sample_user_entry, sample_assistant_entry] + jsonl_file = temp_project_dir / "count.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + with cache_manager._get_connection() as conn: + # Get stored message count + file_row = conn.execute( + "SELECT id, message_count FROM cached_files WHERE file_name = ?", + ("count.jsonl",), + ).fetchone() + + # Get actual count + actual_count = conn.execute( + "SELECT COUNT(*) FROM messages WHERE file_id = ?", + (file_row["id"],), + 
).fetchone()[0] + + assert file_row["message_count"] == actual_count + assert file_row["message_count"] == len(entries) + + +class TestWALMode: + """Tests for WAL journal mode.""" + + def test_wal_journal_mode_enabled(self, cache_manager): + """Verify WAL mode is active.""" + with cache_manager._get_connection() as conn: + row = conn.execute("PRAGMA journal_mode").fetchone() + assert row[0] == "wal" + + +class TestConcurrentAccess: + """Tests for concurrent database access.""" + + def test_concurrent_readers_dont_block(self, temp_project_dir): + """Multiple readers can access simultaneously.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Add some data + entry = UserTranscriptEntry( + type="user", + uuid="user-1", + timestamp="2024-01-01T10:00:00Z", + sessionId="session-1", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage(role="user", content="Test"), + ) + + jsonl_file = temp_project_dir / "concurrent.jsonl" + jsonl_file.write_text(json.dumps(entry.model_dump()), encoding="utf-8") + cache_manager.save_cached_entries(jsonl_file, [entry]) + + results = [] + errors = [] + + def read_data(): + try: + cm = CacheManager(temp_project_dir, "1.0.0") + data = cm.get_cached_project_data() + results.append(data is not None) + except Exception as e: + errors.append(str(e)) + + # Start multiple reader threads + threads = [threading.Thread(target=read_data) for _ in range(5)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + + assert len(errors) == 0, f"Errors occurred: {errors}" + assert all(results), "Not all reads succeeded" + + +class TestLargeDatasetPerformance: + """Tests for performance with large datasets.""" + + def test_query_performance_with_large_dataset(self, temp_project_dir): + """Queries complete in reasonable time with large datasets.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create 1000 entries (reduced from 10k for test 
speed) + entries = [] + for i in range(1000): + entry = UserTranscriptEntry( + type="user", + uuid=f"user-{i}", + timestamp=f"2024-01-{(i % 30) + 1:02d}T{i % 24:02d}:00:00Z", + sessionId=f"session-{i % 10}", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage(role="user", content=f"Message {i}"), + ) + entries.append(entry) + + jsonl_file = temp_project_dir / "large.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Time filtered loading + start = time.time() + loaded = cache_manager.load_cached_entries_filtered( + jsonl_file, "2024-01-15", "2024-01-20" + ) + elapsed = time.time() - start + + assert loaded is not None + assert elapsed < 2.0, f"Query took too long: {elapsed:.2f}s" + + +class TestSessionBoundaryDetection: + """Tests for session boundary correctness.""" + + def test_sessions_contain_correct_messages(self, temp_project_dir): + """Each session contains only its messages.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create entries for multiple sessions + entries = [] + for session_num in range(3): + for msg_num in range(5): + entry = UserTranscriptEntry( + type="user", + uuid=f"user-s{session_num}-m{msg_num}", + timestamp=f"2024-01-01T{10 + session_num}:{msg_num * 10:02d}:00Z", + sessionId=f"session-{session_num}", + version="1.0.0", + parentUuid=None, + isSidechain=False, + userType="external", + cwd="/test", + message=UserMessage( + role="user", + content=f"Session {session_num} message {msg_num}", + ), + ) + entries.append(entry) + + jsonl_file = temp_project_dir / "sessions.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Verify each session has exactly 5 messages + with cache_manager._get_connection() as conn: + 
for session_num in range(3): + count = conn.execute( + "SELECT COUNT(*) FROM messages WHERE project_id = ? AND session_id = ?", + (cache_manager._project_id, f"session-{session_num}"), + ).fetchone()[0] + assert count == 5, ( + f"Session {session_num} has {count} messages, expected 5" + ) + + +class TestCacheStatsAccuracy: + """Tests for cache statistics accuracy.""" + + def test_cache_stats_match_actual_counts( + self, temp_project_dir, sample_user_entry, sample_assistant_entry + ): + """get_cache_stats() returns accurate data.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + entries = [sample_user_entry, sample_assistant_entry] + jsonl_file = temp_project_dir / "stats.jsonl" + jsonl_file.write_text( + "\n".join(json.dumps(e.model_dump()) for e in entries), + encoding="utf-8", + ) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Update aggregates + cache_manager.update_project_aggregates( + total_message_count=2, + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=10, + total_cache_read_tokens=5, + earliest_timestamp="2024-01-01T10:00:00Z", + latest_timestamp="2024-01-01T10:01:00Z", + ) + + cache_manager.update_session_cache( + { + "session-1": SessionCacheData( + session_id="session-1", + summary=None, + first_timestamp="2024-01-01T10:00:00Z", + last_timestamp="2024-01-01T10:01:00Z", + message_count=2, + first_user_message="Hello, world!", + ) + } + ) + + stats = cache_manager.get_cache_stats() + + assert stats["cache_enabled"] is True + assert stats["cached_files_count"] == 1 + assert stats["total_cached_messages"] == 2 + assert stats["total_sessions"] == 1 + + +class TestWorkingDirectoryQuery: + """Tests for working directory queries.""" + + def test_get_working_directories_returns_distinct_cwds(self, temp_project_dir): + """get_working_directories() returns unique values.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + # Create sessions with duplicate cwds + 
cache_manager.update_session_cache( + { + "session-1": SessionCacheData( + session_id="session-1", + summary=None, + first_timestamp="2024-01-01T10:00:00Z", + last_timestamp="2024-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + cwd="/path/to/project", + ), + "session-2": SessionCacheData( + session_id="session-2", + summary=None, + first_timestamp="2024-01-02T10:00:00Z", + last_timestamp="2024-01-02T10:01:00Z", + message_count=1, + first_user_message="Test", + cwd="/path/to/project", # Same cwd + ), + "session-3": SessionCacheData( + session_id="session-3", + summary=None, + first_timestamp="2024-01-03T10:00:00Z", + last_timestamp="2024-01-03T10:01:00Z", + message_count=1, + first_user_message="Test", + cwd="/different/path", + ), + } + ) + + cwds = cache_manager.get_working_directories() + + # Should be deduplicated + assert len(cwds) == 2 + assert set(cwds) == {"/path/to/project", "/different/path"} + + +class TestFileModificationDetection: + """Tests for file modification time detection.""" + + def test_mtime_change_invalidates_cache(self, temp_project_dir, sample_user_entry): + """Changing file mtime marks cache as stale.""" + cache_manager = CacheManager(temp_project_dir, "1.0.0") + + jsonl_file = temp_project_dir / "mtime.jsonl" + jsonl_file.write_text( + json.dumps(sample_user_entry.model_dump()), encoding="utf-8" + ) + cache_manager.save_cached_entries(jsonl_file, [sample_user_entry]) + + # Verify cache is valid + assert cache_manager.is_file_cached(jsonl_file) is True + + # Wait and touch file to change mtime + time.sleep(1.1) + jsonl_file.write_text( + json.dumps(sample_user_entry.model_dump()) + "\n", encoding="utf-8" + ) + + # Cache should be invalidated + assert cache_manager.is_file_cached(jsonl_file) is False + + +class TestMigrationIntegrity: + """Tests for migration system integrity.""" + + def test_migration_checksum_stored(self, temp_project_dir): + """Migration checksums are stored in _schema_version.""" + cache_manager = 
CacheManager(temp_project_dir, "1.0.0") + + with cache_manager._get_connection() as conn: + rows = conn.execute( + "SELECT version, filename, checksum FROM _schema_version" + ).fetchall() + + assert len(rows) >= 1 + for row in rows: + assert row["version"] > 0 + assert row["filename"].endswith(".sql") + assert len(row["checksum"]) == 64 # SHA256 hex length + + def test_migration_applied_only_once(self, temp_project_dir): + """Migrations are not re-applied on subsequent runs.""" + # First run + cm1 = CacheManager(temp_project_dir, "1.0.0") + + with cm1._get_connection() as conn: + initial_count = conn.execute( + "SELECT COUNT(*) FROM _schema_version" + ).fetchone()[0] + + # Second run + cm2 = CacheManager(temp_project_dir, "1.0.0") + + with cm2._get_connection() as conn: + final_count = conn.execute( + "SELECT COUNT(*) FROM _schema_version" + ).fetchone()[0] + + assert initial_count == final_count diff --git a/test/test_html_regeneration.py b/test/test_html_regeneration.py index 26da5d32..a0ade6c3 100644 --- a/test/test_html_regeneration.py +++ b/test/test_html_regeneration.py @@ -48,9 +48,13 @@ def test_combined_transcript_regeneration_on_jsonl_change(self, tmp_path): # Second run: No changes, should skip regeneration with patch("builtins.print") as mock_print: convert_jsonl_to_html(project_dir) - mock_print.assert_any_call( - "HTML file combined_transcripts.html is current, skipping regeneration" + # Check for either the per-file skip message (legacy) or the early-exit message (new) + printed_calls = [str(call) for call in mock_print.call_args_list] + skip_found = any( + "skipping regeneration" in call or "All HTML files are current" in call + for call in printed_calls ) + assert skip_found, f"Expected skip message, got: {printed_calls}" # Verify file wasn't regenerated assert output_file.stat().st_mtime == original_mtime @@ -101,15 +105,13 @@ def test_individual_session_regeneration_on_jsonl_change(self, tmp_path): # Second run: No changes, should skip 
regeneration with patch("builtins.print") as mock_print: convert_jsonl_to_html(project_dir, generate_individual_sessions=True) - # Check that session file regeneration was skipped + # Check for either per-file skip message (legacy) or early-exit message (new) printed_calls = [str(call) for call in mock_print.call_args_list] - session_skip_found = any( - "Session file" in call and "skipping regeneration" in call + skip_found = any( + "skipping regeneration" in call or "All HTML files are current" in call for call in printed_calls ) - assert session_skip_found, ( - f"Expected session skip message, got: {printed_calls}" - ) + assert skip_found, f"Expected skip message, got: {printed_calls}" # Verify file wasn't regenerated assert session_file.stat().st_mtime == original_mtime @@ -170,7 +172,7 @@ def test_projects_index_regeneration_on_jsonl_change(self, tmp_path): # Second run: No changes, should skip regeneration with patch("builtins.print") as mock_print: - process_projects_hierarchy(projects_dir) + process_projects_hierarchy(projects_dir, silent=False) mock_print.assert_any_call("Index HTML is current, skipping regeneration") # Verify file wasn't regenerated @@ -314,3 +316,177 @@ def test_single_file_mode_regeneration_behavior(self, tmp_path): # Verify file wasn't regenerated (this is expected behavior for single file mode) assert output_file.stat().st_mtime == original_mtime + + +class TestIncrementalHtmlCache: + """Test incremental HTML regeneration using html_cache table.""" + + def test_html_cache_tracks_generation(self, tmp_path): + """Test that html_cache table correctly tracks when HTML was generated.""" + # Setup project with JSONL + project_dir = tmp_path / "test_project" + project_dir.mkdir() + + test_data_dir = Path(__file__).parent / "test_data" + jsonl_file = project_dir / "test.jsonl" + jsonl_file.write_text( + (test_data_dir / "representative_messages.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", + ) + + # Generate HTML + 
convert_jsonl_to_html(project_dir, generate_individual_sessions=True) + + # Check that html_cache has entries + cache_manager = CacheManager(project_dir, get_library_version()) + combined_cache = cache_manager.get_html_cache("combined_transcripts.html") + + assert combined_cache is not None + assert combined_cache.html_path == "combined_transcripts.html" + assert combined_cache.source_session_id is None # Combined has no session + assert combined_cache.message_count > 0 + + # Check session HTML cache + session_files = list(project_dir.glob("session-*.html")) + assert len(session_files) >= 1 + + session_file = session_files[0] + session_id = session_file.stem.replace("session-", "") + session_cache = cache_manager.get_html_cache(session_file.name) + + assert session_cache is not None + assert session_cache.source_session_id == session_id + assert session_cache.message_count > 0 + + def test_incremental_regeneration_only_updates_changed_sessions(self, tmp_path): + """Test that only sessions with changes are regenerated.""" + # Setup project with two JSONL files (different sessions) + project_dir = tmp_path / "test_project" + project_dir.mkdir() + + # Create two sessions in separate files + session1 = ( + '{"type":"user","timestamp":"2025-01-01T10:00:00Z","parentUuid":null,' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"session1",' + '"version":"1.0.0","uuid":"msg1","message":{"role":"user",' + '"content":[{"type":"text","text":"Session 1 message"}]}}\n' + ) + session2 = ( + '{"type":"user","timestamp":"2025-01-01T11:00:00Z","parentUuid":null,' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"session2",' + '"version":"1.0.0","uuid":"msg2","message":{"role":"user",' + '"content":[{"type":"text","text":"Session 2 message"}]}}\n' + ) + + (project_dir / "session1.jsonl").write_text(session1, encoding="utf-8") + (project_dir / "session2.jsonl").write_text(session2, encoding="utf-8") + + # First run: Generate all HTML + 
convert_jsonl_to_html(project_dir, generate_individual_sessions=True) + + session1_html = project_dir / "session-session1.html" + session2_html = project_dir / "session-session2.html" + assert session1_html.exists() + assert session2_html.exists() + + session1_mtime = session1_html.stat().st_mtime + session2_mtime = session2_html.stat().st_mtime + + # Wait and modify only session1 + time.sleep(1.1) # Ensure > 1.0 second difference + new_msg = ( + '{"type":"user","timestamp":"2025-01-01T10:05:00Z","parentUuid":"msg1",' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"session1",' + '"version":"1.0.0","uuid":"msg1b","message":{"role":"user",' + '"content":[{"type":"text","text":"New message in session 1"}]}}\n' + ) + with open(project_dir / "session1.jsonl", "a", encoding="utf-8") as f: + f.write(new_msg) + + # Second run: Should only regenerate session1 + convert_jsonl_to_html(project_dir, generate_individual_sessions=True) + + # Session 1 should be regenerated (newer mtime) + assert session1_html.stat().st_mtime > session1_mtime + + # Session 2 should NOT be regenerated (same mtime) + assert session2_html.stat().st_mtime == session2_mtime + + def test_html_cache_detects_library_version_change(self, tmp_path): + """Test that HTML is regenerated when library version changes.""" + # Setup project + project_dir = tmp_path / "test_project" + project_dir.mkdir() + + test_data_dir = Path(__file__).parent / "test_data" + jsonl_file = project_dir / "test.jsonl" + jsonl_file.write_text( + (test_data_dir / "representative_messages.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", + ) + + # Generate HTML with current version + convert_jsonl_to_html(project_dir) + + cache_manager = CacheManager(project_dir, get_library_version()) + + # Check staleness with same version + is_stale, reason = cache_manager.is_html_stale("combined_transcripts.html") + assert not is_stale + assert reason == "up_to_date" + + # Create new cache manager with different 
version + cache_manager_new = CacheManager(project_dir, "999.999.999") + is_stale, reason = cache_manager_new.is_html_stale("combined_transcripts.html") + assert is_stale + assert reason == "version_mismatch" + + def test_html_cache_message_count_validation(self, tmp_path): + """Test that session HTML regenerates when message count changes.""" + # Setup project + project_dir = tmp_path / "test_project" + project_dir.mkdir() + + session_msg = ( + '{"type":"user","timestamp":"2025-01-01T10:00:00Z","parentUuid":null,' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"test_session",' + '"version":"1.0.0","uuid":"msg1","message":{"role":"user",' + '"content":[{"type":"text","text":"Test message"}]}}\n' + ) + (project_dir / "test.jsonl").write_text(session_msg, encoding="utf-8") + + # Generate HTML + convert_jsonl_to_html(project_dir, generate_individual_sessions=True) + + cache_manager = CacheManager(project_dir, get_library_version()) + + # Check session is not stale + is_stale, reason = cache_manager.is_html_stale( + "session-test_session.html", "test_session" + ) + assert not is_stale + + # Add a new message (increases count) + time.sleep(1.1) + new_msg = ( + '{"type":"user","timestamp":"2025-01-01T10:01:00Z","parentUuid":"msg1",' + '"isSidechain":false,"userType":"human","cwd":"/tmp","sessionId":"test_session",' + '"version":"1.0.0","uuid":"msg2","message":{"role":"user",' + '"content":[{"type":"text","text":"Second message"}]}}\n' + ) + with open(project_dir / "test.jsonl", "a", encoding="utf-8") as f: + f.write(new_msg) + + # Update cache to reflect new message + ensure_fresh_cache(project_dir, cache_manager) + + # Now session should be stale (message count changed) + is_stale, reason = cache_manager.is_html_stale( + "session-test_session.html", "test_session" + ) + assert is_stale + assert reason == "session_updated" diff --git a/test/test_integration_realistic.py b/test/test_integration_realistic.py index 4431024c..1c051290 100644 --- 
a/test/test_integration_realistic.py +++ b/test/test_integration_realistic.py @@ -183,7 +183,9 @@ def test_projects_dont_merge_by_prefix(self, temp_projects_copy: Path) -> None: content=f"test message {suffix}", session_id=f"session{suffix}", ) - (project_dir / "test.jsonl").write_text(json.dumps(entry) + "\n") + (project_dir / "test.jsonl").write_text( + json.dumps(entry) + "\n", encoding="utf-8" + ) # Process all projects process_projects_hierarchy(temp_projects_copy) @@ -243,13 +245,9 @@ def test_clear_cache_with_projects_dir(self, temp_projects_copy: Path) -> None: ) assert result.exit_code == 0 - # Verify caches were created - cache_exists = False - for project_dir in temp_projects_copy.iterdir(): - if project_dir.is_dir() and (project_dir / "cache").exists(): - cache_exists = True - break - assert cache_exists, "Cache should exist after processing" + # Verify SQLite cache was created + cache_db = temp_projects_copy / "cache.db" + assert cache_db.exists(), "SQLite cache should exist after processing" # Clear caches result = runner.invoke( @@ -265,18 +263,8 @@ def test_clear_cache_with_projects_dir(self, temp_projects_copy: Path) -> None: assert result.exit_code == 0 assert "clear" in result.output.lower() - # Verify all cache files were actually deleted - remaining_cache_files: list[Path] = [] - for project_dir in temp_projects_copy.iterdir(): - if not project_dir.is_dir(): - continue - cache_dir = project_dir / "cache" - if cache_dir.exists(): - remaining_cache_files.extend(cache_dir.glob("*.json")) - - assert not remaining_cache_files, ( - f"Cache files should be deleted but found: {remaining_cache_files}" - ) + # Verify SQLite database was deleted + assert not cache_db.exists(), "SQLite cache database should be deleted" def test_clear_html_with_projects_dir(self, temp_projects_copy: Path) -> None: """Test HTML clearing with custom projects directory.""" @@ -430,17 +418,19 @@ def test_cache_creation_all_projects(self, temp_projects_copy: Path) -> None: 
"""Test cache is created correctly for all projects.""" process_projects_hierarchy(temp_projects_copy) + # Verify SQLite cache database was created + cache_db = temp_projects_copy / "cache.db" + assert cache_db.exists(), "SQLite cache database should exist" + for project_dir in temp_projects_copy.iterdir(): if not project_dir.is_dir() or not list(project_dir.glob("*.jsonl")): continue - cache_file = project_dir / "cache" / "index.json" - assert cache_file.exists(), f"Cache index missing for {project_dir.name}" - - # Verify cache structure - cache_data = json.loads(cache_file.read_text()) - assert "version" in cache_data - assert "sessions" in cache_data + cache_manager = CacheManager(project_dir, get_library_version()) + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None, f"Cache missing for {project_dir.name}" + assert cached_data.version is not None + assert isinstance(cached_data.sessions, dict) def test_cache_invalidation_on_modification(self, temp_projects_copy: Path) -> None: """Test cache detects file modifications.""" @@ -459,12 +449,14 @@ def test_cache_invalidation_on_modification(self, temp_projects_copy: Path) -> N # Modify a file test_file = jsonl_files[0] - original_content = test_file.read_text() + original_content = test_file.read_text(encoding="utf-8") entry = make_valid_user_entry( content="test modification", session_id="test-modification", ) - test_file.write_text(original_content + "\n" + json.dumps(entry) + "\n") + test_file.write_text( + original_content + "\n" + json.dumps(entry) + "\n", encoding="utf-8" + ) # Check if modification is detected modified = cache_manager.get_modified_files(list(project_dir.glob("*.jsonl"))) @@ -478,11 +470,11 @@ def test_cache_version_stored(self, temp_projects_copy: Path) -> None: convert_jsonl_to_html(project_dir) - cache_file = project_dir / "cache" / "index.json" - cache_data = json.loads(cache_file.read_text()) + cache_manager = CacheManager(project_dir, 
get_library_version()) + cached_data = cache_manager.get_cached_project_data() - assert "version" in cache_data - assert cache_data["version"] == get_library_version() + assert cached_data is not None + assert cached_data.version == get_library_version() @pytest.mark.integration @@ -506,7 +498,9 @@ def test_worktree_projects_stay_separate(self, temp_projects_copy: Path) -> None content=f"worktree test {suffix}", session_id=f"session{suffix}", ) - (project_dir / "test.jsonl").write_text(json.dumps(entry) + "\n") + (project_dir / "test.jsonl").write_text( + json.dumps(entry) + "\n", encoding="utf-8" + ) # Process all process_projects_hierarchy(temp_projects_copy) @@ -671,15 +665,15 @@ def test_cache_version_mismatch_triggers_rebuild( pytest.skip("Cache not generated by fixture") # Corrupt version in cache - cache_data = json.loads(cache_index.read_text()) + cache_data = json.loads(cache_index.read_text(encoding="utf-8")) cache_data["version"] = "0.0.0-fake" - cache_index.write_text(json.dumps(cache_data)) + cache_index.write_text(json.dumps(cache_data), encoding="utf-8") # Process should rebuild cache convert_jsonl_to_html(project) # Cache should have correct version now - new_cache_data = json.loads(cache_index.read_text()) + new_cache_data = json.loads(cache_index.read_text(encoding="utf-8")) assert new_cache_data["version"] == get_library_version() def test_missing_cache_files_regenerated(self, projects_with_cache: Path) -> None: @@ -761,7 +755,7 @@ def test_adding_lines_triggers_cache_update( content="New message added", session_id="test-incremental", ) - with open(jsonl_file, "a") as f: + with open(jsonl_file, "a", encoding="utf-8") as f: f.write("\n" + json.dumps(entry) + "\n") time.sleep(0.01) @@ -796,7 +790,7 @@ def test_adding_lines_triggers_html_regeneration( content="Another new message", session_id="test-incremental", ) - with open(jsonl_file, "a") as f: + with open(jsonl_file, "a", encoding="utf-8") as f: f.write("\n" + json.dumps(entry) + "\n") 
time.sleep(0.01) @@ -825,7 +819,7 @@ def test_new_content_appears_in_html(self, projects_with_cache: Path) -> None: content=unique_content, session_id="test-content-check", ) - with open(jsonl_file, "a") as f: + with open(jsonl_file, "a", encoding="utf-8") as f: f.write("\n" + json.dumps(entry) + "\n") convert_jsonl_to_html(project) @@ -848,7 +842,7 @@ def test_new_file_detected_and_processed(self, projects_with_cache: Path) -> Non if not cache_index.exists(): pytest.skip("Cache not generated by fixture") - original_cache = json.loads(cache_index.read_text()) + original_cache = json.loads(cache_index.read_text(encoding="utf-8")) original_session_count = len(original_cache.get("sessions", {})) # Add new JSONL file @@ -857,12 +851,12 @@ def test_new_file_detected_and_processed(self, projects_with_cache: Path) -> Non content="First message in new file", session_id="brand-new-session", ) - new_file.write_text(json.dumps(entry) + "\n") + new_file.write_text(json.dumps(entry) + "\n", encoding="utf-8") convert_jsonl_to_html(project) # Cache should include new session - new_cache = json.loads(cache_index.read_text()) + new_cache = json.loads(cache_index.read_text(encoding="utf-8")) assert len(new_cache.get("sessions", {})) > original_session_count def test_new_session_html_generated(self, projects_with_cache: Path) -> None: @@ -877,7 +871,7 @@ def test_new_session_html_generated(self, projects_with_cache: Path) -> None: content="Message for new session", session_id=new_session_id, ) - new_file.write_text(json.dumps(entry) + "\n") + new_file.write_text(json.dumps(entry) + "\n", encoding="utf-8") convert_jsonl_to_html(project) @@ -905,7 +899,7 @@ def test_index_html_updated_with_new_project_stats( content="Extra session message", session_id="extra-session", ) - new_file.write_text(json.dumps(entry) + "\n") + new_file.write_text(json.dumps(entry) + "\n", encoding="utf-8") time.sleep(0.01) @@ -963,7 +957,7 @@ def test_output_overwrites_existing(self, temp_projects_copy: Path) 
-> None: pytest.skip("JSSoundRecorder test data not available") custom_output = temp_projects_copy / "overwrite_test.html" - custom_output.write_text("original content") + custom_output.write_text("original content", encoding="utf-8") result = runner.invoke(main, [str(project), "-o", str(custom_output)]) @@ -1127,7 +1121,7 @@ def test_index_regenerated_when_project_cache_updates( content="Trigger index update", session_id="index-test", ) - with open(jsonl_file, "a") as f: + with open(jsonl_file, "a", encoding="utf-8") as f: f.write(json.dumps(entry) + "\n") time.sleep(0.01) @@ -1198,13 +1192,13 @@ def test_corrupted_cache_index_handled(self, projects_with_cache: Path) -> None: pytest.skip("Cache not generated by fixture") # Corrupt the cache index - cache_index.write_text("{invalid json") + cache_index.write_text("{invalid json", encoding="utf-8") # Should recover and reprocess convert_jsonl_to_html(project) # Cache should be valid again - cache_data = json.loads(cache_index.read_text()) + cache_data = json.loads(cache_index.read_text(encoding="utf-8")) assert "version" in cache_data def test_missing_cache_directory_handled(self, projects_with_cache: Path) -> None: @@ -1510,7 +1504,7 @@ def test_project_with_many_sessions(self, temp_projects_copy: Path) -> None: session_id=f"stress-{i}", timestamp=f"2024-12-{10 + i % 20:02d}T10:00:00Z", ) - session_file.write_text(json.dumps(entry) + "\n") + session_file.write_text(json.dumps(entry) + "\n", encoding="utf-8") # Should handle many files without error convert_jsonl_to_html(project) @@ -1526,7 +1520,7 @@ def test_large_single_session(self, temp_projects_copy: Path) -> None: large_file = project / "large-session.jsonl" # Create file with 500 messages - with open(large_file, "w") as f: + with open(large_file, "w", encoding="utf-8") as f: for i in range(500): timestamp = f"2024-12-15T{10 + (i // 60):02d}:{i % 60:02d}:00Z" content = f"Message number {i} with some content" diff --git a/test/test_pagination.py 
b/test/test_pagination.py new file mode 100644 index 00000000..44db8b0c --- /dev/null +++ b/test/test_pagination.py @@ -0,0 +1,550 @@ +#!/usr/bin/env python3 +"""Tests for pagination functionality.""" + +import json +import tempfile +from pathlib import Path +from unittest.mock import patch + +import pytest + +from claude_code_log.cache import ( + CacheManager, + SessionCacheData, +) +from claude_code_log.converter import ( + _get_page_html_path, + _assign_sessions_to_pages, +) + + +class TestPageHtmlPath: + """Tests for _get_page_html_path function.""" + + def test_page_1_returns_base_filename(self): + """Page 1 should return combined_transcripts.html.""" + assert _get_page_html_path(1) == "combined_transcripts.html" + + def test_page_2_returns_numbered_filename(self): + """Page 2 should return combined_transcripts_2.html.""" + assert _get_page_html_path(2) == "combined_transcripts_2.html" + + def test_page_10_returns_numbered_filename(self): + """Page 10 should return combined_transcripts_10.html.""" + assert _get_page_html_path(10) == "combined_transcripts_10.html" + + +class TestAssignSessionsToPages: + """Tests for _assign_sessions_to_pages function.""" + + def _make_session( + self, session_id: str, message_count: int, timestamp: str + ) -> SessionCacheData: + """Helper to create a SessionCacheData instance.""" + return SessionCacheData( + session_id=session_id, + message_count=message_count, + first_timestamp=timestamp, + last_timestamp=timestamp, + first_user_message="Test message", + total_input_tokens=0, + total_output_tokens=0, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + def test_single_session_below_threshold(self): + """Single session below page_size should result in one page.""" + sessions = { + "s1": self._make_session("s1", 100, "2023-01-01T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + assert len(pages) == 1 + assert pages[0] == ["s1"] + + def 
test_multiple_sessions_below_threshold(self): + """Multiple sessions below page_size should be on one page.""" + sessions = { + "s1": self._make_session("s1", 1000, "2023-01-01T10:00:00Z"), + "s2": self._make_session("s2", 2000, "2023-01-02T10:00:00Z"), + "s3": self._make_session("s3", 1500, "2023-01-03T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + assert len(pages) == 1 + assert sorted(pages[0]) == ["s1", "s2", "s3"] + + def test_session_exceeds_threshold_creates_new_page(self): + """When adding a session exceeds threshold, it becomes last on current page.""" + sessions = { + "s1": self._make_session("s1", 3000, "2023-01-01T10:00:00Z"), + "s2": self._make_session("s2", 3000, "2023-01-02T10:00:00Z"), + "s3": self._make_session("s3", 2000, "2023-01-03T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + # s1 (3000) + s2 (3000) > 5000, so s2 becomes last on page 1 + # s3 (2000) goes to page 2 + assert len(pages) == 2 + assert pages[0] == ["s1", "s2"] + assert pages[1] == ["s3"] + + def test_large_session_allows_overflow(self): + """A single large session is allowed to exceed page_size (no splitting).""" + sessions = { + "s1": self._make_session("s1", 10000, "2023-01-01T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + # Single session, even if large, stays on one page + assert len(pages) == 1 + assert pages[0] == ["s1"] + + def test_sessions_sorted_chronologically(self): + """Sessions should be assigned to pages in chronological order.""" + sessions = { + "s3": self._make_session("s3", 1000, "2023-01-03T10:00:00Z"), + "s1": self._make_session("s1", 1000, "2023-01-01T10:00:00Z"), + "s2": self._make_session("s2", 1000, "2023-01-02T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + assert len(pages) == 1 + # Should be in chronological order + assert pages[0] == ["s1", "s2", "s3"] + + def test_multiple_pages_with_overflow(self): + """Test 
complex pagination with multiple pages.""" + sessions = { + "s1": self._make_session("s1", 2000, "2023-01-01T10:00:00Z"), + "s2": self._make_session("s2", 4000, "2023-01-02T10:00:00Z"), # exceeds + "s3": self._make_session("s3", 3000, "2023-01-03T10:00:00Z"), + "s4": self._make_session("s4", 3000, "2023-01-04T10:00:00Z"), # exceeds + "s5": self._make_session("s5", 1000, "2023-01-05T10:00:00Z"), + } + pages = _assign_sessions_to_pages(sessions, page_size=5000) + + # s1 (2000) + s2 (4000) > 5000, s2 last on page 1 + # s3 (3000) + s4 (3000) > 5000, s4 last on page 2 + # s5 (1000) on page 3 + assert len(pages) == 3 + assert pages[0] == ["s1", "s2"] + assert pages[1] == ["s3", "s4"] + assert pages[2] == ["s5"] + + def test_empty_sessions(self): + """Empty sessions dict should return empty list.""" + pages = _assign_sessions_to_pages({}, page_size=5000) + assert pages == [] + + +@pytest.fixture +def temp_project_dir(): + """Create a temporary project directory for testing.""" + with tempfile.TemporaryDirectory() as temp_dir: + yield Path(temp_dir) + + +@pytest.fixture +def mock_version(): + """Mock library version for consistent testing.""" + return "1.0.0-test" + + +@pytest.fixture +def cache_manager(temp_project_dir, mock_version): + """Create a cache manager for testing.""" + with patch("claude_code_log.cache.get_library_version", return_value=mock_version): + return CacheManager(temp_project_dir, mock_version) + + +class TestPageCacheMethods: + """Tests for page cache methods in CacheManager.""" + + def test_get_page_count_empty(self, cache_manager): + """get_page_count should return 0 when no pages exist.""" + assert cache_manager.get_page_count() == 0 + + def test_get_page_size_config_empty(self, cache_manager): + """get_page_size_config should return None when no pages exist.""" + assert cache_manager.get_page_size_config() is None + + def test_update_and_get_page_cache(self, cache_manager): + """Test updating and retrieving page cache data.""" + 
cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1", "s2"], + message_count=3000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-02T10:00:00Z", + total_input_tokens=1000, + total_output_tokens=500, + total_cache_creation_tokens=200, + total_cache_read_tokens=100, + ) + + page_data = cache_manager.get_page_data(1) + assert page_data is not None + assert page_data.page_number == 1 + assert page_data.html_path == "combined_transcripts.html" + assert page_data.page_size_config == 5000 + assert page_data.session_ids == ["s1", "s2"] + assert page_data.message_count == 3000 + assert page_data.first_timestamp == "2023-01-01T10:00:00Z" + assert page_data.last_timestamp == "2023-01-02T10:00:00Z" + assert page_data.total_input_tokens == 1000 + assert page_data.total_output_tokens == 500 + + def test_get_page_count_after_adding_pages(self, cache_manager): + """get_page_count should return correct count after adding pages.""" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + cache_manager.update_page_cache( + page_number=2, + html_path="combined_transcripts_2.html", + page_size_config=5000, + session_ids=["s2"], + message_count=2000, + first_timestamp="2023-01-02T10:00:00Z", + last_timestamp="2023-01-02T11:00:00Z", + total_input_tokens=200, + total_output_tokens=100, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + assert cache_manager.get_page_count() == 2 + + def test_get_page_size_config_after_adding_page(self, cache_manager): + """get_page_size_config should return the configured page size.""" + cache_manager.update_page_cache( + 
page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + assert cache_manager.get_page_size_config() == 5000 + + def test_is_page_stale_no_cache(self, cache_manager): + """is_page_stale should return True when page not in cache.""" + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is True + assert "not_cached" in reason or "not in cache" in reason.lower() + + def test_is_page_stale_page_size_changed(self, cache_manager): + """is_page_stale should return True when page_size changed.""" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + is_stale, reason = cache_manager.is_page_stale(1, 10000) # Different page_size + assert is_stale is True + assert "page_size" in reason.lower() or "size" in reason.lower() + + def test_invalidate_all_pages(self, cache_manager): + """invalidate_all_pages should remove all page cache entries.""" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + cache_manager.update_page_cache( + page_number=2, + html_path="combined_transcripts_2.html", + page_size_config=5000, + session_ids=["s2"], + message_count=2000, + 
first_timestamp="2023-01-02T10:00:00Z", + last_timestamp="2023-01-02T11:00:00Z", + total_input_tokens=200, + total_output_tokens=100, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + old_paths = cache_manager.invalidate_all_pages() + + assert len(old_paths) == 2 + assert cache_manager.get_page_count() == 0 + assert cache_manager.get_page_data(1) is None + assert cache_manager.get_page_data(2) is None + + def test_get_all_pages(self, cache_manager): + """get_all_pages should return all page cache entries.""" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + cache_manager.update_page_cache( + page_number=2, + html_path="combined_transcripts_2.html", + page_size_config=5000, + session_ids=["s2"], + message_count=2000, + first_timestamp="2023-01-02T10:00:00Z", + last_timestamp="2023-01-02T11:00:00Z", + total_input_tokens=200, + total_output_tokens=100, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + all_pages = cache_manager.get_all_pages() + + assert len(all_pages) == 2 + assert all_pages[0].page_number == 1 + assert all_pages[1].page_number == 2 + + +# Integration tests for pagination with converter + + +def _create_session_messages(session_id: str, num_messages: int, base_timestamp: str): + """Helper to create messages for a session.""" + messages = [] + for i in range(num_messages): + # Alternate between user and assistant messages + if i % 2 == 0: + messages.append( + { + "type": "user", + "uuid": f"{session_id}-user-{i}", + "timestamp": f"{base_timestamp}T{10 + i // 60:02d}:{i % 60:02d}:00Z", + "sessionId": session_id, + "version": "1.0.0", + "parentUuid": None, + "isSidechain": False, + "userType": "user", + 
"cwd": "/test", + "message": {"role": "user", "content": f"Message {i} from user"}, + } + ) + else: + messages.append( + { + "type": "assistant", + "uuid": f"{session_id}-assistant-{i}", + "timestamp": f"{base_timestamp}T{10 + i // 60:02d}:{i % 60:02d}:00Z", + "sessionId": session_id, + "version": "1.0.0", + "parentUuid": None, + "isSidechain": False, + "userType": "assistant", + "cwd": "/test", + "requestId": f"req-{session_id}-{i}", + "message": { + "id": f"msg-{session_id}-{i}", + "type": "message", + "role": "assistant", + "model": "claude-3", + "content": [{"type": "text", "text": f"Response {i}"}], + "usage": {"input_tokens": 10, "output_tokens": 15}, + }, + } + ) + return messages + + +class TestPaginationIntegration: + """Integration tests for pagination with the converter.""" + + def test_small_project_no_pagination(self, temp_project_dir): + """Projects below page_size should create single combined file.""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create a project with 50 messages (below default 5000) + jsonl_file = temp_project_dir / "session1.jsonl" + messages = _create_session_messages("session1", 50, "2023-01-01") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # Convert with default page_size + output = convert_jsonl_to_html(temp_project_dir, page_size=5000, silent=True) + + # Should create single combined file + assert output.name == "combined_transcripts.html" + assert (temp_project_dir / "combined_transcripts.html").exists() + assert not (temp_project_dir / "combined_transcripts_2.html").exists() + + def test_large_project_creates_multiple_pages(self, temp_project_dir): + """Projects above page_size should create multiple page files.""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create 4 sessions x 15 messages = 60 messages; page_size=20 forces pagination + for i, session_id in enumerate( + ["session1", "session2", "session3", "session4"] + ):
+ jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 15, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # Convert with small page_size to force pagination + output = convert_jsonl_to_html(temp_project_dir, page_size=20, silent=True) + + # Should create multiple page files + assert output.name == "combined_transcripts.html" + assert (temp_project_dir / "combined_transcripts.html").exists() + # With 4 sessions x 15 messages = 60 messages, page_size=20 + # Should create at least 2 pages + assert (temp_project_dir / "combined_transcripts_2.html").exists() + + def test_page_size_change_regenerates_all(self, temp_project_dir): + """Changing page_size should regenerate all pages.""" + from claude_code_log.converter import convert_jsonl_to_html + from claude_code_log.cache import CacheManager, get_library_version + + # Create sessions + for i, session_id in enumerate(["session1", "session2", "session3"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # First conversion with page_size=30 + convert_jsonl_to_html(temp_project_dir, page_size=30, silent=True) + + # Check cache has page_size=30 + cache_manager = CacheManager(temp_project_dir, get_library_version()) + assert cache_manager.get_page_size_config() == 30 + + # Second conversion with different page_size=25 + convert_jsonl_to_html(temp_project_dir, page_size=25, silent=True) + + # Cache should now have page_size=25 + cache_manager2 = CacheManager(temp_project_dir, get_library_version()) + assert cache_manager2.get_page_size_config() == 25 + + def test_pagination_with_very_small_page_size(self, temp_project_dir): + """Test pagination with very small page size respects session 
boundaries.""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create 4 sessions with 10 messages each + for i, session_id in enumerate(["s1", "s2", "s3", "s4"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 10, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # Convert with tiny page_size=5 (each session has 10 messages) + # New simpler pagination logic: + # - Add session, then check if page > limit + # - If over, close page immediately + # s1: add, count=10 > 5 -> page 1 = [s1] + # s2: add, count=10 > 5 -> page 2 = [s2] + # s3: add, count=10 > 5 -> page 3 = [s3] + # s4: add, count=10 > 5 -> page 4 = [s4] + convert_jsonl_to_html(temp_project_dir, page_size=5, silent=True) + + # Should create 4 pages (one per session, each exceeds threshold) + assert (temp_project_dir / "combined_transcripts.html").exists() + assert (temp_project_dir / "combined_transcripts_2.html").exists() + assert (temp_project_dir / "combined_transcripts_3.html").exists() + assert (temp_project_dir / "combined_transcripts_4.html").exists() + + def test_pagination_html_contains_navigation(self, temp_project_dir): + """Paginated pages should contain navigation links.""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create 4 sessions that will span multiple pages + # With page_size=15 and sessions of 10 messages: + # s1 (10): page empty, add s1 (count=10) + # s2 (10): 10+10 > 15 and page not empty -> s2 becomes last, page 1 = [s1, s2] + # s3 (10): page empty, add s3 (count=10) + # s4 (10): 10+10 > 15 and page not empty -> s4 becomes last, page 2 = [s3, s4] + for i, session_id in enumerate(["s1", "s2", "s3", "s4"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 10, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in 
messages: + f.write(json.dumps(msg) + "\n") + + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Check page 1 has Next link (pre-enabled when page exceeds threshold) + page1_content = (temp_project_dir / "combined_transcripts.html").read_text( + encoding="utf-8" + ) + assert "Next" in page1_content or "combined_transcripts_2.html" in page1_content + + # Check page 2 has Previous link + page2_content = (temp_project_dir / "combined_transcripts_2.html").read_text( + encoding="utf-8" + ) + assert ( + "Previous" in page2_content or "combined_transcripts.html" in page2_content + ) + + def test_page_contains_stats(self, temp_project_dir): + """Paginated pages should contain stats (message count, date range).""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create sessions + for i, session_id in enumerate(["s1", "s2"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Check page contains stats + page1_content = (temp_project_dir / "combined_transcripts.html").read_text( + encoding="utf-8" + ) + assert "messages" in page1_content.lower() + assert "Page 1" in page1_content or "page-navigation" in page1_content diff --git a/test/test_performance.py b/test/test_performance.py index 8f9541b3..f813e9a3 100644 --- a/test/test_performance.py +++ b/test/test_performance.py @@ -206,7 +206,7 @@ def test_benchmark_all_projects(self, real_projects_path: Path) -> None: def _write_github_summary(self, summary_path: str, results: List[dict]) -> None: """Write benchmark results to GitHub Actions Job Summary.""" - with open(summary_path, "a") as f: + with open(summary_path, "a", encoding="utf-8") as f: f.write("\n## 📊 Performance Benchmark Results\n\n") f.write( "| Project | 
Messages | Input (MB) | Output (MB) | " diff --git a/test/test_project_display_name.py b/test/test_project_display_name.py index e4e60560..ae092294 100644 --- a/test/test_project_display_name.py +++ b/test/test_project_display_name.py @@ -73,15 +73,6 @@ def test_empty_working_directories(self): # Should fall back to path conversion assert result == "Users/dain/workspace/platform/frontend/next" - def test_none_working_directories(self): - """Test fallback when working directories is None.""" - project_dir_name = "-Users-dain-workspace-platform-frontend-next" - working_directories = None - - result = get_project_display_name(project_dir_name, working_directories) - # Should fall back to path conversion - assert result == "Users/dain/workspace/platform/frontend/next" - def test_single_working_directory(self): """Test with a single working directory.""" project_dir_name = "-Users-dain-workspace-simple-project" @@ -123,3 +114,27 @@ def test_working_directories_same_name_different_paths(self): result = get_project_display_name(project_dir_name, working_directories) # Should pick the root directory assert result == "shared-names" + + def test_tmp_paths_filtered_out(self): + """Test that temporary paths (pytest, macOS temp) are filtered out.""" + project_dir_name = "-tmp-pytest-123-test_foo0" + working_directories = [ + "/private/var/folders/4n/2f7pppjd2_n0fftzg8vrlg040000gn/T/pytest-91/test_foo0", + "/Users/dain/workspace/real-project", + ] + + result = get_project_display_name(project_dir_name, working_directories) + # Should use the real project, not the pytest temp dir + assert result == "real-project" + + def test_only_tmp_paths_falls_back(self): + """Test fallback when all working directories are tmp paths.""" + project_dir_name = "-tmp-pytest-123-test_foo0" + working_directories = [ + "/private/var/folders/4n/test", + "/tmp/pytest-91/test_foo0", + ] + + result = get_project_display_name(project_dir_name, working_directories) + # Should fall back to converting 
project directory name + assert result == "tmp/pytest/123/test_foo0" diff --git a/test/test_project_matching.py b/test/test_project_matching.py index c0283e70..86a73548 100644 --- a/test/test_project_matching.py +++ b/test/test_project_matching.py @@ -26,27 +26,20 @@ def test_find_projects_by_cwd_with_cache(self): (project1 / "test1.jsonl").touch() (project2 / "test2.jsonl").touch() - # Mock cache data for projects - mock_cache1 = Mock() - mock_cache1.working_directories = ["/Users/test/workspace/myproject"] - - mock_cache2 = Mock() - mock_cache2.working_directories = ["/Users/test/other/project"] - with patch("claude_code_log.cli.CacheManager") as mock_cache_manager: def cache_side_effect(project_dir, version): cache_instance = Mock() if project_dir == project1: - cache_instance.get_cached_project_data.return_value = ( - mock_cache1 - ) + cache_instance.get_working_directories.return_value = [ + "/Users/test/workspace/myproject" + ] elif project_dir == project2: - cache_instance.get_cached_project_data.return_value = ( - mock_cache2 - ) + cache_instance.get_working_directories.return_value = [ + "/Users/test/other/project" + ] else: - cache_instance.get_cached_project_data.return_value = None + cache_instance.get_working_directories.return_value = [] return cache_instance mock_cache_manager.side_effect = cache_side_effect @@ -74,20 +67,16 @@ def test_find_projects_by_cwd_subdirectory_matching(self): project1.mkdir() (project1 / "test1.jsonl").touch() - # Mock cache data with parent directory - mock_cache1 = Mock() - mock_cache1.working_directories = ["/Users/test/workspace/myproject"] - with patch("claude_code_log.cli.CacheManager") as mock_cache_manager: def cache_side_effect(project_dir, version): cache_instance = Mock() if project_dir == project1: - cache_instance.get_cached_project_data.return_value = ( - mock_cache1 - ) + cache_instance.get_working_directories.return_value = [ + "/Users/test/workspace/myproject" + ] else: - 
cache_instance.get_cached_project_data.return_value = None + cache_instance.get_working_directories.return_value = [] return cache_instance mock_cache_manager.side_effect = cache_side_effect diff --git a/test/test_sidechain_agents.py b/test/test_sidechain_agents.py index d0423b6d..195608ef 100644 --- a/test/test_sidechain_agents.py +++ b/test/test_sidechain_agents.py @@ -18,13 +18,19 @@ def test_agent_insertion(): # Write main transcript main_file = tmpdir_path / "main.jsonl" main_file.write_text( - (Path(__file__).parent / "test_data" / "sidechain_main.jsonl").read_text() + (Path(__file__).parent / "test_data" / "sidechain_main.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) # Write agent transcript (must match agentId in main file) agent_file = tmpdir_path / "agent-e1c84ba5.jsonl" agent_file.write_text( - (Path(__file__).parent / "test_data" / "sidechain_agent.jsonl").read_text() + (Path(__file__).parent / "test_data" / "sidechain_agent.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) # Load transcript with agent insertion (agent files discovered automatically) @@ -58,12 +64,18 @@ def test_deduplication_task_result_vs_sidechain(): # Write deduplication test data main_file = tmpdir_path / "main.jsonl" main_file.write_text( - (Path(__file__).parent / "test_data" / "dedup_main.jsonl").read_text() + (Path(__file__).parent / "test_data" / "dedup_main.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) agent_file = tmpdir_path / "agent-e1c84ba5.jsonl" agent_file.write_text( - (Path(__file__).parent / "test_data" / "dedup_agent.jsonl").read_text() + (Path(__file__).parent / "test_data" / "dedup_agent.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) # Load and render (agent files discovered automatically) @@ -91,13 +103,15 @@ def test_no_deduplication_when_content_different(): main_file.write_text( 
'{"parentUuid":null,"isSidechain":false,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"text","text":"Do something"}]},"uuid":"d-0","timestamp":"2025-01-15T12:00:00.000Z"}\n' '{"parentUuid":"d-0","isSidechain":false,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01test1","type":"message","role":"assistant","content":[{"type":"tool_use","id":"task-3","name":"Task","input":{"prompt":"Do it"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":10,"output_tokens":20}},"requestId":"req_01test1","type":"assistant","uuid":"d-1","timestamp":"2025-01-15T12:00:05.000Z"}\n' - '{"parentUuid":"d-1","isSidechain":false,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-3","content":"Done A"}]},"uuid":"d-2","timestamp":"2025-01-15T12:00:15.000Z","toolUseResult":{"agentId":"ghi789","content":"Done A"},"agentId":"ghi789"}\n' + '{"parentUuid":"d-1","isSidechain":false,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-3","content":"Done A"}]},"uuid":"d-2","timestamp":"2025-01-15T12:00:15.000Z","toolUseResult":{"agentId":"ghi789","content":"Done A"},"agentId":"ghi789"}\n', + encoding="utf-8", ) agent_file = tmpdir_path / "agent-ghi789.jsonl" agent_file.write_text( '{"parentUuid":null,"isSidechain":true,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","agentId":"ghi789","type":"user","message":{"role":"user","content":[{"type":"text","text":"Do it"}]},"uuid":"agent-d-0","timestamp":"2025-01-15T12:00:06.000Z"}\n' - 
'{"parentUuid":"agent-d-0","isSidechain":true,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","agentId":"ghi789","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01testagent1","type":"message","role":"assistant","content":[{"type":"text","text":"Done B"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":5,"output_tokens":10}},"requestId":"req_01testagent1","type":"assistant","uuid":"agent-d-1","timestamp":"2025-01-15T12:00:14.000Z"}\n' + '{"parentUuid":"agent-d-0","isSidechain":true,"userType":"external","cwd":"e:\\\\test","sessionId":"test-3","version":"2.0.46","gitBranch":"main","agentId":"ghi789","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01testagent1","type":"message","role":"assistant","content":[{"type":"text","text":"Done B"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":5,"output_tokens":10}},"requestId":"req_01testagent1","type":"assistant","uuid":"agent-d-1","timestamp":"2025-01-15T12:00:14.000Z"}\n', + encoding="utf-8", ) messages = load_transcript(main_file) @@ -116,12 +130,18 @@ def test_agent_messages_marked_as_sidechain(): main_file = tmpdir_path / "main.jsonl" main_file.write_text( - (Path(__file__).parent / "test_data" / "sidechain_main.jsonl").read_text() + (Path(__file__).parent / "test_data" / "sidechain_main.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) agent_file = tmpdir_path / "agent-e1c84ba5.jsonl" agent_file.write_text( - (Path(__file__).parent / "test_data" / "sidechain_agent.jsonl").read_text() + (Path(__file__).parent / "test_data" / "sidechain_agent.jsonl").read_text( + encoding="utf-8" + ), + encoding="utf-8", ) messages = load_transcript(main_file) @@ -153,7 +173,8 @@ def test_sidechain_tool_results_rendered(): main_file.write_text( 
'{"parentUuid":null,"isSidechain":false,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"text","text":"Search for files"}]},"uuid":"u-0","timestamp":"2025-01-15T14:00:00.000Z"}\n' '{"parentUuid":"u-0","isSidechain":false,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_main","type":"message","role":"assistant","content":[{"type":"tool_use","id":"task-glob","name":"Task","input":{"prompt":"Find test files"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":100,"output_tokens":50}},"requestId":"req_main","type":"assistant","uuid":"a-0","timestamp":"2025-01-15T14:00:05.000Z"}\n' - '{"parentUuid":"a-0","isSidechain":false,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-glob","content":"Found files: test.py"}]},"uuid":"u-1","timestamp":"2025-01-15T14:00:20.000Z","toolUseResult":{"agentId":"glob-agent","content":"Found files: test.py"},"agentId":"glob-agent"}\n' + '{"parentUuid":"a-0","isSidechain":false,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-glob","content":"Found files: test.py"}]},"uuid":"u-1","timestamp":"2025-01-15T14:00:20.000Z","toolUseResult":{"agentId":"glob-agent","content":"Found files: test.py"},"agentId":"glob-agent"}\n', + encoding="utf-8", ) # Create agent file with tool use (Glob) and its result @@ -166,7 +187,8 @@ def test_sidechain_tool_results_rendered(): # Tool result comes in a sidechain user message - THIS SHOULD BE RENDERED 
'{"parentUuid":"agent-a-0","isSidechain":true,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","agentId":"glob-agent","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"glob-123","content":"/workspace/test.py"}]},"uuid":"agent-u-1","timestamp":"2025-01-15T14:00:10.000Z"}\n' # Final assistant message - '{"parentUuid":"agent-u-1","isSidechain":true,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","agentId":"glob-agent","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_final","type":"message","role":"assistant","content":[{"type":"text","text":"Found files: test.py"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":60,"output_tokens":15}},"requestId":"req_agent_final","type":"assistant","uuid":"agent-a-1","timestamp":"2025-01-15T14:00:15.000Z"}\n' + '{"parentUuid":"agent-u-1","isSidechain":true,"userType":"external","cwd":"/workspace","sessionId":"test-tool-results","version":"2.0.46","gitBranch":"main","agentId":"glob-agent","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_final","type":"message","role":"assistant","content":[{"type":"text","text":"Found files: test.py"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":60,"output_tokens":15}},"requestId":"req_agent_final","type":"assistant","uuid":"agent-a-1","timestamp":"2025-01-15T14:00:15.000Z"}\n', + encoding="utf-8", ) messages = load_transcript(main_file) @@ -260,17 +282,20 @@ def test_multiple_agent_invocations(): '{"parentUuid":"d-0","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01","type":"message","role":"assistant","content":[{"type":"tool_use","id":"task-4a","name":"Task","input":{"prompt":"First 
task"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":100,"output_tokens":50}},"requestId":"req_01","type":"assistant","uuid":"d-1","timestamp":"2025-01-15T13:00:05.000Z"}\n' '{"parentUuid":"d-1","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-4a","content":"First done"}]},"uuid":"d-2","timestamp":"2025-01-15T13:00:15.000Z","toolUseResult":{"status":"completed","agentId":"first","content":[{"type":"text","text":"First done"}]},"agentId":"first"}\n' '{"parentUuid":"d-2","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_02","type":"message","role":"assistant","content":[{"type":"tool_use","id":"task-4b","name":"Task","input":{"prompt":"Second task"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":150,"output_tokens":60}},"requestId":"req_02","type":"assistant","uuid":"d-3","timestamp":"2025-01-15T13:00:20.000Z"}\n' - '{"parentUuid":"d-3","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-4b","content":"Second done"}]},"uuid":"d-4","timestamp":"2025-01-15T13:00:30.000Z","toolUseResult":{"status":"completed","agentId":"second","content":[{"type":"text","text":"Second done"}]},"agentId":"second"}\n' + '{"parentUuid":"d-3","isSidechain":false,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","type":"user","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"task-4b","content":"Second 
done"}]},"uuid":"d-4","timestamp":"2025-01-15T13:00:30.000Z","toolUseResult":{"status":"completed","agentId":"second","content":[{"type":"text","text":"Second done"}]},"agentId":"second"}\n', + encoding="utf-8", ) (tmpdir_path / "agent-first.jsonl").write_text( '{"parentUuid":null,"isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"first","type":"user","message":{"role":"user","content":[{"type":"text","text":"First task"}]},"uuid":"agent-d-0","timestamp":"2025-01-15T13:00:06.000Z"}\n' - '{"parentUuid":"agent-d-0","isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"first","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_01","type":"message","role":"assistant","content":[{"type":"text","text":"First done"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":50,"output_tokens":25}},"requestId":"req_agent_01","type":"assistant","uuid":"agent-d-1","timestamp":"2025-01-15T13:00:14.000Z"}\n' + '{"parentUuid":"agent-d-0","isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"first","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_01","type":"message","role":"assistant","content":[{"type":"text","text":"First done"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":50,"output_tokens":25}},"requestId":"req_agent_01","type":"assistant","uuid":"agent-d-1","timestamp":"2025-01-15T13:00:14.000Z"}\n', + encoding="utf-8", ) (tmpdir_path / "agent-second.jsonl").write_text( '{"parentUuid":null,"isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"second","type":"user","message":{"role":"user","content":[{"type":"text","text":"Second 
task"}]},"uuid":"agent2-d-0","timestamp":"2025-01-15T13:00:21.000Z"}\n' - '{"parentUuid":"agent2-d-0","isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"second","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_02","type":"message","role":"assistant","content":[{"type":"text","text":"Second done"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":55,"output_tokens":30}},"requestId":"req_agent_02","type":"assistant","uuid":"agent2-d-1","timestamp":"2025-01-15T13:00:29.000Z"}\n' + '{"parentUuid":"agent2-d-0","isSidechain":true,"userType":"external","cwd":"/workspace/test","sessionId":"test-4","version":"2.0.46","gitBranch":"main","agentId":"second","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_agent_02","type":"message","role":"assistant","content":[{"type":"text","text":"Second done"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":55,"output_tokens":30}},"requestId":"req_agent_02","type":"assistant","uuid":"agent2-d-1","timestamp":"2025-01-15T13:00:29.000Z"}\n', + encoding="utf-8", ) messages = load_transcript(main_file) diff --git a/test/test_tui.py b/test/test_tui.py index 1cdd007c..30856317 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -89,7 +89,7 @@ def temp_project_dir(): # Write test data to JSONL file jsonl_file = project_path / "test-transcript.jsonl" - with open(jsonl_file, "w") as f: + with open(jsonl_file, "w", encoding="utf-8") as f: for entry in test_data: f.write(json.dumps(entry) + "\n") @@ -797,7 +797,7 @@ def test_run_session_browser_not_directory(self, capsys, temp_project_dir): """Test running session browser with a file instead of directory.""" # Create a file test_file = temp_project_dir / "test.txt" - test_file.write_text("test") + test_file.write_text("test", encoding="utf-8") run_session_browser(test_file) From a534b79a5840e149250710c8ed2c5fdc95036c74 Mon Sep 17 00:00:00 2001 
From: Daniel Demmel Date: Sat, 3 Jan 2026 22:57:28 +0000 Subject: [PATCH 02/23] Fixes after rebase --- claude_code_log/cache.py | 8 ++-- claude_code_log/converter.py | 21 +++++++-- claude_code_log/html/renderer.py | 32 ++++++++++++- claude_code_log/tui.py | 2 +- claude_code_log/utils.py | 44 ++++++++++++------ .../__snapshots__/test_snapshot_markdown.ambr | 2 +- test/test_cache_sqlite_integrity.py | 45 ++++++++++++------- test/test_message_types.py | 39 ++++++++++++++++ 8 files changed, 154 insertions(+), 39 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 81522d80..d7546b52 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -11,6 +11,7 @@ from packaging import version from pydantic import BaseModel +from .factories import create_transcript_entry from .migrations.runner import run_migrations from .models import ( AssistantTranscriptEntry, @@ -19,7 +20,6 @@ SystemTranscriptEntry, TranscriptEntry, UserTranscriptEntry, - parse_transcript_entry, ) @@ -322,7 +322,7 @@ def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, An def _deserialize_entry(self, row: sqlite3.Row) -> TranscriptEntry: """Convert SQLite row back to TranscriptEntry.""" content_dict = json.loads(row["content"]) - return parse_transcript_entry(content_dict) + return create_transcript_entry(content_dict) def _get_file_id(self, jsonl_path: Path) -> Optional[int]: """Get the file ID for a JSONL file.""" @@ -692,7 +692,9 @@ def _is_cache_version_compatible(self, cache_version: str) -> bool: # Define compatibility rules breaking_changes: dict[str, str] = { - # Example: "0.3.3": "0.3.4" means cache from 0.3.3 needs invalidation if lib is >= 0.3.4 + # 0.9.0 introduced _compact_ide_tags_for_preview() which transforms + # first_user_message to use emoji indicators instead of raw IDE tags + "0.8.0": "0.9.0", } cache_ver = version.parse(cache_version) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 
b56db8ed..52e5b4b6 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from pathlib import Path import traceback -from typing import Optional, Any, TYPE_CHECKING +from typing import Any, Dict, List, Optional, TYPE_CHECKING import dateparser @@ -31,7 +31,7 @@ UserTranscriptEntry, ToolResultContent, ) -from .renderer import get_renderer +from .renderer import get_renderer, is_html_outdated def get_file_extension(format: str) -> str: @@ -149,7 +149,16 @@ def load_transcript( messages: list[TranscriptEntry] = [] agent_ids: set[str] = set() # Collect agentId references while parsing - with open(jsonl_path, "r", encoding="utf-8", errors="replace") as f: + try: + f = open(jsonl_path, "r", encoding="utf-8", errors="replace") + except FileNotFoundError: + # Handle race condition: file may have been deleted between glob and open + # (e.g., Claude Code session cleanup) + if not silent: + print(f"Warning: File not found (may have been deleted): {jsonl_path}") + return [] + + with f: if not silent: print(f"Processing {jsonl_path}...") for line_no, line in enumerate(f, 1): # Start counting from 1 @@ -543,7 +552,8 @@ def _generate_paginated_html( Returns: Path to the first page (combined_transcripts.html) """ - from .renderer import generate_html, format_timestamp + from .html.renderer import generate_html + from .utils import format_timestamp # Check if page size changed - if so, invalidate all pages cached_page_size = cache_manager.get_page_size_config() @@ -726,6 +736,7 @@ def convert_jsonl_to_html( generate_individual_sessions, use_cache, silent, + page_size=page_size, ) @@ -739,6 +750,7 @@ def convert_jsonl_to( use_cache: bool = True, silent: bool = False, image_export_mode: Optional[str] = None, + page_size: int = 2000, ) -> Path: """Convert JSONL transcript(s) to the specified format. @@ -752,6 +764,7 @@ def convert_jsonl_to( use_cache: Whether to use caching. 
silent: Whether to suppress output. image_export_mode: Image export mode ("placeholder", "embedded", "referenced"). + page_size: Maximum messages per page for combined transcript pagination. If None, uses format default (embedded for HTML, referenced for Markdown). """ if not input_path.exists(): diff --git a/claude_code_log/html/renderer.py b/claude_code_log/html/renderer.py index 747671b0..0a47375f 100644 --- a/claude_code_log/html/renderer.py +++ b/claude_code_log/html/renderer.py @@ -478,8 +478,19 @@ def generate( title: Optional[str] = None, combined_transcript_link: Optional[str] = None, output_dir: Optional[Path] = None, + page_info: Optional[dict[str, Any]] = None, + page_stats: Optional[dict[str, Any]] = None, ) -> str: - """Generate HTML from transcript messages.""" + """Generate HTML from transcript messages. + + Args: + messages: List of transcript entries to render. + title: Optional title for the output. + combined_transcript_link: Optional link to combined transcript. + output_dir: Optional output directory for referenced images. + page_info: Optional pagination info (page_number, prev_link, next_link). + page_stats: Optional page statistics (message_count, date_range, token_summary). + """ import time t_start = time.time() @@ -516,6 +527,8 @@ def generate( css_class_from_message=css_class_from_message, get_message_emoji=get_message_emoji, is_session_header=is_session_header, + page_info=page_info, + page_stats=page_stats, ) ) @@ -592,12 +605,27 @@ def generate_html( messages: list[TranscriptEntry], title: Optional[str] = None, combined_transcript_link: Optional[str] = None, + page_info: Optional[dict[str, Any]] = None, + page_stats: Optional[dict[str, Any]] = None, ) -> str: """Generate HTML from transcript messages using Jinja2 templates. This is a convenience function that delegates to HtmlRenderer.generate. + + Args: + messages: List of transcript entries to render. + title: Optional title for the output. 
+ combined_transcript_link: Optional link to combined transcript. + page_info: Optional pagination info (page_number, prev_link, next_link). + page_stats: Optional page statistics (message_count, date_range, token_summary). """ - return HtmlRenderer().generate(messages, title, combined_transcript_link) + return HtmlRenderer().generate( + messages, + title, + combined_transcript_link, + page_info=page_info, + page_stats=page_stats, + ) def generate_session_html( diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 146a709c..7f1b398a 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -5,7 +5,7 @@ import webbrowser from datetime import datetime from pathlib import Path -from typing import Any, ClassVar, Optional, cast +from typing import Any, ClassVar, List, Optional, cast from textual.app import App, ComposeResult from textual.binding import Binding, BindingType diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py index 0456c868..4a7241d0 100644 --- a/claude_code_log/utils.py +++ b/claude_code_log/utils.py @@ -4,7 +4,10 @@ import re from datetime import datetime, timezone from pathlib import Path -from typing import Optional +from typing import TYPE_CHECKING, Any, Optional + +if TYPE_CHECKING: + from .cache import SessionCacheData from .models import ContentItem, TextContent, TranscriptEntry, UserTranscriptEntry from .factories import ( @@ -53,6 +56,16 @@ def format_timestamp_range(first_timestamp: str, last_timestamp: str) -> str: return "" +def _is_temp_path(path_str: str) -> bool: + """Check if a path is a temporary/test path that should be filtered out.""" + temp_patterns = [ + "/private/var/folders/", # macOS temp + "/tmp/", # Unix temp + "/var/folders/", # macOS temp (alternate) + ] + return any(pattern in path_str for pattern in temp_patterns) + + def get_project_display_name( project_dir_name: str, working_directories: Optional[list[str]] = None ) -> str: @@ -66,8 +79,18 @@ def get_project_display_name( The project 
display name (e.g., "claude-code-log") """ if working_directories: + # Filter out temporary paths (pytest, macOS temp dirs, etc.) + real_dirs = [wd for wd in working_directories if not _is_temp_path(wd)] + + # If all directories were filtered out, fall back to project_dir_name conversion + if not real_dirs: + display_name = project_dir_name + if display_name.startswith("-"): + display_name = display_name[1:].replace("-", "/") + return display_name + # Convert to Path objects with their original indices for tracking recency - paths_with_indices = [(Path(wd), i) for i, wd in enumerate(working_directories)] + paths_with_indices = [(Path(wd), i) for i, wd in enumerate(real_dirs)] # Sort by: 1) path depth (fewer parts = less nested), 2) recency (lower index = more recent) # This gives us the least nested path, with ties broken by recency @@ -166,18 +189,21 @@ def extract_text_content_length(content: list[ContentItem]) -> int: def extract_working_directories( - entries: list[TranscriptEntry] | list[SessionCacheData], + entries: "list[TranscriptEntry] | list[SessionCacheData] | list[Any]", ) -> list[str]: """Extract unique working directories from a list of entries. Ordered by timestamp (most recent first). 
Args: - entries: List of entries to extract working directories from + entries: List of TranscriptEntry or SessionCacheData to extract working directories from Returns: List of unique working directory paths found in the entries """ + # Import here to avoid circular dependency at runtime + from .cache import SessionCacheData + working_directories: dict[str, str] = {} for entry in entries: @@ -200,15 +226,7 @@ def extract_working_directories( return [path for path, _ in sorted_dirs] -# IDE tag patterns for compact preview rendering (same as renderer.py) -IDE_OPENED_FILE_PATTERN = re.compile( - r"(.*?)", re.DOTALL -) -IDE_SELECTION_PATTERN = re.compile(r"(.*?)", re.DOTALL) -IDE_DIAGNOSTICS_PATTERN = re.compile( - r"\s*(.*?)\s*", - re.DOTALL, -) +# IDE tag patterns imported from factories for compact preview rendering def _compact_ide_tags_for_preview(text_content: str) -> str: diff --git a/test/__snapshots__/test_snapshot_markdown.ambr b/test/__snapshots__/test_snapshot_markdown.ambr index fafc9b58..67a1d9ce 100644 --- a/test/__snapshots__/test_snapshot_markdown.ambr +++ b/test/__snapshots__/test_snapshot_markdown.ambr @@ -323,7 +323,7 @@ - # Claude Transcripts - tmp + # Claude Transcripts - test_multi_session_markdown0 ## Sessions diff --git a/test/test_cache_sqlite_integrity.py b/test/test_cache_sqlite_integrity.py index eaa14a9b..15d2ee3b 100644 --- a/test/test_cache_sqlite_integrity.py +++ b/test/test_cache_sqlite_integrity.py @@ -12,14 +12,14 @@ from claude_code_log.cache import CacheManager, SessionCacheData from claude_code_log.models import ( - AssistantMessage, + AssistantMessageModel, AssistantTranscriptEntry, TextContent, ThinkingContent, ToolResultContent, ToolUseContent, UsageInfo, - UserMessage, + UserMessageModel, UserTranscriptEntry, ) @@ -52,7 +52,9 @@ def sample_user_entry(): isSidechain=False, userType="external", cwd="/test/path", - message=UserMessage(role="user", content="Hello, world!"), + message=UserMessageModel( + role="user", 
content=[TextContent(type="text", text="Hello, world!")] + ), ) @@ -70,7 +72,7 @@ def sample_assistant_entry(): userType="assistant", cwd="/test/path", requestId="req-123", - message=AssistantMessage( + message=AssistantMessageModel( id="msg-123", type="message", role="assistant", @@ -200,7 +202,7 @@ def test_session_token_totals_match_message_sums( userType="assistant", cwd="/test/path", requestId=f"req-{i}", - message=AssistantMessage( + message=AssistantMessageModel( id=f"msg-{i}", type="message", role="assistant", @@ -304,7 +306,7 @@ def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): userType="assistant", cwd="/test", requestId="req-1", - message=AssistantMessage( + message=AssistantMessageModel( id="msg-tool", type="message", role="assistant", @@ -330,7 +332,7 @@ def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): isSidechain=False, userType="tool_result", cwd="/test", - message=UserMessage( + message=UserMessageModel( role="user", content=[ ToolResultContent( @@ -353,7 +355,7 @@ def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): userType="assistant", cwd="/test", requestId="req-2", - message=AssistantMessage( + message=AssistantMessageModel( id="msg-thinking", type="message", role="assistant", @@ -476,7 +478,9 @@ def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry isSidechain=False, userType="external", cwd="/test", - message=UserMessage(role="user", content=f"Message {i}"), + message=UserMessageModel( + role="user", content=[TextContent(type="text", text=f"Message {i}")] + ), ) entries.append(entry) @@ -518,7 +522,7 @@ def test_null_tokens_handled_in_aggregates(self, temp_project_dir): userType="assistant", cwd="/test", requestId="req-1", - message=AssistantMessage( + message=AssistantMessageModel( id="msg-1", type="message", role="assistant", @@ -538,7 +542,9 @@ def test_null_tokens_handled_in_aggregates(self, temp_project_dir): isSidechain=False, 
userType="external", cwd="/test", - message=UserMessage(role="user", content="No tokens"), + message=UserMessageModel( + role="user", content=[TextContent(type="text", text="No tokens")] + ), ), ] @@ -629,7 +635,9 @@ def test_concurrent_readers_dont_block(self, temp_project_dir): isSidechain=False, userType="external", cwd="/test", - message=UserMessage(role="user", content="Test"), + message=UserMessageModel( + role="user", content=[TextContent(type="text", text="Test")] + ), ) jsonl_file = temp_project_dir / "concurrent.jsonl" @@ -678,7 +686,9 @@ def test_query_performance_with_large_dataset(self, temp_project_dir): isSidechain=False, userType="external", cwd="/test", - message=UserMessage(role="user", content=f"Message {i}"), + message=UserMessageModel( + role="user", content=[TextContent(type="text", text=f"Message {i}")] + ), ) entries.append(entry) @@ -721,9 +731,14 @@ def test_sessions_contain_correct_messages(self, temp_project_dir): isSidechain=False, userType="external", cwd="/test", - message=UserMessage( + message=UserMessageModel( role="user", - content=f"Session {session_num} message {msg_num}", + content=[ + TextContent( + type="text", + text=f"Session {session_num} message {msg_num}", + ) + ], ), ) entries.append(entry) diff --git a/test/test_message_types.py b/test/test_message_types.py index 9403ba95..9be0e4c9 100644 --- a/test/test_message_types.py +++ b/test/test_message_types.py @@ -148,7 +148,46 @@ def test_queue_operation_type_support(): test_file_path.unlink() +def test_load_transcript_missing_file_returns_empty_list(capsys): + """Test that load_transcript handles missing files gracefully. + + This handles the race condition where a file exists when globbed but + is deleted before being read (e.g., Claude Code session cleanup). 
+ """ + nonexistent_file = Path("/tmp/nonexistent-session-abc123.jsonl") + # Ensure it doesn't exist + if nonexistent_file.exists(): + nonexistent_file.unlink() + + # Should return empty list, not raise FileNotFoundError + messages = load_transcript(nonexistent_file) + assert messages == [], f"Expected empty list, got {messages}" + + # Should print a warning + captured = capsys.readouterr() + assert "Warning: File not found" in captured.out + assert str(nonexistent_file) in captured.out + + print("✓ Test passed: Missing file returns empty list with warning") + + +def test_load_transcript_missing_file_silent_mode(): + """Test that load_transcript handles missing files in silent mode.""" + nonexistent_file = Path("/tmp/nonexistent-session-xyz789.jsonl") + # Ensure it doesn't exist + if nonexistent_file.exists(): + nonexistent_file.unlink() + + # Should return empty list without printing + messages = load_transcript(nonexistent_file, silent=True) + assert messages == [], f"Expected empty list, got {messages}" + + print("✓ Test passed: Missing file in silent mode returns empty list") + + if __name__ == "__main__": test_summary_type_support() test_queue_operation_type_support() + test_load_transcript_missing_file_returns_empty_list(None) # type: ignore + test_load_transcript_missing_file_silent_mode() print("\n✅ All message type tests passed!") From 46b976c30761df01b0e5e048c3d0e6d7470072d4 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sun, 4 Jan 2026 00:35:16 +0000 Subject: [PATCH 03/23] Add next page link hiding to prevent broken links before next session is processed --- claude_code_log/converter.py | 56 ++++++- .../templates/components/page_nav_styles.css | 5 + .../html/templates/transcript.html | 6 +- test/__snapshots__/test_snapshot_html.ambr | 20 +++ test/test_pagination.py | 137 ++++++++++++++++++ 5 files changed, 216 insertions(+), 8 deletions(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 52e5b4b6..151f47b0 100644 --- 
a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -490,6 +490,50 @@ def _get_page_html_path(page_number: int) -> str: return f"combined_transcripts_{page_number}.html" +# Regex pattern to match and update the next link marker block +_NEXT_LINK_PATTERN = re.compile( + r'(.*?class="page-nav-link next) last-page(".*?)', + re.DOTALL, +) + + +def _enable_next_link_on_previous_page(output_dir: Path, page_number: int) -> bool: + """Enable the next link on a previous page by removing the last-page class. + + When a new page is created, the previous page's "Next" link (which was hidden + with the last-page CSS class) needs to be revealed. This function performs + an in-place edit to remove that class. + + Args: + output_dir: Directory containing the HTML files + page_number: The page number whose next link should be enabled + + Returns: + True if the file was modified, False otherwise + """ + if page_number < 1: + return False + + page_path = output_dir / _get_page_html_path(page_number) + if not page_path.exists(): + return False + + content = page_path.read_text(encoding="utf-8") + + # Check if there's a last-page class to remove + if "last-page" not in content: + return False + + # Replace the pattern to remove last-page class + new_content, count = _NEXT_LINK_PATTERN.subn(r"\1\2", content) + + if count > 0: + page_path.write_text(new_content, encoding="utf-8") + return True + + return False + + def _assign_sessions_to_pages( sessions: Dict[str, SessionCacheData], page_size: int ) -> List[List[str]]: @@ -645,17 +689,19 @@ def _generate_paginated_html( # Build page_info for navigation has_prev = page_num > 1 - # Pre-enable next link if this page exceeds threshold (anticipating future pages) - # or if there are more pages - page_exceeds_threshold = page_message_count > page_size - has_next = page_num < len(pages) or page_exceeds_threshold + is_last_page = page_num == len(pages) page_info = { "page_number": page_num, "prev_link": 
_get_page_html_path(page_num - 1) if has_prev else None, - "next_link": _get_page_html_path(page_num + 1) if has_next else None, + "next_link": _get_page_html_path(page_num + 1), # Always provide + "is_last_page": is_last_page, } + # Enable previous page's next link when creating a new page + if page_num > 1: + _enable_next_link_on_previous_page(output_dir, page_num - 1) + # Build page_stats date_range = "" if first_timestamp and last_timestamp: diff --git a/claude_code_log/html/templates/components/page_nav_styles.css b/claude_code_log/html/templates/components/page_nav_styles.css index d0254878..dfa81398 100644 --- a/claude_code_log/html/templates/components/page_nav_styles.css +++ b/claude_code_log/html/templates/components/page_nav_styles.css @@ -72,3 +72,8 @@ .page-nav-link.next::after { content: ''; } + +/* Hide next link on last page (will be revealed via in-place editing when new page is created) */ +.page-nav-link.next.last-page { + display: none; +} diff --git a/claude_code_log/html/templates/transcript.html b/claude_code_log/html/templates/transcript.html index fed0bb53..5863b50c 100644 --- a/claude_code_log/html/templates/transcript.html +++ b/claude_code_log/html/templates/transcript.html @@ -43,9 +43,9 @@

{{ title }}

{% if page_info.prev_link %} {% endif %} - {% if page_info.next_link %} - - {% endif %} + + Next → + {% endif %} diff --git a/test/__snapshots__/test_snapshot_html.ambr b/test/__snapshots__/test_snapshot_html.ambr index 863b29ec..65bf5df3 100644 --- a/test/__snapshots__/test_snapshot_html.ambr +++ b/test/__snapshots__/test_snapshot_html.ambr @@ -4361,6 +4361,11 @@ .page-nav-link.next::after { content: ''; } + + /* Hide next link on last page (will be revealed via in-place editing when new page is created) */ + .page-nav-link.next.last-page { + display: none; + } @@ -9238,6 +9243,11 @@ .page-nav-link.next::after { content: ''; } + + /* Hide next link on last page (will be revealed via in-place editing when new page is created) */ + .page-nav-link.next.last-page { + display: none; + } @@ -14211,6 +14221,11 @@ .page-nav-link.next::after { content: ''; } + + /* Hide next link on last page (will be revealed via in-place editing when new page is created) */ + .page-nav-link.next.last-page { + display: none; + } @@ -19225,6 +19240,11 @@ .page-nav-link.next::after { content: ''; } + + /* Hide next link on last page (will be revealed via in-place editing when new page is created) */ + .page-nav-link.next.last-page { + display: none; + } diff --git a/test/test_pagination.py b/test/test_pagination.py index 44db8b0c..fc9b0fd6 100644 --- a/test/test_pagination.py +++ b/test/test_pagination.py @@ -548,3 +548,140 @@ def test_page_contains_stats(self, temp_project_dir): ) assert "messages" in page1_content.lower() assert "Page 1" in page1_content or "page-navigation" in page1_content + + +class TestNextLinkInPlaceUpdate: + """Tests for in-place next link updates.""" + + def test_enable_next_link_removes_last_page_class(self, temp_project_dir): + """_enable_next_link_on_previous_page should remove last-page class.""" + from claude_code_log.converter import ( + _enable_next_link_on_previous_page, + _get_page_html_path, + ) + + # Create a page with hidden next link + page_path = 
temp_project_dir / _get_page_html_path(1) + page_path.write_text( + """ + + + + """, + encoding="utf-8", + ) + + result = _enable_next_link_on_previous_page(temp_project_dir, 1) + + assert result is True + content = page_path.read_text(encoding="utf-8") + assert "last-page" not in content + assert 'class="page-nav-link next"' in content + + def test_enable_next_link_no_op_if_already_visible(self, temp_project_dir): + """_enable_next_link_on_previous_page should not modify if already visible.""" + from claude_code_log.converter import ( + _enable_next_link_on_previous_page, + _get_page_html_path, + ) + + page_path = temp_project_dir / _get_page_html_path(1) + original_content = """ + + + + """ + page_path.write_text(original_content, encoding="utf-8") + + result = _enable_next_link_on_previous_page(temp_project_dir, 1) + + assert result is False + assert page_path.read_text(encoding="utf-8") == original_content + + def test_enable_next_link_handles_missing_file(self, temp_project_dir): + """_enable_next_link_on_previous_page should handle missing files gracefully.""" + from claude_code_log.converter import _enable_next_link_on_previous_page + + result = _enable_next_link_on_previous_page(temp_project_dir, 99) + + assert result is False + + def test_enable_next_link_handles_invalid_page_number(self, temp_project_dir): + """_enable_next_link_on_previous_page should handle invalid page numbers.""" + from claude_code_log.converter import _enable_next_link_on_previous_page + + result = _enable_next_link_on_previous_page(temp_project_dir, 0) + assert result is False + + result = _enable_next_link_on_previous_page(temp_project_dir, -1) + assert result is False + + +class TestPaginationNextLinkVisibility: + """Integration tests for next link visibility across pages.""" + + def test_single_page_has_hidden_next_link(self, temp_project_dir): + """Single page should have next link with last-page class when pagination is enabled.""" + from claude_code_log.converter import 
convert_jsonl_to_html + + # Create a session with enough messages to trigger pagination + # but only enough to fit on one page + jsonl_file = temp_project_dir / "session1.jsonl" + messages = _create_session_messages("session1", 15, "2023-01-01") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # Use page_size=10 to trigger pagination (15 messages > 10) + # This will result in a single page since session can't be split + convert_jsonl_to_html(temp_project_dir, page_size=10, silent=True) + + content = (temp_project_dir / "combined_transcripts.html").read_text( + encoding="utf-8" + ) + assert "last-page" in content + assert "PAGINATION_NEXT_LINK_START" in content + + def test_multi_page_first_has_visible_next_link(self, temp_project_dir): + """First page of multi-page should have visible next link (no last-page class).""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create sessions that will span 2 pages + for i, session_id in enumerate(["s1", "s2"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Page 1 should have visible next link (not last page) + page1 = (temp_project_dir / "combined_transcripts.html").read_text( + encoding="utf-8" + ) + assert "PAGINATION_NEXT_LINK_START" in page1 + # Should NOT have last-page class on its next link + # The pattern should be: class="page-nav-link next" without last-page + assert 'class="page-nav-link next"' in page1 or 'next "' not in page1 + + def test_multi_page_last_has_hidden_next_link(self, temp_project_dir): + """Last page of multi-page should have hidden next link (with last-page class).""" + from claude_code_log.converter import convert_jsonl_to_html + + # Create 
sessions that will span 2 pages + for i, session_id in enumerate(["s1", "s2"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Page 2 should have hidden next link (is last page) + page2 = (temp_project_dir / "combined_transcripts_2.html").read_text( + encoding="utf-8" + ) + assert "PAGINATION_NEXT_LINK_START" in page2 + assert "last-page" in page2 From a50bfb9acd4ef665b8058e5ab085bcc071796380 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sun, 4 Jan 2026 00:38:21 +0000 Subject: [PATCH 04/23] Possible fix for Windows test fail --- test/test_timeline_browser.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/test/test_timeline_browser.py b/test/test_timeline_browser.py index 8c2260a8..7373b9c5 100644 --- a/test/test_timeline_browser.py +++ b/test/test_timeline_browser.py @@ -1042,9 +1042,21 @@ def test_timezone_conversion_functionality(self, page: Page): page.goto(f"file://{temp_file}") - # Wait for page to load and timestamp conversion to occur + # Wait for page to load page.wait_for_load_state("networkidle") - page.wait_for_timeout(500) # Give time for JavaScript to run + + # Wait for JavaScript timestamp conversion to complete + # The conversion adds timezone info in parentheses, e.g., "(UTC)" or "(PST)" + # Using wait_for_function instead of fixed timeout for deterministic behaviour + page.wait_for_function( + """ + () => { + const ts = document.querySelector('.timestamp[data-timestamp]'); + return ts && ts.textContent.includes('('); + } + """, + timeout=5000, + ) # Check that timestamp elements have data-timestamp attributes timestamp_elements = page.locator(".timestamp[data-timestamp]") From 43f07afa6a9b0b6ce0769748479b63368a530047 Mon Sep 17 00:00:00 2001 
From: Daniel Demmel Date: Sun, 4 Jan 2026 01:31:53 +0000 Subject: [PATCH 05/23] Fix false cache invalidation and preserve archived sessions Two fixes for cache invalidation issues: 1. Filter agent files in modification check - get_modified_files() checked all .jsonl including agent-*.jsonl - But load_directory_transcripts() excludes agent files - Result: unreferenced agent files always marked as "modified" 2. Skip archived sessions in staleness check (preserve data) - When JSONL files are deleted, cached sessions remain - Instead of pruning, skip them in get_stale_sessions() - Show "N archived" count in output for visibility - Preserves data for potential future archive/restore features Output now shows: project-name: cached, 3 archived (0.0s) --- claude_code_log/cache.py | 41 +++++++++++++++++++++++++++++++++++- claude_code_log/converter.py | 35 ++++++++++++++++++++++++++---- 2 files changed, 71 insertions(+), 5 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index d7546b52..97aa7fdf 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -868,9 +868,16 @@ def is_html_stale( return False, "up_to_date" - def get_stale_sessions(self) -> List[tuple[str, str]]: + def get_stale_sessions( + self, valid_session_ids: Optional[set[str]] = None + ) -> List[tuple[str, str]]: """Get list of sessions that need HTML regeneration. + Args: + valid_session_ids: If provided, only check sessions in this set. + Sessions not in this set are considered "archived" (JSONL deleted) + and are skipped to avoid perpetual staleness. 
+ Returns: List of (session_id, reason) tuples for sessions needing regeneration """ @@ -889,6 +896,14 @@ def get_stale_sessions(self) -> List[tuple[str, str]]: for row in session_rows: session_id = row["session_id"] + + # Skip archived sessions (JSONL deleted but cache remains) + if ( + valid_session_ids is not None + and session_id not in valid_session_ids + ): + continue + html_path = f"session-{session_id}.html" is_stale, reason = self.is_html_stale(html_path, session_id) @@ -897,6 +912,30 @@ def get_stale_sessions(self) -> List[tuple[str, str]]: return stale_sessions + def get_archived_session_count(self, valid_session_ids: set[str]) -> int: + """Count sessions in cache whose JSONL files have been deleted. + + These are preserved for potential future archiving/restore features. + + Args: + valid_session_ids: Set of session IDs that currently exist in source data + + Returns: + Number of archived (orphan) sessions + """ + if self._project_id is None: + return 0 + + with self._get_connection() as conn: + cached_rows = conn.execute( + "SELECT session_id FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchall() + + return sum( + 1 for row in cached_rows if row["session_id"] not in valid_session_ids + ) + # ========== Page Cache Methods (Pagination) ========== def get_page_size_config(self) -> Optional[int]: diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 151f47b0..07e31e00 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1548,11 +1548,29 @@ def process_projects_hierarchy( stats.add_warning(f"Failed to initialize cache: {e}") # Phase 1: Fast check if anything needs updating (mtime comparison only) - jsonl_files = list(project_dir.glob("*.jsonl")) + # Exclude agent files - they are loaded via session references, not directly + jsonl_files = [ + f + for f in project_dir.glob("*.jsonl") + if not f.name.startswith("agent-") + ] + # Valid session IDs are from existing JSONL files (file stem 
= session ID) + valid_session_ids = {f.stem for f in jsonl_files} modified_files = ( cache_manager.get_modified_files(jsonl_files) if cache_manager else [] ) - stale_sessions = cache_manager.get_stale_sessions() if cache_manager else [] + # Pass valid_session_ids to skip archived sessions (JSONL deleted) + stale_sessions = ( + cache_manager.get_stale_sessions(valid_session_ids) + if cache_manager + else [] + ) + # Count archived sessions (cached but JSONL deleted) + archived_count = ( + cache_manager.get_archived_session_count(valid_session_ids) + if cache_manager + else 0 + ) output_path = project_dir / "combined_transcripts.html" # Check combined_stale using the appropriate cache: # - Paginated projects store data in html_pages table (via save_page_cache) @@ -1578,12 +1596,19 @@ def process_projects_hierarchy( or not output_path.exists() ) + # Build archived suffix for output (shown on both cached and work paths) + archived_suffix = ( + f", {archived_count} archived" if archived_count > 0 else "" + ) + if not needs_work: # Fast path: nothing to do, just collect stats for index stats.files_loaded_from_cache = len(jsonl_files) stats.total_time = time.time() - project_start_time # Show progress - print(f" {project_dir.name}: cached ({stats.total_time:.1f}s)") + print( + f" {project_dir.name}: cached{archived_suffix} ({stats.total_time:.1f}s)" + ) else: # Slow path: update cache and regenerate output stats.files_updated = len(modified_files) if modified_files else 0 @@ -1618,7 +1643,9 @@ def process_projects_hierarchy( if stats.sessions_regenerated > 0: progress_parts.append(f"{stats.sessions_regenerated} sessions") detail = ", ".join(progress_parts) if progress_parts else "regenerated" - print(f" {project_dir.name}: {detail} ({stats.total_time:.1f}s)") + print( + f" {project_dir.name}: {detail}{archived_suffix} ({stats.total_time:.1f}s)" + ) # Get project info for index - use cached data if available # Exclude agent files (they are loaded via session references) 
From 4a7ce7ae5d9f3c6f78c83e8e77a471642a0d1750 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sat, 10 Jan 2026 00:40:13 +0000 Subject: [PATCH 06/23] Add archived project support and session deletion (#76) * Add archived project support and session deletion Features: - Show fully archived projects (cached but no JSONL files) in TUI and HTML index - Projects with [ARCHIVED] badge in project selector and greyed-out styling in index - Archived projects automatically open in archived view mode - Delete sessions from cache via 'd' key with confirmation dialog - Restore JSONL now produces compact JSON format matching original files - Project list refreshes after restore to update archived status Implementation: - Add get_all_cached_projects() to discover archived projects from cache.db - Add delete_session() and delete_project() methods to CacheManager - Update CLI to discover and track archived projects, refresh on return - Update SessionBrowser to handle is_archived_project flag - Add DeleteConfirmScreen modal for delete confirmation Tests: - Add tests for compact JSON export, session/project deletion - Add tests for get_all_cached_projects with active/archived detection - Add TUI test for archived project loading in archived mode * Simplify views to show both archived and current + implement delete as well as archive and restore --- claude_code_log/cache.py | 209 ++++ claude_code_log/cli.py | 119 +- claude_code_log/converter.py | 129 +- .../components/project_card_styles.css | 24 + claude_code_log/html/templates/index.html | 6 +- claude_code_log/tui.py | 987 +++++++++++++++- dev-docs/restoring-archived-sessions.md | 100 ++ test/__snapshots__/test_snapshot_html.ambr | 28 + test/test_cache_integration.py | 421 +++++++ test/test_tui.py | 1036 ++++++++++++++++- 10 files changed, 2973 insertions(+), 86 deletions(-) create mode 100644 dev-docs/restoring-archived-sessions.md diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 97aa7fdf..8e37d477 100644 
--- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -936,6 +936,105 @@ def get_archived_session_count(self, valid_session_ids: set[str]) -> int: 1 for row in cached_rows if row["session_id"] not in valid_session_ids ) + def get_archived_sessions( + self, valid_session_ids: set[str] + ) -> Dict[str, SessionCacheData]: + """Get session data for archived sessions (cached but JSONL deleted). + + Args: + valid_session_ids: Set of session IDs that currently exist in source data + + Returns: + Dict mapping session_id to SessionCacheData for archived sessions + """ + if self._project_id is None: + return {} + + archived_sessions: Dict[str, SessionCacheData] = {} + + with self._get_connection() as conn: + session_rows = conn.execute( + "SELECT * FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchall() + + for row in session_rows: + session_id = row["session_id"] + if session_id not in valid_session_ids: + archived_sessions[session_id] = SessionCacheData( + session_id=session_id, + summary=row["summary"], + first_timestamp=row["first_timestamp"], + last_timestamp=row["last_timestamp"], + message_count=row["message_count"], + first_user_message=row["first_user_message"], + cwd=row["cwd"], + total_input_tokens=row["total_input_tokens"], + total_output_tokens=row["total_output_tokens"], + total_cache_creation_tokens=row["total_cache_creation_tokens"], + total_cache_read_tokens=row["total_cache_read_tokens"], + ) + + return archived_sessions + + def export_session_to_jsonl(self, session_id: str) -> List[str]: + """Export all message content JSONs for a session, for JSONL restoration. + + Args: + session_id: The session ID to export + + Returns: + List of JSON strings (one per line for JSONL file), compact format + """ + if self._project_id is None: + return [] + + with self._get_connection() as conn: + rows = conn.execute( + """SELECT content FROM messages + WHERE project_id = ? AND session_id = ? 
+ ORDER BY timestamp NULLS LAST""", + (self._project_id, session_id), + ).fetchall() + + # Re-serialize to compact JSON format (no spaces after separators) + # to match original JSONL file format + result: List[str] = [] + for row in rows: + try: + parsed = json.loads(row["content"]) + compact = json.dumps(parsed, separators=(",", ":")) + result.append(compact) + except json.JSONDecodeError: + # If parsing fails, use original content + result.append(row["content"]) + return result + + def load_session_entries(self, session_id: str) -> List[TranscriptEntry]: + """Load transcript entries for a session from cache. + + Used for rendering archived sessions to HTML/Markdown when + the original JSONL file no longer exists. + + Args: + session_id: The session ID to load + + Returns: + List of TranscriptEntry objects for the session + """ + if self._project_id is None: + return [] + + with self._get_connection() as conn: + rows = conn.execute( + """SELECT content FROM messages + WHERE project_id = ? AND session_id = ? + ORDER BY timestamp NULLS LAST""", + (self._project_id, session_id), + ).fetchall() + + return [self._deserialize_entry(row) for row in rows] + # ========== Page Cache Methods (Pagination) ========== def get_page_size_config(self) -> Optional[int]: @@ -1224,6 +1323,115 @@ def get_page_count(self) -> int: return row["cnt"] if row else 0 + def delete_session(self, session_id: str) -> bool: + """Delete a session and its messages from cache. + + Args: + session_id: The session ID to delete + + Returns: + True if session was deleted, False if not found + """ + if self._project_id is None: + return False + + with self._get_connection() as conn: + # Check if session exists + row = conn.execute( + "SELECT id FROM sessions WHERE project_id = ? AND session_id = ?", + (self._project_id, session_id), + ).fetchone() + + if not row: + return False + + # Delete messages for this session + conn.execute( + "DELETE FROM messages WHERE project_id = ? 
AND session_id = ?", + (self._project_id, session_id), + ) + + # Delete HTML cache entries for this session + conn.execute( + "DELETE FROM html_cache WHERE project_id = ? AND source_session_id = ?", + (self._project_id, session_id), + ) + + # Delete the session record + conn.execute( + "DELETE FROM sessions WHERE project_id = ? AND session_id = ?", + (self._project_id, session_id), + ) + + self._update_last_updated(conn) + conn.commit() + + return True + + def delete_project(self) -> bool: + """Delete this project and all its data from cache. + + Returns: + True if project was deleted, False if not found + """ + if self._project_id is None: + return False + + with self._get_connection() as conn: + # Cascade delete handles messages, sessions, cached_files, html_cache, html_pages + conn.execute("DELETE FROM projects WHERE id = ?", (self._project_id,)) + conn.commit() + + self._project_id = None + return True + + +def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: + """Get all projects from cache, indicating which are archived. + + This is a standalone function that queries the cache.db directly + to find all project paths, without needing to instantiate CacheManager + for each project. + + Args: + projects_dir: Path to the projects directory (e.g., ~/.claude/projects) + + Returns: + List of (project_path, is_archived) tuples. + is_archived is True if the project has no JSONL files but exists in cache. 
+ """ + db_path = projects_dir / "cache.db" + if not db_path.exists(): + return [] + + result: List[tuple[str, bool]] = [] + + try: + conn = sqlite3.connect(db_path, timeout=30.0) + conn.row_factory = sqlite3.Row + try: + rows = conn.execute( + "SELECT project_path FROM projects ORDER BY project_path" + ).fetchall() + + for row in rows: + project_path = Path(row["project_path"]) + # Check if project has JSONL files (non-archived) + has_jsonl = ( + bool(list(project_path.glob("*.jsonl"))) + if project_path.exists() + else False + ) + # is_archived = project exists in cache but has no JSONL files + is_archived = not has_jsonl + result.append((row["project_path"], is_archived)) + finally: + conn.close() + except Exception: + pass + + return result + __all__ = [ "CacheManager", @@ -1232,5 +1440,6 @@ def get_page_count(self) -> int: "PageCacheData", "ProjectCache", "SessionCacheData", + "get_all_cached_projects", "get_library_version", ] diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 90f0e4b3..19b4bb9d 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -17,7 +17,7 @@ get_file_extension, process_projects_hierarchy, ) -from .cache import CacheManager, get_library_version +from .cache import CacheManager, get_all_cached_projects, get_library_version def get_default_projects_dir() -> Path: @@ -25,36 +25,75 @@ def get_default_projects_dir() -> Path: return Path.home() / ".claude" / "projects" -def _launch_tui_with_cache_check(project_path: Path) -> Optional[str]: +def _discover_projects( + projects_dir: Path, +) -> tuple[list[Path], set[Path]]: + """Discover active and archived projects in the projects directory. 
+ + Returns: + Tuple of (all_project_dirs, archived_projects_set) + """ + # Find active projects (directories with JSONL files) + project_dirs = [ + d for d in projects_dir.iterdir() if d.is_dir() and list(d.glob("*.jsonl")) + ] + + # Find archived projects (in cache but without JSONL files) + archived_projects: set[Path] = set() + cached_projects = get_all_cached_projects(projects_dir) + active_project_paths = {str(p) for p in project_dirs} + for project_path_str, is_archived in cached_projects: + if is_archived and project_path_str not in active_project_paths: + archived_path = Path(project_path_str) + archived_projects.add(archived_path) + project_dirs.append(archived_path) + + return project_dirs, archived_projects + + +def _launch_tui_with_cache_check( + project_path: Path, is_archived: bool = False +) -> Optional[str]: """Launch TUI with proper cache checking and user feedback.""" click.echo("Checking cache and loading session data...") # Check if we need to rebuild cache cache_manager = CacheManager(project_path, get_library_version()) - jsonl_files = list(project_path.glob("*.jsonl")) - modified_files = cache_manager.get_modified_files(jsonl_files) project_cache = cache_manager.get_cached_project_data() - if not (project_cache and project_cache.sessions and not modified_files): - # Need to rebuild cache - if modified_files: + if is_archived: + # Archived projects have no JSONL files, just load from cache + if project_cache and project_cache.sessions: click.echo( - f"Found {len(modified_files)} modified files, rebuilding cache..." + f"[ARCHIVED] Found {len(project_cache.sessions)} sessions in cache. Launching TUI..." ) else: - click.echo("Building session cache...") - - # Pre-build the cache before launching TUI (no HTML generation) - try: - ensure_fresh_cache(project_path, cache_manager, silent=True) - click.echo("Cache ready! 
Launching TUI...") - except Exception as e: - click.echo(f"Error building cache: {e}", err=True) + click.echo("Error: No cached sessions found for archived project", err=True) return None else: - click.echo( - f"Cache up to date. Found {len(project_cache.sessions)} sessions. Launching TUI..." - ) + jsonl_files = list(project_path.glob("*.jsonl")) + modified_files = cache_manager.get_modified_files(jsonl_files) + + if not (project_cache and project_cache.sessions and not modified_files): + # Need to rebuild cache + if modified_files: + click.echo( + f"Found {len(modified_files)} modified files, rebuilding cache..." + ) + else: + click.echo("Building session cache...") + + # Pre-build the cache before launching TUI (no HTML generation) + try: + ensure_fresh_cache(project_path, cache_manager, silent=True) + click.echo("Cache ready! Launching TUI...") + except Exception as e: + click.echo(f"Error building cache: {e}", err=True) + return None + else: + click.echo( + f"Cache up to date. Found {len(project_cache.sessions)} sessions. Launching TUI..." 
+ ) # Small delay to let user see the message before TUI clears screen import time @@ -63,7 +102,7 @@ def _launch_tui_with_cache_check(project_path: Path) -> Optional[str]: from .tui import run_session_browser - result = run_session_browser(project_path) + result = run_session_browser(project_path, is_archived=is_archived) return result @@ -511,11 +550,8 @@ def main( click.echo(f"Error: Projects directory not found: {input_path}") return - project_dirs = [ - d - for d in input_path.iterdir() - if d.is_dir() and list(d.glob("*.jsonl")) - ] + # Initial project discovery + project_dirs, archived_projects = _discover_projects(input_path) if not project_dirs: click.echo(f"No projects with JSONL files found in {input_path}") @@ -524,7 +560,7 @@ def main( # Try to find projects that match current working directory matching_projects = find_projects_by_cwd(input_path) - if len(project_dirs) == 1: + if len(project_dirs) == 1 and not archived_projects: # Only one project, open it directly result = _launch_tui_with_cache_check(project_dirs[0]) if result == "back_to_projects": @@ -532,14 +568,21 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects (may have changed after restore) + project_dirs, archived_projects = _discover_projects( + input_path + ) selected_project = run_project_selector( - project_dirs, matching_projects + project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return @@ -555,14 +598,21 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects (may have changed after restore) + project_dirs, archived_projects = _discover_projects( + input_path + ) selected_project = run_project_selector( - 
project_dirs, matching_projects + project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return @@ -572,14 +622,19 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects each iteration (may have changed after restore) + project_dirs, archived_projects = _discover_projects(input_path) selected_project = run_project_selector( - project_dirs, matching_projects + project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 07e31e00..6b5606f5 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -20,7 +20,12 @@ create_session_preview, get_warmup_session_ids, ) -from .cache import CacheManager, SessionCacheData, get_library_version +from .cache import ( + CacheManager, + SessionCacheData, + get_all_cached_projects, + get_library_version, +) from .parser import parse_timestamp from .factories import create_transcript_entry from .models import ( @@ -1477,6 +1482,49 @@ def _generate_individual_session_files( return regenerated_count +def _get_cleanup_period_days() -> Optional[int]: + """Read cleanupPeriodDays from Claude Code settings. + + Checks ~/.claude/settings.json for the cleanupPeriodDays setting. + + Returns: + The configured cleanup period in days, or None if not set/readable. 
+ """ + import json + + settings_path = Path.home() / ".claude" / "settings.json" + if not settings_path.exists(): + return None + + try: + with open(settings_path, "r", encoding="utf-8") as f: + settings = json.load(f) + return settings.get("cleanupPeriodDays") + except (json.JSONDecodeError, OSError): + return None + + +def _print_archived_sessions_note(total_archived: int) -> None: + """Print a note about archived sessions and how to restore them. + + Args: + total_archived: Total number of archived sessions across all projects. + """ + cleanup_days = _get_cleanup_period_days() + cleanup_info = ( + f" (cleanupPeriodDays: {cleanup_days})" + if cleanup_days is not None + else " (cleanupPeriodDays: 30 default)" + ) + + print( + f"\nNote: {total_archived} archived session(s) found{cleanup_info}.\n" + " These sessions were cached before their JSONL files were deleted.\n" + " To restore them or adjust cleanup settings, see:\n" + " https://github.com/daaain/claude-code-log/blob/main/dev-docs/restoring-archived-sessions.md" + ) + + def process_projects_hierarchy( projects_path: Path, from_date: Optional[str] = None, @@ -1514,7 +1562,16 @@ def process_projects_hierarchy( if child.is_dir() and list(child.glob("*.jsonl")): project_dirs.append(child) - if not project_dirs: + # Find archived projects (projects in cache but without JSONL files) + archived_project_dirs: list[Path] = [] + if use_cache: + cached_projects = get_all_cached_projects(projects_path) + active_project_paths = {str(p) for p in project_dirs} + for project_path_str, is_archived in cached_projects: + if is_archived and project_path_str not in active_project_paths: + archived_project_dirs.append(Path(project_path_str)) + + if not project_dirs and not archived_project_dirs: raise FileNotFoundError( f"No project directories with JSONL files found in {projects_path}" ) @@ -1530,6 +1587,7 @@ def process_projects_hierarchy( total_projects = len(project_dirs) projects_with_updates = 0 total_sessions = 0 + 
total_archived = 0 # Per-project stats for summary output project_stats: List[tuple[str, GenerationStats]] = [] @@ -1571,6 +1629,7 @@ def process_projects_hierarchy( if cache_manager else 0 ) + total_archived += archived_count output_path = project_dir / "combined_transcripts.html" # Check combined_stale using the appropriate cache: # - Paginated projects store data in html_pages table (via save_page_cache) @@ -1681,6 +1740,7 @@ def process_projects_hierarchy( "latest_timestamp": cached_project_data.latest_timestamp, "earliest_timestamp": cached_project_data.earliest_timestamp, "working_directories": cache_manager.get_working_directories(), + "is_archived": False, "sessions": [ { "id": session_data.session_id, @@ -1790,6 +1850,7 @@ def process_projects_hierarchy( "working_directories": cache_manager.get_working_directories() if cache_manager else [], + "is_archived": False, "sessions": sessions_data, } ) @@ -1808,6 +1869,66 @@ def process_projects_hierarchy( ) continue + # Process archived projects (projects in cache but without JSONL files) + archived_project_count = 0 + for archived_dir in sorted(archived_project_dirs): + try: + # Initialize cache manager for archived project + cache_manager = CacheManager(archived_dir, library_version) + cached_project_data = cache_manager.get_cached_project_data() + + if cached_project_data is None: + continue + + archived_project_count += 1 + print( + f" {archived_dir.name}: [ARCHIVED] ({len(cached_project_data.sessions)} sessions)" + ) + + # Add archived project to summaries + project_summaries.append( + { + "name": archived_dir.name, + "path": archived_dir, + "html_file": f"{archived_dir.name}/combined_transcripts.html", + "jsonl_count": 0, + "message_count": cached_project_data.total_message_count, + "last_modified": 0.0, + "total_input_tokens": cached_project_data.total_input_tokens, + "total_output_tokens": cached_project_data.total_output_tokens, + "total_cache_creation_tokens": 
cached_project_data.total_cache_creation_tokens, + "total_cache_read_tokens": cached_project_data.total_cache_read_tokens, + "latest_timestamp": cached_project_data.latest_timestamp, + "earliest_timestamp": cached_project_data.earliest_timestamp, + "working_directories": cache_manager.get_working_directories(), + "is_archived": True, + "sessions": [ + { + "id": session_data.session_id, + "summary": session_data.summary, + "timestamp_range": format_timestamp_range( + session_data.first_timestamp, + session_data.last_timestamp, + ), + "first_timestamp": session_data.first_timestamp, + "last_timestamp": session_data.last_timestamp, + "message_count": session_data.message_count, + "first_user_message": session_data.first_user_message + or "[No user message found in session.]", + } + for session_data in cached_project_data.sessions.values() + if session_data.first_user_message + and session_data.first_user_message != "Warmup" + ], + } + ) + except Exception as e: + print(f"Warning: Failed to process archived project {archived_dir}: {e}") + continue + + # Update total projects count to include archived + total_projects = len(project_dirs) + archived_project_count + # Generate index (always regenerate if outdated) ext = get_file_extension(output_format) index_path = projects_path / f"index.{ext}" @@ -1846,4 +1967,8 @@ def process_projects_hierarchy( summary_parts.append(" Index regenerated") print("\n".join(summary_parts)) + # Show archived sessions note if any exist + if total_archived > 0: + _print_archived_sessions_note(total_archived) + return index_path diff --git a/claude_code_log/html/templates/components/project_card_styles.css b/claude_code_log/html/templates/components/project_card_styles.css index ebae9931..c6d40f2f 100644 --- a/claude_code_log/html/templates/components/project_card_styles.css +++ b/claude_code_log/html/templates/components/project_card_styles.css @@ -111,4 +111,28 @@ .project-sessions details[open] summary { margin-bottom: 10px; +} + +/* 
Archived project styling */ +.project-card.archived { + opacity: 0.6; + background-color: #f5f5f522; +} + +.project-card.archived:hover { + opacity: 0.8; +} + +.archived-badge { + display: inline-block; + background-color: #888; + color: white; + font-size: 0.65em; + font-weight: 600; + padding: 2px 8px; + border-radius: 4px; + margin-left: 10px; + vertical-align: middle; + text-transform: uppercase; + letter-spacing: 0.5px; } \ No newline at end of file diff --git a/claude_code_log/html/templates/index.html b/claude_code_log/html/templates/index.html index a539386a..4b2bf430 100644 --- a/claude_code_log/html/templates/index.html +++ b/claude_code_log/html/templates/index.html @@ -59,10 +59,14 @@

{{ title }}

{% for project in projects %} -
+
{{ project.display_name }} + {% if project.is_archived %} + Archived + {% else %} (← open combined transcript) + {% endif %}
📁 {{ project.jsonl_count }} transcript files
diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 7f1b398a..63c1d5f3 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -41,7 +41,7 @@ class ProjectSelector(App[Path]): border: solid $primary; margin-bottom: 1; } - + DataTable { height: auto; } @@ -50,7 +50,11 @@ class ProjectSelector(App[Path]): TITLE = "Claude Code Log - Project Selector" BINDINGS: ClassVar[list[BindingType]] = [ Binding("q", "quit", "Quit"), + Binding("escape", "quit", "Quit", show=False), Binding("s", "select_project", "Select Project"), + Binding("a", "archive_project", "Archive Project"), + Binding("d", "delete_project", "Delete Project"), + Binding("r", "restore_project", "Restore Project"), ] selected_project_path: reactive[Optional[Path]] = reactive( @@ -58,13 +62,20 @@ class ProjectSelector(App[Path]): ) projects: list[Path] matching_projects: list[Path] - - def __init__(self, projects: list[Path], matching_projects: list[Path]): + archived_projects: set[Path] + + def __init__( + self, + projects: list[Path], + matching_projects: list[Path], + archived_projects: Optional[set[Path]] = None, + ): """Initialize the project selector.""" super().__init__() self.theme = "gruvbox" self.projects = projects self.matching_projects = matching_projects + self.archived_projects = archived_projects or set() def compose(self) -> ComposeResult: """Create the UI layout.""" @@ -105,18 +116,21 @@ def populate_table(self) -> None: # Add rows for project_path in self.projects: + is_archived = project_path in self.archived_projects try: cache_manager = CacheManager(project_path, get_library_version()) project_cache = cache_manager.get_cached_project_data() if not project_cache or not project_cache.sessions: - try: - ensure_fresh_cache(project_path, cache_manager, silent=True) - # Reload cache after ensuring it's fresh - project_cache = cache_manager.get_cached_project_data() - except Exception: - # If cache building fails, continue with empty cache - project_cache = None + if 
not is_archived: + # Only try to build cache for non-archived projects + try: + ensure_fresh_cache(project_path, cache_manager, silent=True) + # Reload cache after ensuring it's fresh + project_cache = cache_manager.get_cached_project_data() + except Exception: + # If cache building fails, continue with empty cache + project_cache = None # Get project info session_count = ( @@ -132,6 +146,10 @@ def populate_table(self) -> None: if project_path in self.matching_projects: project_display = f"→ {project_display[2:]}" + # Add archived indicator + if is_archived: + project_display = f"{project_display} [ARCHIVED]" + table.add_row( project_display, str(session_count), @@ -141,6 +159,8 @@ def populate_table(self) -> None: project_display = f" {project_path.name}" if project_path in self.matching_projects: project_display = f"→ {project_display[2:]}" + if is_archived: + project_display = f"{project_display} [ARCHIVED]" table.add_row( project_display, @@ -151,6 +171,10 @@ def on_data_table_row_highlighted(self, _event: DataTable.RowHighlighted) -> Non """Handle row highlighting (cursor movement) in the projects table.""" self._update_selected_project_from_cursor() + def on_data_table_row_selected(self, _event: DataTable.RowSelected) -> None: + """Handle row selection (Enter key) in the projects table.""" + self.action_select_project() + def _update_selected_project_from_cursor(self) -> None: """Update the selected project based on the current cursor position.""" try: @@ -164,6 +188,10 @@ def _update_selected_project_from_cursor(self) -> None: if project_display.startswith("→"): project_display = project_display[1:].strip() + # Remove the archived indicator if present + if project_display.endswith(" [ARCHIVED]"): + project_display = project_display[:-11].strip() + # Find the matching project path for project_path in self.projects: if project_path.name == project_display: @@ -186,6 +214,185 @@ async def action_quit(self) -> None: """Quit the application with proper 
cleanup.""" self.exit(None) + def _get_project_session_count(self, project_path: Path) -> int: + """Get the number of sessions in a project from cache.""" + try: + cache_manager = CacheManager(project_path, get_library_version()) + project_cache = cache_manager.get_cached_project_data() + if project_cache and project_cache.sessions: + return len(project_cache.sessions) + except Exception: + pass + return 0 + + def _is_project_archived(self, project_path: Path) -> bool: + """Check if a project is archived (no JSONL files exist).""" + return project_path in self.archived_projects + + def check_action( + self, + action: str, + parameters: tuple[object, ...], # noqa: ARG002 + ) -> bool | None: + """Control which actions are available based on context.""" + project_path = self.selected_project_path + is_archived = project_path in self.archived_projects if project_path else False + + if action == "archive_project": + # Can only archive non-archived projects + return project_path is not None and not is_archived + elif action == "restore_project": + # Can only restore archived projects + return project_path is not None and is_archived + elif action == "delete_project": + # Can delete any project + return project_path is not None + + # Allow all other actions (quit, select_project, etc.) 
+ return True + + def action_archive_project(self) -> None: + """Archive all sessions in the selected project.""" + if not self.selected_project_path: + self.notify("No project selected", severity="warning") + return + + if self._is_project_archived(self.selected_project_path): + self.notify("Project is already archived", severity="warning") + return + + session_count = self._get_project_session_count(self.selected_project_path) + self.push_screen( + ArchiveProjectConfirmScreen(self.selected_project_path.name, session_count), + self._handle_archive_project_confirm, + ) + + def _handle_archive_project_confirm(self, confirmed: bool | None) -> None: + """Handle the result of the archive project confirmation dialog.""" + if not confirmed or not self.selected_project_path: + return + + project_path = self.selected_project_path + archived_count = 0 + + # Delete all JSONL files in the project + for jsonl_file in project_path.glob("*.jsonl"): + try: + jsonl_file.unlink() + archived_count += 1 + except Exception as e: + self.notify( + f"Failed to delete {jsonl_file.name}: {e}", severity="error" + ) + + if archived_count > 0: + self.notify(f"Archived {archived_count} sessions") + # Add to archived projects set + self.archived_projects.add(project_path) + self.populate_table() + + def action_delete_project(self) -> None: + """Delete the selected project from cache (and optionally JSONL files).""" + if not self.selected_project_path: + self.notify("No project selected", severity="warning") + return + + is_archived = self._is_project_archived(self.selected_project_path) + session_count = self._get_project_session_count(self.selected_project_path) + self.push_screen( + DeleteProjectConfirmScreen( + self.selected_project_path.name, session_count, is_archived + ), + self._handle_delete_project_confirm, + ) + + def _handle_delete_project_confirm(self, result: Optional[str]) -> None: + """Handle the result of the delete project confirmation dialog.""" + if not result or not 
self.selected_project_path: + return + + project_path = self.selected_project_path + + # Delete cache + cache_manager = CacheManager(project_path, get_library_version()) + cache_manager.clear_cache() + + # If deleting both, also delete JSONL files + if result == "both": + for jsonl_file in project_path.glob("*.jsonl"): + try: + jsonl_file.unlink() + except Exception as e: + self.notify( + f"Failed to delete {jsonl_file.name}: {e}", severity="error" + ) + + # Remove from projects list + if project_path in self.projects: + self.projects.remove(project_path) + if project_path in self.matching_projects: + self.matching_projects.remove(project_path) + if project_path in self.archived_projects: + self.archived_projects.discard(project_path) + + self.notify(f"Deleted project: {project_path.name}") + self.selected_project_path = None + self.populate_table() + + def action_restore_project(self) -> None: + """Restore all archived sessions in the selected project.""" + if not self.selected_project_path: + self.notify("No project selected", severity="warning") + return + + if not self._is_project_archived(self.selected_project_path): + self.notify("Project is not archived", severity="warning") + return + + session_count = self._get_project_session_count(self.selected_project_path) + self.push_screen( + RestoreProjectConfirmScreen(self.selected_project_path.name, session_count), + self._handle_restore_project_confirm, + ) + + def _handle_restore_project_confirm(self, confirmed: bool | None) -> None: + """Handle the result of the restore project confirmation dialog.""" + if not confirmed or not self.selected_project_path: + return + + project_path = self.selected_project_path + cache_manager = CacheManager(project_path, get_library_version()) + project_cache = cache_manager.get_cached_project_data() + + if not project_cache or not project_cache.sessions: + self.notify("No sessions to restore", severity="warning") + return + + # Ensure project directory exists + 
project_path.mkdir(parents=True, exist_ok=True) + + restored_count = 0 + for session_id in project_cache.sessions: + jsonl_path = project_path / f"{session_id}.jsonl" + if not jsonl_path.exists(): + try: + messages = cache_manager.export_session_to_jsonl(session_id) + if messages: + with open(jsonl_path, "w", encoding="utf-8") as f: + for msg in messages: + f.write(msg + "\n") + restored_count += 1 + except Exception as e: + self.notify( + f"Failed to restore {session_id}: {e}", severity="error" + ) + + if restored_count > 0: + self.notify(f"Restored {restored_count} sessions") + # Remove from archived projects set + self.archived_projects.discard(project_path) + self.populate_table() + class MarkdownViewerScreen(ModalScreen[None]): """Modal screen for viewing Markdown content with table of contents.""" @@ -309,6 +516,406 @@ async def action_dismiss(self, result: None = None) -> None: self.dismiss(result) +class ArchiveConfirmScreen(ModalScreen[bool]): + """Modal screen for confirming session archiving (delete JSONL, keep cache).""" + + CSS = """ + ArchiveConfirmScreen { + align: center middle; + } + + #archive-container { + width: 65; + height: auto; + border: solid $warning; + background: $surface; + padding: 1 2; + } + + #archive-title { + text-align: center; + text-style: bold; + color: $warning; + margin-bottom: 1; + } + + #archive-message { + margin-bottom: 1; + } + + #archive-info { + color: $text-muted; + margin-bottom: 1; + } + + #archive-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("y", "confirm", "Yes"), + Binding("enter", "confirm", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, session_id: str) -> None: + super().__init__() + self.session_id = session_id + + def compose(self) -> ComposeResult: + with Container(id="archive-container"): + yield Static("Archive Session", id="archive-title") + yield 
Static( + f"Session: {self.session_id[:8]}...", + id="archive-message", + ) + yield Static( + "This will delete the JSONL file.\n" + "The session will be archived and can be restored from cache.", + id="archive-info", + ) + yield Static("\\[Enter/y] Yes \\[Esc/n] No", id="archive-buttons") + + def action_confirm(self) -> None: + self.dismiss(True) + + def action_cancel(self) -> None: + self.dismiss(False) + + +class DeleteConfirmScreen(ModalScreen[Optional[str]]): + """Modal screen for confirming session deletion with smart options.""" + + CSS = """ + DeleteConfirmScreen { + align: center middle; + } + + #delete-container { + width: 65; + height: auto; + border: solid $error; + background: $surface; + padding: 1 2; + } + + #delete-title { + text-align: center; + text-style: bold; + color: $error; + margin-bottom: 1; + } + + #delete-message { + margin-bottom: 1; + } + + #delete-warning { + color: $warning; + margin-bottom: 1; + } + + #delete-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("c", "delete_cache", "Cache only"), + Binding("b", "delete_both", "Both", show=False), + Binding("y", "delete_cache", "Yes", show=False), + Binding("enter", "delete_cache", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, session_id: str, is_archived: bool = False) -> None: + super().__init__() + self.session_id = session_id + self.is_archived = is_archived + + def compose(self) -> ComposeResult: + with Container(id="delete-container"): + yield Static("Delete Session", id="delete-title") + yield Static( + f"Session: {self.session_id[:8]}...", + id="delete-message", + ) + if self.is_archived: + yield Static( + "This is an archived session with no JSONL file.\n" + "Deletion is PERMANENT and cannot be undone!", + id="delete-warning", + ) + yield Static( + "\\[Enter/y/c] Delete from cache \\[Esc/n] Cancel", + id="delete-buttons", + ) + 
else: + yield Static( + "Choose what to delete:\n" + "• Cache only: JSONL file remains, session can be re-parsed\n" + "• Both: Delete JSONL file AND cache (permanent!)", + id="delete-warning", + ) + yield Static( + "\\[c] Cache only \\[b] Both (permanent) \\[Esc/n] Cancel", + id="delete-buttons", + ) + + def action_delete_cache(self) -> None: + self.dismiss("cache_only") + + def action_delete_both(self) -> None: + if not self.is_archived: + self.dismiss("both") + + def action_cancel(self) -> None: + self.dismiss(None) + + +class ArchiveProjectConfirmScreen(ModalScreen[bool]): + """Modal screen for confirming project archival.""" + + CSS = """ + ArchiveProjectConfirmScreen { + align: center middle; + } + + #archive-project-container { + width: 65; + height: auto; + border: solid $warning; + background: $surface; + padding: 1 2; + } + + #archive-project-title { + text-align: center; + text-style: bold; + color: $warning; + margin-bottom: 1; + } + + #archive-project-message { + margin-bottom: 1; + } + + #archive-project-info { + color: $text-muted; + margin-bottom: 1; + } + + #archive-project-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("y", "confirm", "Yes"), + Binding("enter", "confirm", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, project_name: str, session_count: int) -> None: + super().__init__() + self.project_name = project_name + self.session_count = session_count + + def compose(self) -> ComposeResult: + with Container(id="archive-project-container"): + yield Static("Archive Project", id="archive-project-title") + yield Static( + f"Project: {self.project_name}\nSessions: {self.session_count}", + id="archive-project-message", + ) + yield Static( + "This will delete ALL JSONL files in the project.\n" + "Sessions will be archived and can be restored from cache.", + id="archive-project-info", + ) + yield 
Static("\\[Enter/y] Yes \\[Esc/n] No", id="archive-project-buttons") + + def action_confirm(self) -> None: + self.dismiss(True) + + def action_cancel(self) -> None: + self.dismiss(False) + + +class DeleteProjectConfirmScreen(ModalScreen[Optional[str]]): + """Modal screen for confirming project deletion with smart options.""" + + CSS = """ + DeleteProjectConfirmScreen { + align: center middle; + } + + #delete-project-container { + width: 65; + height: auto; + border: solid $error; + background: $surface; + padding: 1 2; + } + + #delete-project-title { + text-align: center; + text-style: bold; + color: $error; + margin-bottom: 1; + } + + #delete-project-message { + margin-bottom: 1; + } + + #delete-project-warning { + color: $warning; + margin-bottom: 1; + } + + #delete-project-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("c", "delete_cache", "Cache only"), + Binding("b", "delete_both", "Both", show=False), + Binding("y", "delete_cache", "Yes", show=False), + Binding("enter", "delete_cache", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__( + self, project_name: str, session_count: int, is_archived: bool = False + ) -> None: + super().__init__() + self.project_name = project_name + self.session_count = session_count + self.is_archived = is_archived + + def compose(self) -> ComposeResult: + with Container(id="delete-project-container"): + yield Static("Delete Project", id="delete-project-title") + yield Static( + f"Project: {self.project_name}\nSessions: {self.session_count}", + id="delete-project-message", + ) + if self.is_archived: + yield Static( + "This is an archived project with no JSONL files.\n" + "Deletion is PERMANENT and cannot be undone!", + id="delete-project-warning", + ) + yield Static( + "\\[Enter/y/c] Delete from cache \\[Esc/n] Cancel", + id="delete-project-buttons", + ) + else: + yield Static( + 
"Choose what to delete:\n" + "• Cache only: JSONL files remain, sessions can be re-parsed\n" + "• Both: Delete ALL JSONL files AND cache (permanent!)", + id="delete-project-warning", + ) + yield Static( + "\\[c] Cache only \\[b] Both (permanent) \\[Esc/n] Cancel", + id="delete-project-buttons", + ) + + def action_delete_cache(self) -> None: + self.dismiss("cache_only") + + def action_delete_both(self) -> None: + if not self.is_archived: + self.dismiss("both") + + def action_cancel(self) -> None: + self.dismiss(None) + + +class RestoreProjectConfirmScreen(ModalScreen[bool]): + """Modal screen for confirming project restoration.""" + + CSS = """ + RestoreProjectConfirmScreen { + align: center middle; + } + + #restore-project-container { + width: 65; + height: auto; + border: solid $success; + background: $surface; + padding: 1 2; + } + + #restore-project-title { + text-align: center; + text-style: bold; + color: $success; + margin-bottom: 1; + } + + #restore-project-message { + margin-bottom: 1; + } + + #restore-project-info { + color: $text-muted; + margin-bottom: 1; + } + + #restore-project-buttons { + text-align: center; + height: auto; + } + """ + + BINDINGS: ClassVar[list[BindingType]] = [ + Binding("y", "confirm", "Yes"), + Binding("enter", "confirm", "Confirm", show=False), + Binding("n", "cancel", "No"), + Binding("escape", "cancel", "Cancel", show=False), + ] + + def __init__(self, project_name: str, session_count: int) -> None: + super().__init__() + self.project_name = project_name + self.session_count = session_count + + def compose(self) -> ComposeResult: + with Container(id="restore-project-container"): + yield Static("Restore Project", id="restore-project-title") + yield Static( + f"Project: {self.project_name}\n" + f"Archived sessions: {self.session_count}", + id="restore-project-message", + ) + yield Static( + "This will restore ALL archived sessions by writing JSONL files.\n" + "The project directory will be created if it doesn't exist.", + 
id="restore-project-info", + ) + yield Static("\\[Enter/y] Yes \\[Esc/n] No", id="restore-project-buttons") + + def action_confirm(self) -> None: + self.dismiss(True) + + def action_cancel(self) -> None: + self.dismiss(False) + + class SessionBrowser(App[Optional[str]]): """Interactive TUI for browsing and managing Claude Code Log sessions.""" @@ -349,6 +956,9 @@ class SessionBrowser(App[Optional[str]]): TITLE = "Claude Code Log - Session Browser" BINDINGS: ClassVar[list[BindingType]] = [ Binding("q", "quit", "Quit"), + Binding("escape", "back_to_projects", "Back", show=False), + Binding("enter", "export_selected", "Open HTML", show=False), + Binding("a", "archive_session", "Archive Session"), Binding("h", "export_selected", "Open HTML page"), Binding("m", "export_markdown", "Open Markdown"), Binding("v", "view_markdown", "View Markdown"), @@ -357,6 +967,8 @@ class SessionBrowser(App[Optional[str]]): Binding("M", "force_export_markdown", "Force Markdown", show=False), Binding("V", "force_view_markdown", "Force View", show=False), Binding("c", "resume_selected", "Resume in Claude Code"), + Binding("r", "restore_jsonl", "Restore JSONL"), + Binding("d", "delete_session", "Delete Session"), Binding("e", "toggle_expanded", "Toggle Expanded View"), Binding("p", "back_to_projects", "Open Project Selector"), Binding("?", "toggle_help", "Help"), @@ -367,14 +979,17 @@ class SessionBrowser(App[Optional[str]]): project_path: Path cache_manager: CacheManager sessions: dict[str, SessionCacheData] + archived_sessions: dict[str, SessionCacheData] - def __init__(self, project_path: Path): + def __init__(self, project_path: Path, is_archived: bool = False): """Initialize the session browser with a project path.""" super().__init__() self.theme = "gruvbox" self.project_path = project_path + self.is_archived_project = is_archived self.cache_manager = CacheManager(project_path, get_library_version()) self.sessions = {} + self.archived_sessions = {} def compose(self) -> ComposeResult: 
"""Create the UI layout.""" @@ -407,8 +1022,32 @@ def on_resize(self) -> None: def load_sessions(self) -> None: """Load session information from cache or build cache if needed.""" + # For archived projects, just load from cache (no JSONL files to check) + if self.is_archived_project: + project_cache = self.cache_manager.get_cached_project_data() + if project_cache and project_cache.sessions: + # All sessions are "archived" for fully archived projects + self.sessions = {} + self.archived_sessions = project_cache.sessions + else: + self.sessions = {} + self.archived_sessions = {} + # Update UI + try: + self.populate_table() + self.update_stats() + except Exception: + pass + return + # Check if we need to rebuild cache by checking for modified files - jsonl_files = list(self.project_path.glob("*.jsonl")) + # Exclude agent files - they are loaded via session references + jsonl_files = [ + f + for f in self.project_path.glob("*.jsonl") + if not f.name.startswith("agent-") + ] + valid_session_ids = {f.stem for f in jsonl_files} modified_files = self.cache_manager.get_modified_files(jsonl_files) # Get cached project data @@ -434,6 +1073,24 @@ def load_sessions(self) -> None: # Don't show notification during startup - just return return + # Only compute archived sessions if there are JSONL files to compare against + # (in test environments, there may be cached sessions but no JSONL files) + if valid_session_ids: + # Load archived sessions (cached but JSONL deleted) + self.archived_sessions = self.cache_manager.get_archived_sessions( + valid_session_ids + ) + + # Filter current sessions to only those with existing JSONL files + self.sessions = { + sid: data + for sid, data in self.sessions.items() + if sid in valid_session_ids + } + else: + # No JSONL files to compare - treat all sessions as current + self.archived_sessions = {} + # Only update UI if we're in app context try: self.populate_table() @@ -473,13 +1130,20 @@ def populate_table(self) -> None: 
table.add_column("Messages", width=messages_width) table.add_column("Tokens", width=tokens_width) - # Sort sessions by start time (newest first) + # Combine current and archived sessions with archived flag + all_sessions: list[tuple[str, SessionCacheData, bool]] = [] + for session_id, session_data in self.sessions.items(): + all_sessions.append((session_id, session_data, False)) + for session_id, session_data in self.archived_sessions.items(): + all_sessions.append((session_id, session_data, True)) + + # Sort all sessions by start time (newest first) sorted_sessions = sorted( - self.sessions.items(), key=lambda x: x[1].first_timestamp, reverse=True + all_sessions, key=lambda x: x[1].first_timestamp, reverse=True ) # Add rows - for session_id, session_data in sorted_sessions: + for session_id, session_data, is_archived in sorted_sessions: # Format timestamps - use short format for narrow terminals use_short_format = terminal_width < 120 start_time = self.format_timestamp( @@ -501,7 +1165,9 @@ def populate_table(self) -> None: or session_data.first_user_message or "No preview available" ) - # Let Textual handle truncation based on column width + # Add [ARCHIVED] indicator for archived sessions + if is_archived: + preview = f"[ARCHIVED] {preview}" table.add_row( session_id[:8], @@ -514,10 +1180,12 @@ def populate_table(self) -> None: def update_stats(self) -> None: """Update the project statistics display.""" - total_sessions = len(self.sessions) - total_messages = sum(s.message_count for s in self.sessions.values()) + # Combine all sessions for stats + all_sessions = {**self.sessions, **self.archived_sessions} + total_sessions = len(all_sessions) + total_messages = sum(s.message_count for s in all_sessions.values()) total_tokens = sum( - s.total_input_tokens + s.total_output_tokens for s in self.sessions.values() + s.total_input_tokens + s.total_output_tokens for s in all_sessions.values() ) # Get project name using shared logic @@ -533,16 +1201,14 @@ def 
update_stats(self) -> None: ) # Find date range - if self.sessions: + if all_sessions: timestamps = [ - s.first_timestamp for s in self.sessions.values() if s.first_timestamp + s.first_timestamp for s in all_sessions.values() if s.first_timestamp ] earliest = min(timestamps) if timestamps else "" latest = ( - max( - s.last_timestamp for s in self.sessions.values() if s.last_timestamp - ) - if self.sessions + max(s.last_timestamp for s in all_sessions.values() if s.last_timestamp) + if all_sessions else "" ) @@ -560,8 +1226,17 @@ def update_stats(self) -> None: # Create spaced layout: Project (left), Sessions info (center), Date range (right) terminal_width = self.size.width + # Show archived count if any + archived_count = len(self.archived_sessions) + if archived_count > 0: + mode_indicator = f"({archived_count} archived)" + else: + mode_indicator = "" + # Project section (left aligned) - project_section = f"[bold]Project:[/bold] {project_name}" + project_section = ( + f"[bold]Project:[/bold] {project_name} {mode_indicator}".strip() + ) # Sessions info section (center) sessions_section = f"[bold]Sessions:[/bold] {total_sessions:,} | [bold]Messages:[/bold] {total_messages:,} | [bold]Tokens:[/bold] {total_tokens:,}" @@ -631,11 +1306,15 @@ def _update_selected_session_from_cursor(self) -> None: if row_data: # Extract session ID from the first column (now just first 8 chars) session_id_display = str(row_data[0]) - # Find the full session ID + # Find the full session ID in both dicts (current first, then archived) for full_session_id in self.sessions.keys(): if full_session_id.startswith(session_id_display): self.selected_session_id = full_session_id - break + return + for full_session_id in self.archived_sessions.keys(): + if full_session_id.startswith(session_id_display): + self.selected_session_id = full_session_id + return except Exception: # If widget not mounted yet or we can't get the row data, don't update selection pass @@ -764,14 +1443,17 @@ def 
_escape_rich_markup(self, text: str) -> str: def _update_expanded_content(self) -> None: """Update the expanded content for the currently selected session.""" - if ( - not self.selected_session_id - or self.selected_session_id not in self.sessions - ): + if not self.selected_session_id: + return + + # Get session data from either current or archived sessions + session_data = self.sessions.get( + self.selected_session_id + ) or self.archived_sessions.get(self.selected_session_id) + if not session_data: return expanded_content = self.query_one("#expanded-content", Static) - session_data = self.sessions[self.selected_session_id] # Build expanded content content_parts: list[str] = [] @@ -841,16 +1523,24 @@ def _ensure_session_file( if not needs_regeneration: return session_file - # Load messages from JSONL files + # Load messages - from cache for archived sessions, from JSONL otherwise try: - messages = load_directory_transcripts( - self.project_path, self.cache_manager, silent=True - ) + is_archived = session_id in self.archived_sessions + if is_archived: + # Load from cache for archived sessions + messages = self.cache_manager.load_session_entries(session_id) + else: + # Load from JSONL files for current sessions + messages = load_directory_transcripts( + self.project_path, self.cache_manager, silent=True + ) if not messages: return None - # Build session title - session_data = self.sessions.get(session_id) + # Build session title - check both dicts + session_data = self.sessions.get(session_id) or self.archived_sessions.get( + session_id + ) project_cache = self.cache_manager.get_cached_project_data() project_name = get_project_display_name( self.project_path.name, @@ -884,9 +1574,12 @@ def _ensure_session_file( def action_toggle_expanded(self) -> None: """Toggle the expanded view for the selected session.""" + if not self.selected_session_id: + return + # Check if session exists in either current or archived sessions if ( - not self.selected_session_id - or 
self.selected_session_id not in self.sessions + self.selected_session_id not in self.sessions + and self.selected_session_id not in self.archived_sessions ): return @@ -909,8 +1602,12 @@ def action_toggle_help(self) -> None: "Claude Code Log - Session Browser\n\n" "Navigation:\n" "- Use arrow keys to select sessions\n" - "- Expanded content updates automatically when visible\n\n" + "- Expanded content updates automatically when visible\n" + "- [ARCHIVED] sessions have no JSONL file (cache only)\n\n" "Actions:\n" + "- a: Archive session (delete JSONL, keep in cache)\n" + "- d: Delete session (with options)\n" + "- r: Restore archived session to JSONL\n" "- e: Toggle expanded view for session\n" "- h: Open selected session's HTML page\n" "- m: Open selected session's Markdown file (in browser)\n" @@ -921,6 +1618,181 @@ def action_toggle_help(self) -> None: ) self.notify(help_text, timeout=10) + def check_action(self, action: str, parameters: tuple[object, ...]) -> bool | None: + """Conditionally enable/disable actions based on selected session type.""" + if not self.selected_session_id: + return True # Allow action, it will handle missing selection + + is_archived = self.selected_session_id in self.archived_sessions + is_current = self.selected_session_id in self.sessions + + # Archive is only available for current sessions (has JSONL file) + if action == "archive_session" and not is_current: + return False + # Resume is only available for current sessions + if action == "resume_selected" and not is_current: + return False + # Restore is only available for archived sessions + if action == "restore_jsonl" and not is_archived: + return False + return True + + def action_restore_jsonl(self) -> None: + """Restore the selected archived session to a JSONL file.""" + if not self.selected_session_id: + self.notify("No session selected", severity="warning") + return + + if self.selected_session_id not in self.archived_sessions: + self.notify( + "Selected session not found in 
archived sessions", severity="error" + ) + return + + try: + # Export messages from cache + messages = self.cache_manager.export_session_to_jsonl( + self.selected_session_id + ) + if not messages: + self.notify("No messages found for session", severity="error") + return + + # Ensure project directory exists (may have been deleted) + self.project_path.mkdir(parents=True, exist_ok=True) + + # Write to JSONL file + output_path = self.project_path / f"{self.selected_session_id}.jsonl" + with open(output_path, "w", encoding="utf-8") as f: + for msg in messages: + f.write(msg + "\n") + + self.notify( + f"Restored {len(messages)} messages to {output_path.name}", + severity="information", + ) + + # Refresh to show the restored session as current + self._refresh_after_restore() + + except Exception as e: + self.notify(f"Error restoring session: {e}", severity="error") + + def _refresh_after_restore(self) -> None: + """Refresh sessions after restoring an archived session.""" + # If this was a fully archived project, it's no longer archived + # since we just restored a JSONL file + if self.is_archived_project: + self.is_archived_project = False + + # Reload sessions - this will now detect the restored JSONL file + self.load_sessions() + + self.notify( + "Session restored! 
It now appears as a current session.", + timeout=5, + ) + + def action_archive_session(self) -> None: + """Archive the selected session (delete JSONL file, keep in cache).""" + if not self.selected_session_id: + self.notify("No session selected", severity="warning") + return + + # Archive only works for current sessions (those with JSONL files) + if self.selected_session_id not in self.sessions: + self.notify( + "Only current sessions can be archived (already archived or not found)", + severity="warning", + ) + return + + # Push archive confirmation screen + self.push_screen( + ArchiveConfirmScreen(session_id=self.selected_session_id), + callback=self._on_archive_confirm, + ) + + def _on_archive_confirm(self, confirmed: Optional[bool]) -> None: + """Handle archive confirmation result.""" + if not confirmed or not self.selected_session_id: + return + + try: + # Delete the JSONL file + jsonl_path = self.project_path / f"{self.selected_session_id}.jsonl" + if jsonl_path.exists(): + jsonl_path.unlink() + self.notify( + f"Session {self.selected_session_id[:8]} archived", + severity="information", + ) + # Reload sessions - this will move the session to archived + self.load_sessions() + else: + self.notify("JSONL file not found", severity="error") + except Exception as e: + self.notify(f"Error archiving session: {e}", severity="error") + + def action_delete_session(self) -> None: + """Delete the selected session with smart options.""" + if not self.selected_session_id: + self.notify("No session selected", severity="warning") + return + + # Check if session exists in either current or archived sessions + if ( + self.selected_session_id not in self.sessions + and self.selected_session_id not in self.archived_sessions + ): + self.notify("Selected session not found", severity="error") + return + + # Determine if this is an archived session (no JSONL to fall back on) + is_archived_session = self.selected_session_id in self.archived_sessions + + # Push confirmation screen + 
self.push_screen( + DeleteConfirmScreen( + session_id=self.selected_session_id, + is_archived=is_archived_session, + ), + callback=self._on_delete_confirm, + ) + + def _on_delete_confirm(self, delete_option: Optional[str]) -> None: + """Handle deletion confirmation result.""" + if not delete_option or not self.selected_session_id: + return + + try: + deleted_what: list[str] = [] + + # Delete JSONL file if requested + if delete_option == "both": + jsonl_path = self.project_path / f"{self.selected_session_id}.jsonl" + if jsonl_path.exists(): + jsonl_path.unlink() + deleted_what.append("JSONL file") + + # Delete from cache + success = self.cache_manager.delete_session(self.selected_session_id) + if success: + deleted_what.append("cache") + + if deleted_what: + self.notify( + f"Session {self.selected_session_id[:8]} deleted ({', '.join(deleted_what)})", + severity="information", + ) + # Clear selection and reload + self.selected_session_id = None + self.load_sessions() + else: + self.notify("Failed to delete session", severity="error") + except Exception as e: + self.notify(f"Error deleting session: {e}", severity="error") + def action_back_to_projects(self) -> None: """Navigate to the project selector.""" # Exit with a special return value to signal we want to go to project selector @@ -932,14 +1804,16 @@ async def action_quit(self) -> None: def run_project_selector( - projects: list[Path], matching_projects: list[Path] + projects: list[Path], + matching_projects: list[Path], + archived_projects: Optional[set[Path]] = None, ) -> Optional[Path]: """Run the project selector TUI and return the selected project path.""" if not projects: print("Error: No projects provided") return None - app = ProjectSelector(projects, matching_projects) + app = ProjectSelector(projects, matching_projects, archived_projects) try: return app.run() except KeyboardInterrupt: @@ -948,9 +1822,20 @@ def run_project_selector( return None -def run_session_browser(project_path: Path) -> 
Optional[str]: +def run_session_browser(project_path: Path, is_archived: bool = False) -> Optional[str]: """Run the session browser TUI for the given project path.""" if not project_path.exists(): + # For archived projects, the directory may not exist but cache may + if is_archived: + # Try to load from cache + try: + cache_manager = CacheManager(project_path, get_library_version()) + project_cache = cache_manager.get_cached_project_data() + if project_cache and project_cache.sessions: + app = SessionBrowser(project_path, is_archived=True) + return app.run() + except Exception: + pass print(f"Error: Project path {project_path} does not exist") return None @@ -961,10 +1846,20 @@ def run_session_browser(project_path: Path) -> Optional[str]: # Check if there are any JSONL files jsonl_files = list(project_path.glob("*.jsonl")) if not jsonl_files: + # For archived projects, check if we have cached sessions + if is_archived: + try: + cache_manager = CacheManager(project_path, get_library_version()) + project_cache = cache_manager.get_cached_project_data() + if project_cache and project_cache.sessions: + app = SessionBrowser(project_path, is_archived=True) + return app.run() + except Exception: + pass print(f"Error: No JSONL transcript files found in {project_path}") return None - app = SessionBrowser(project_path) + app = SessionBrowser(project_path, is_archived=is_archived) try: return app.run() except KeyboardInterrupt: diff --git a/dev-docs/restoring-archived-sessions.md b/dev-docs/restoring-archived-sessions.md new file mode 100644 index 00000000..38582deb --- /dev/null +++ b/dev-docs/restoring-archived-sessions.md @@ -0,0 +1,100 @@ +# Restoring Archived Sessions + +When you run `claude-code-log`, you may see output like: + +```sh +project-name: cached, 3 archived (0.0s) +``` + +This indicates that 3 sessions exist in the cache whose source JSONL files have been deleted. + +## What Are Archived Sessions? 
+ +Archived sessions are sessions preserved in the SQLite cache (`~/.claude/projects/cache.db`) even after their source JSONL files have been deleted. This happens when: + +1. Claude Code automatically deletes old JSONL files based on the `cleanupPeriodDays` setting +2. You manually delete JSONL files from `~/.claude/projects/*/` + +The cache stores the complete message data, so full restoration is possible. + +## Preventing Automatic Deletion + +Claude Code automatically deletes session logs after 30 days by default. To change this, add `cleanupPeriodDays` to your `~/.claude/settings.json`: + +```json +{ + "cleanupPeriodDays": 99999 +} +``` + +This effectively disables automatic cleanup (274 years). You can also set it to a specific number of days. + +See Claude Code's [settings documentation](https://docs.anthropic.com/en/docs/claude-code/settings) for more details. + +## Using the TUI to Manage Archived Sessions + +The easiest way to browse and restore archived sessions is through the interactive TUI. + +### Launch the TUI + +```bash +claude-code-log --tui +``` + +### Toggle Archived View + +Press `a` to toggle between current and archived sessions. The header shows the current mode: + +```text +┌─ Claude Code Log ─────────────────────────────────────────────────┐ +│ Project: my-project ARCHIVED (3) │ +│ Sessions: 3 │ Messages: 456 │ Tokens: 45,230 │ +├──────────┬───────────────────────────────────┬─────────┬──────────┤ +│ Session │ Title │ Start │ Messages │ +├──────────┼───────────────────────────────────┼─────────┼──────────┤ +│ abc123 │ Fix authentication bug │ 12-01 │ 45 │ +│ def456 │ Add user settings page │ 11-28 │ 123 │ +│ ghi789 │ Refactor database layer │ 11-15 │ 67 │ +└──────────┴───────────────────────────────────┴─────────┴──────────┘ + [a] Current [r] Restore [h] HTML [v] View [q] Quit +``` + +### Restore a Session + +1. Switch to archived view with `a` +2. Navigate to the session you want to restore +3. 
Press `r` to restore the session to a JSONL file +4. The session will be restored to `~/.claude/projects/{project}/{session-id}.jsonl` +5. Press `a` again to switch back to current sessions and see the restored session + +### View Archived Sessions + +You can also view archived sessions as HTML or Markdown without restoring them: + +- `h` - Open HTML in browser +- `m` - Open Markdown in browser +- `v` - View Markdown in embedded viewer + +## Limitations + +- **Message order**: Messages are ordered by timestamp, which may differ slightly from original file order for same-timestamp entries +- **Whitespace**: Original JSON formatting is not preserved (semantically identical) + +## Manual SQL Approach + +For advanced users, you can also query the cache database directly: + +```bash +sqlite3 ~/.claude/projects/cache.db +``` + +```sql +-- List all sessions +SELECT p.project_path, s.session_id, s.first_timestamp, s.message_count +FROM sessions s +JOIN projects p ON s.project_id = p.id +ORDER BY s.first_timestamp; + +-- Export a session's messages +SELECT content FROM messages WHERE session_id = 'your-session-id' ORDER BY timestamp; +``` diff --git a/test/__snapshots__/test_snapshot_html.ambr b/test/__snapshots__/test_snapshot_html.ambr index 65bf5df3..83cee00c 100644 --- a/test/__snapshots__/test_snapshot_html.ambr +++ b/test/__snapshots__/test_snapshot_html.ambr @@ -461,6 +461,30 @@ .project-sessions details[open] summary { margin-bottom: 10px; } + + /* Archived project styling */ + .project-card.archived { + opacity: 0.6; + background-color: #f5f5f522; + } + + .project-card.archived:hover { + opacity: 0.8; + } + + .archived-badge { + display: inline-block; + background-color: #888; + color: white; + font-size: 0.65em; + font-weight: 600; + padding: 2px 8px; + border-radius: 4px; + margin-left: 10px; + vertical-align: middle; + text-transform: uppercase; + letter-spacing: 0.5px; + } /* Search Bar Styles */ .search-container { position: relative; @@ -1655,7 +1679,9 @@
Users/test/project/beta + (← open combined transcript) +
📁 3 transcript files
@@ -1672,7 +1698,9 @@
alpha + (← open combined transcript) +
📁 5 transcript files
diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index 2bce66df..6af5b721 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -440,3 +440,424 @@ def test_cache_version_upgrade_scenario(self, setup_test_project): with patch("claude_code_log.cache.get_library_version", return_value="2.0.0"): output = convert_jsonl_to_html(input_path=project_dir, use_cache=True) assert output.exists() + + +class TestArchivedSessionsIntegration: + """Test archived sessions functionality - sessions cached but JSONL deleted.""" + + def test_get_archived_sessions_after_file_deletion( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that sessions become archived when JSONL files are deleted.""" + project_dir = temp_projects_dir / "archived-test" + project_dir.mkdir() + + # Create JSONL file with session data + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify session is in cache + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert "session-1" in cached_data.sessions + + # Delete the JSONL file + jsonl_file.unlink() + + # Now session-1 should be archived (no valid session IDs) + valid_session_ids: set[str] = set() # No JSONL files left + archived = cache_manager.get_archived_sessions(valid_session_ids) + + assert "session-1" in archived + assert archived["session-1"].message_count > 0 + assert archived["session-1"].first_timestamp == "2023-01-01T10:00:00Z" + + def test_get_archived_sessions_with_some_files_remaining( + self, temp_projects_dir, sample_jsonl_data + ): + """Test archived sessions when only some JSONL files are deleted.""" + project_dir = temp_projects_dir / "partial-archived" + project_dir.mkdir() + + # 
Create two session files + for session_id in ["session-1", "session-2"]: + jsonl_file = project_dir / f"{session_id}.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + entry_copy = entry.copy() + if "sessionId" in entry_copy: + entry_copy["sessionId"] = session_id + f.write(json.dumps(entry_copy) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Delete only session-1 + (project_dir / "session-1.jsonl").unlink() + + # session-2 should be valid, session-1 should be archived + valid_session_ids = {"session-2"} + cache_manager = CacheManager(project_dir, "1.0.0") + archived = cache_manager.get_archived_sessions(valid_session_ids) + + assert "session-1" in archived + assert "session-2" not in archived + + def test_export_session_to_jsonl(self, temp_projects_dir, sample_jsonl_data): + """Test exporting session messages for JSONL restoration.""" + project_dir = temp_projects_dir / "export-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Export messages from cache + cache_manager = CacheManager(project_dir, "1.0.0") + exported_messages = cache_manager.export_session_to_jsonl("session-1") + + # Should have exported messages (not summary which has no sessionId) + assert len(exported_messages) >= 2 # user + assistant messages + + # Each message should be valid JSON + for msg_json in exported_messages: + parsed = json.loads(msg_json) + assert "type" in parsed + assert parsed["sessionId"] == "session-1" + + def test_load_session_entries_for_rendering( + self, temp_projects_dir, sample_jsonl_data + ): + """Test loading session entries from cache for HTML/Markdown rendering.""" + project_dir = temp_projects_dir / "load-entries-test" + 
project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Load entries from cache + cache_manager = CacheManager(project_dir, "1.0.0") + entries = cache_manager.load_session_entries("session-1") + + # Should have TranscriptEntry objects + assert len(entries) >= 2 + + # Check that entries are proper types + entry_types = [e.type for e in entries] + assert "user" in entry_types + assert "assistant" in entry_types + + def test_full_archive_and_restore_workflow( + self, temp_projects_dir, sample_jsonl_data + ): + """Test the full workflow: cache -> delete -> archive -> restore.""" + project_dir = temp_projects_dir / "full-workflow" + project_dir.mkdir() + + # Step 1: Create JSONL file and cache it + original_file = project_dir / "session-1.jsonl" + with open(original_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify cache populated + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + original_message_count = cached_data.sessions["session-1"].message_count + + # Step 2: Delete the JSONL file + original_file.unlink() + assert not original_file.exists() + + # Step 3: Verify session is now archived + archived = cache_manager.get_archived_sessions(set()) + assert "session-1" in archived + + # Step 4: Restore the session from cache + exported_messages = cache_manager.export_session_to_jsonl("session-1") + restored_file = project_dir / "session-1.jsonl" + with open(restored_file, "w") as f: + for msg in exported_messages: + f.write(msg + "\n") + + # Step 5: Verify the restored file exists and session is no longer archived + assert 
restored_file.exists() + + valid_session_ids = {"session-1"} + archived_after_restore = cache_manager.get_archived_sessions(valid_session_ids) + assert "session-1" not in archived_after_restore + + # Step 6: Verify restored content is valid by re-processing + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + cached_data = cache_manager.get_cached_project_data() + # Message count should be preserved + assert cached_data is not None + assert cached_data.sessions["session-1"].message_count == original_message_count + + def test_archived_session_count_in_converter( + self, temp_projects_dir, sample_jsonl_data, capsys + ): + """Test that archived session count is reported in converter output.""" + project_dir = temp_projects_dir / "count-test" + project_dir.mkdir() + + # Create two sessions so one remains after deletion + for session_id in ["session-1", "session-2"]: + jsonl_file = project_dir / f"{session_id}.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + entry_copy = entry.copy() + if "sessionId" in entry_copy: + entry_copy["sessionId"] = session_id + f.write(json.dumps(entry_copy) + "\n") + + # Process to cache (as part of all-projects hierarchy) + process_projects_hierarchy(projects_path=temp_projects_dir, use_cache=True) + + # Delete only session-1, keeping session-2 so project is still found + (project_dir / "session-1.jsonl").unlink() + + # Process again - should report archived sessions + process_projects_hierarchy( + projects_path=temp_projects_dir, use_cache=True, silent=False + ) + + captured = capsys.readouterr() + # Output should mention archived sessions + assert "archived" in captured.out.lower() + + def test_load_entries_preserves_message_order( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that loaded entries preserve chronological order.""" + project_dir = temp_projects_dir / "order-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with 
open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Load entries from cache + cache_manager = CacheManager(project_dir, "1.0.0") + entries = cache_manager.load_session_entries("session-1") + + # Filter to entries with timestamps and extract them + timestamps: list[str] = [] + for e in entries: + if hasattr(e, "timestamp") and e.timestamp: + timestamps.append(str(e.timestamp)) + + # Verify chronological order (ISO timestamps are lexicographically sortable) + assert timestamps == sorted(timestamps) + + def test_export_empty_session_returns_empty_list(self, temp_projects_dir): + """Test that exporting a non-existent session returns empty list.""" + project_dir = temp_projects_dir / "empty-export" + project_dir.mkdir() + + # Create a dummy JSONL to initialize the project + jsonl_file = project_dir / "dummy.jsonl" + jsonl_file.write_text("{}\n") + + cache_manager = CacheManager(project_dir, "1.0.0") + + # Export non-existent session + exported = cache_manager.export_session_to_jsonl("non-existent-session") + assert exported == [] + + # Load entries for non-existent session + entries = cache_manager.load_session_entries("non-existent-session") + assert entries == [] + + def test_export_session_produces_compact_json( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that exported JSONL has compact JSON format (no spaces after separators).""" + project_dir = temp_projects_dir / "compact-json-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Export messages + cache_manager = CacheManager(project_dir, "1.0.0") + exported_messages = 
cache_manager.export_session_to_jsonl("session-1") + + # Each message should be compact JSON (no spaces after : or ,) + for msg_json in exported_messages: + # Should not have ": " (colon-space) pattern except in string values + # Check by ensuring re-serialization produces same result + parsed = json.loads(msg_json) + compact_reserialized = json.dumps(parsed, separators=(",", ":")) + assert msg_json == compact_reserialized, ( + f"JSON should be compact format.\n" + f"Got: {msg_json[:100]}...\n" + f"Expected: {compact_reserialized[:100]}..." + ) + + def test_delete_session_from_cache(self, temp_projects_dir, sample_jsonl_data): + """Test deleting a session from cache.""" + project_dir = temp_projects_dir / "delete-session-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify session exists in cache + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert "session-1" in cached_data.sessions + + # Delete the session + result = cache_manager.delete_session("session-1") + assert result is True + + # Verify session is gone from cache + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert "session-1" not in cached_data.sessions + + # Export should return empty + exported = cache_manager.export_session_to_jsonl("session-1") + assert exported == [] + + def test_delete_nonexistent_session(self, temp_projects_dir): + """Test deleting a session that doesn't exist returns False.""" + project_dir = temp_projects_dir / "delete-nonexistent" + project_dir.mkdir() + + # Create a dummy JSONL to initialize the project + jsonl_file = project_dir / "dummy.jsonl" + jsonl_file.write_text("{}\n") + + 
cache_manager = CacheManager(project_dir, "1.0.0") + + # Delete non-existent session + result = cache_manager.delete_session("non-existent-session") + assert result is False + + def test_delete_project_from_cache(self, temp_projects_dir, sample_jsonl_data): + """Test deleting an entire project from cache.""" + project_dir = temp_projects_dir / "delete-project-test" + project_dir.mkdir() + + # Create JSONL file + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify project exists in cache + cache_manager = CacheManager(project_dir, "1.0.0") + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + + # Delete the project + result = cache_manager.delete_project() + assert result is True + + # Cache manager should no longer have valid project ID + cached_data = cache_manager.get_cached_project_data() + assert cached_data is None + + +class TestGetAllCachedProjects: + """Tests for get_all_cached_projects() function.""" + + def test_get_all_cached_projects_finds_active_and_archived( + self, temp_projects_dir, sample_jsonl_data + ): + """Test finding both active and archived projects.""" + from claude_code_log.cache import get_all_cached_projects + + # Create two projects - one active, one that will be archived + active_dir = temp_projects_dir / "active-project" + active_dir.mkdir() + archived_dir = temp_projects_dir / "archived-project" + archived_dir.mkdir() + + # Create JSONL files in both + for proj_dir in [active_dir, archived_dir]: + jsonl_file = proj_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process both projects to populate cache + convert_jsonl_to_html(input_path=active_dir, use_cache=True) + 
convert_jsonl_to_html(input_path=archived_dir, use_cache=True) + + # Delete JSONL from "archived" project to simulate archival + (archived_dir / "session-1.jsonl").unlink() + + # Get all cached projects + projects = get_all_cached_projects(temp_projects_dir) + + # Should find both projects + project_paths = {p[0] for p in projects} + assert str(active_dir) in project_paths + assert str(archived_dir) in project_paths + + # Check is_archived flag + for project_path, is_archived in projects: + if project_path == str(active_dir): + assert is_archived is False + elif project_path == str(archived_dir): + assert is_archived is True + + def test_get_all_cached_projects_empty_dir(self, temp_projects_dir): + """Test get_all_cached_projects with no cache.""" + from claude_code_log.cache import get_all_cached_projects + + # No cache.db exists + projects = get_all_cached_projects(temp_projects_dir) + assert projects == [] + + def test_get_all_cached_projects_nonexistent_dir(self, tmp_path): + """Test get_all_cached_projects with nonexistent directory.""" + from claude_code_log.cache import get_all_cached_projects + + nonexistent = tmp_path / "does-not-exist" + projects = get_all_cached_projects(nonexistent) + assert projects == [] diff --git a/test/test_tui.py b/test/test_tui.py index 30856317..9009a490 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -13,7 +13,7 @@ from textual.widgets import DataTable, Label from claude_code_log.cache import CacheManager, SessionCacheData -from claude_code_log.tui import SessionBrowser, run_session_browser +from claude_code_log.tui import ProjectSelector, SessionBrowser, run_session_browser @pytest.fixture @@ -87,11 +87,23 @@ def temp_project_dir(): }, ] - # Write test data to JSONL file - jsonl_file = project_path / "test-transcript.jsonl" - with open(jsonl_file, "w", encoding="utf-8") as f: + # Write test data to JSONL files - one per session (matching real-world usage) + # Session 123 entries + session_123_file = project_path / 
"session-123.jsonl" + with open(session_123_file, "w", encoding="utf-8") as f: for entry in test_data: - f.write(json.dumps(entry) + "\n") + if entry.get("sessionId") == "session-123": + f.write(json.dumps(entry) + "\n") + + # Session 456 entries (includes summary) + session_456_file = project_path / "session-456.jsonl" + with open(session_456_file, "w", encoding="utf-8") as f: + for entry in test_data: + if ( + entry.get("sessionId") == "session-456" + or entry.get("type") == "summary" + ): + f.write(json.dumps(entry) + "\n") yield project_path @@ -907,3 +919,1017 @@ async def test_empty_project_handling(self): stats = cast(Label, app.query_one("#stats")) stats_text = str(stats.content) assert "Sessions:[/bold] 0" in stats_text + + @pytest.mark.asyncio + async def test_archived_project_loads_archived_sessions(self): + """Test that an archived project (no JSONL files) loads sessions in archived_sessions.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + + # Create empty JSONL file to initialize + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.touch() + + # Create app with is_archived=True (simulating archived project) + app = SessionBrowser(project_path, is_archived=True) + + # Mock the cache manager to return some sessions + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + summary="Archived session", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=5, + first_user_message="Hello from archived", + total_input_tokens=100, + total_output_tokens=200, + ), + } + + with ( + patch.object( + app.cache_manager, "get_cached_project_data" + ) as mock_cache, + ): + mock_cache.return_value = Mock( + sessions=mock_session_data, + working_directories=[str(project_path)], + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Manually call load_sessions (since mocking) + app.load_sessions() + + # Sessions should be in 
archived_sessions, not sessions + assert len(app.archived_sessions) > 0 + assert len(app.sessions) == 0 + + # Stats should show "archived" count + stats = cast(Label, app.query_one("#stats")) + stats_text = str(stats.content) + assert "archived" in stats_text.lower() + + +@pytest.mark.tui +class TestUnifiedSessionList: + """Tests for the unified session list showing both current and archived sessions.""" + + @pytest.mark.asyncio + async def test_unified_list_shows_both_current_and_archived(self): + """Test that both current and archived sessions appear in the same list.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-current.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + current_session = { + "session-current": SessionCacheData( + session_id="session-current", + first_timestamp="2025-01-02T10:00:00Z", + last_timestamp="2025-01-02T10:01:00Z", + message_count=1, + first_user_message="Current session", + total_input_tokens=10, + total_output_tokens=10, + ), + } + archived_session = { + "session-archived": SessionCacheData( + session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Archived session", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = current_session + app.archived_sessions = archived_session + app.populate_table() + + # Get the table + table = cast(DataTable, app.query_one("#sessions-table")) + + # Should have 2 rows (both sessions in one list) + assert table.row_count == 2 + + @pytest.mark.asyncio + async def test_unified_list_sorted_by_timestamp_newest_first(self): + """Test that sessions are sorted by timestamp with newest first.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + 
jsonl_file = project_path / "session-old.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + # Create sessions with different timestamps + old_session = { + "session-old": SessionCacheData( + session_id="session-old", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Old session", + total_input_tokens=10, + total_output_tokens=10, + ), + } + new_archived_session = { + "session-new": SessionCacheData( + session_id="session-new", + first_timestamp="2025-01-03T10:00:00Z", + last_timestamp="2025-01-03T10:01:00Z", + message_count=1, + first_user_message="New archived session", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = old_session + app.archived_sessions = new_archived_session + app.populate_table() + + table = cast(DataTable, app.query_one("#sessions-table")) + + # Get first row - should be the newest (archived) session + first_row = table.get_row_at(0) + # Session ID column shows first 8 chars + assert str(first_row[0]).startswith("session-") + # Title should have [ARCHIVED] prefix since newest is archived + assert "[ARCHIVED]" in str(first_row[1]) + + @pytest.mark.asyncio + async def test_archived_sessions_have_archived_indicator(self): + """Test that archived sessions display [ARCHIVED] indicator in title.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-current.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + current_session = { + "session-current": SessionCacheData( + session_id="session-current", + first_timestamp="2025-01-02T10:00:00Z", + last_timestamp="2025-01-02T10:01:00Z", + message_count=1, + first_user_message="Current session message", + total_input_tokens=10, + total_output_tokens=10, + 
), + } + archived_session = { + "session-archived": SessionCacheData( + session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Archived session message", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = current_session + app.archived_sessions = archived_session + app.populate_table() + + table = cast(DataTable, app.query_one("#sessions-table")) + + # Check both rows + found_archived_indicator = False + found_current_without_indicator = False + + for row_idx in range(table.row_count): + row = table.get_row_at(row_idx) + title = str(row[1]) + if "[ARCHIVED]" in title: + found_archived_indicator = True + assert "Archived session message" in title + else: + found_current_without_indicator = True + assert "Current session message" in title + + assert found_archived_indicator, ( + "Archived session should have [ARCHIVED] indicator" + ) + assert found_current_without_indicator, ( + "Current session should not have [ARCHIVED] indicator" + ) + + @pytest.mark.asyncio + async def test_stats_show_combined_totals(self): + """Test that stats display combined totals from both current and archived sessions.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-current.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + current_session = { + "session-current": SessionCacheData( + session_id="session-current", + first_timestamp="2025-01-02T10:00:00Z", + last_timestamp="2025-01-02T10:01:00Z", + message_count=5, + first_user_message="Current", + total_input_tokens=100, + total_output_tokens=200, + ), + } + archived_session = { + "session-archived": SessionCacheData( + session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + 
last_timestamp="2025-01-01T10:01:00Z", + message_count=3, + first_user_message="Archived", + total_input_tokens=50, + total_output_tokens=100, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = current_session + app.archived_sessions = archived_session + app.update_stats() + + stats = cast(Label, app.query_one("#stats")) + stats_text = str(stats.content) + + # Should show combined sessions count (2) + assert "Sessions:[/bold] 2" in stats_text + # Should show combined messages count (5 + 3 = 8) + assert "Messages:[/bold] 8" in stats_text + # Should show combined tokens (100+200+50+100 = 450) + assert "Tokens:[/bold] 450" in stats_text + # Should indicate archived count + assert "1 archived" in stats_text + + +@pytest.mark.tui +class TestArchiveConfirmScreen: + """Tests for archive confirmation via the archive action.""" + + @pytest.mark.asyncio + async def test_archive_confirm_y_key_deletes_file(self): + """Test confirming archive with 'y' key deletes the JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + assert jsonl_file.exists() + + # Trigger archive (opens modal) + await pilot.press("a") + await pilot.pause(0.1) + + # Confirm with 'y' + await pilot.press("y") + await pilot.pause(0.1) + + assert not jsonl_file.exists() + + @pytest.mark.asyncio + async def test_archive_confirm_enter_key_deletes_file(self): + 
"""Test confirming archive with Enter key deletes the JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + assert jsonl_file.exists() + + # Trigger archive (opens modal) + await pilot.press("a") + await pilot.pause(0.1) + + # Confirm with Enter + await pilot.press("enter") + await pilot.pause(0.1) + + assert not jsonl_file.exists() + + @pytest.mark.asyncio + async def test_archive_cancel_n_key_keeps_file(self): + """Test cancelling archive with 'n' key keeps the JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + # Trigger archive (opens modal) + await pilot.press("a") + await pilot.pause(0.1) + + # Cancel with 'n' + await pilot.press("n") + await pilot.pause(0.1) + + # File should still exist + assert jsonl_file.exists() + + 
@pytest.mark.asyncio + async def test_archive_cancel_escape_key_keeps_file(self): + """Test cancelling archive with Escape key keeps the JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + # Trigger archive (opens modal) + await pilot.press("a") + await pilot.pause(0.1) + + # Cancel with Escape + await pilot.press("escape") + await pilot.pause(0.1) + + # File should still exist + assert jsonl_file.exists() + + +@pytest.mark.tui +class TestDeleteConfirmScreen: + """Tests for delete confirmation with smart options.""" + + @pytest.mark.asyncio + async def test_delete_current_session_cache_only_keeps_jsonl(self): + """Test delete with 'c' (cache only) keeps JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with patch.object( + app.cache_manager, "delete_session", return_value=True + ) as mock_delete: + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = 
mock_session_data + app.selected_session_id = "session-123" + + # Trigger delete (opens modal) + await pilot.press("d") + await pilot.pause(0.1) + + # Choose cache only with 'c' + await pilot.press("c") + await pilot.pause(0.1) + + # JSONL should still exist + assert jsonl_file.exists() + mock_delete.assert_called_once_with("session-123") + + @pytest.mark.asyncio + async def test_delete_current_session_both_deletes_jsonl(self): + """Test delete with 'b' (both) deletes JSONL file.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with patch.object( + app.cache_manager, "delete_session", return_value=True + ) as mock_delete: + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + assert jsonl_file.exists() + + # Trigger delete (opens modal) + await pilot.press("d") + await pilot.pause(0.1) + + # Choose both with 'b' + await pilot.press("b") + await pilot.pause(0.1) + + # JSONL should be deleted + assert not jsonl_file.exists() + mock_delete.assert_called_once_with("session-123") + + @pytest.mark.asyncio + async def test_delete_archived_session_with_enter_key(self): + """Test deleting archived session with Enter key.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_archived_data = { + "session-archived": SessionCacheData( + 
session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with patch.object( + app.cache_manager, "delete_session", return_value=True + ) as mock_delete: + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = {} + app.archived_sessions = mock_archived_data + app.selected_session_id = "session-archived" + + # Trigger delete (opens modal) + await pilot.press("d") + await pilot.pause(0.1) + + # Confirm with Enter (for archived sessions) + await pilot.press("enter") + await pilot.pause(0.1) + + mock_delete.assert_called_once_with("session-archived") + + @pytest.mark.asyncio + async def test_delete_cancel_n_key(self): + """Test cancelling delete with 'n' key.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with patch.object( + app.cache_manager, "delete_session", return_value=True + ) as mock_delete: + async with app.run_test() as pilot: + await pilot.pause(0.2) + + app.sessions = mock_session_data + app.selected_session_id = "session-123" + + # Trigger delete (opens modal) + await pilot.press("d") + await pilot.pause(0.1) + + # Cancel with 'n' + await pilot.press("n") + await pilot.pause(0.1) + + # Should not have deleted + mock_delete.assert_not_called() + assert jsonl_file.exists() + + +@pytest.mark.tui +class TestArchiveActionEdgeCases: + """Edge case tests for the archive session action.""" + + 
@pytest.mark.asyncio + async def test_archive_action_no_selection(self): + """Test archive action with no session selected shows warning.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Ensure no session is selected + app.selected_session_id = None + + # Try to archive - should notify warning + await pilot.press("a") + await pilot.pause(0.1) + + # No modal should be pushed (we can't easily check notifications) + # but at least verify no crash occurred + + @pytest.mark.asyncio + async def test_archive_action_on_archived_session_shows_warning(self): + """Test archive action on already archived session shows warning.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + mock_session_data = { + "session-archived": SessionCacheData( + session_id="session-archived", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Set up archived session + app.archived_sessions = mock_session_data + app.sessions = {} + app.selected_session_id = "session-archived" + + # Try to archive - should notify warning (already archived) + await pilot.press("a") + await pilot.pause(0.1) + + +@pytest.mark.tui +class TestDeleteActionEdgeCases: + """Edge case tests for the delete session action.""" + + @pytest.mark.asyncio + async def test_delete_action_no_selection(self): + """Test delete action with no session selected shows warning.""" + with 
tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) + jsonl_file = project_path / "session-123.jsonl" + jsonl_file.write_text('{"type":"user"}\n', encoding="utf-8") + + app = SessionBrowser(project_path) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Ensure no session is selected + app.selected_session_id = None + + # Try to delete - should notify warning + await pilot.press("d") + await pilot.pause(0.1) + + +@pytest.mark.tui +class TestRestoreWithMkdir: + """Tests for restore action creating directory if needed.""" + + @pytest.mark.asyncio + async def test_restore_creates_directory_if_missing(self): + """Test that restore creates the project directory if it was deleted.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "deleted_project" + # Don't create the directory - it should be created on restore + + app = SessionBrowser(project_path, is_archived=True) + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + with ( + patch.object( + app.cache_manager, + "export_session_to_jsonl", + return_value=['{"type":"user"}'], + ), + patch.object( + app.cache_manager, "get_cached_project_data" + ) as mock_cache, + patch.object( + app.cache_manager, "get_archived_sessions", return_value={} + ), + ): + mock_cache.return_value = Mock( + sessions=mock_session_data, + working_directories=[str(project_path)], + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Set up archived session + app.archived_sessions = mock_session_data + app.selected_session_id = "session-123" + + # Directory should not exist + assert not project_path.exists() + + # Trigger restore + app.action_restore_jsonl() + await pilot.pause(0.1) + + # Directory should now exist + assert 
project_path.exists() + + # JSONL file should be created + assert (project_path / "session-123.jsonl").exists() + + +@pytest.mark.tui +class TestProjectSelector: + """Tests for the ProjectSelector TUI.""" + + @pytest.mark.asyncio + async def test_enter_key_selects_project(self): + """Test that Enter key selects the highlighted project.""" + with tempfile.TemporaryDirectory() as temp_dir: + project1 = Path(temp_dir) / "project1" + project1.mkdir() + (project1 / "session-1.jsonl").write_text('{"type":"user"}\n') + + project2 = Path(temp_dir) / "project2" + project2.mkdir() + (project2 / "session-2.jsonl").write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project1, project2], + matching_projects=[], + archived_projects=set(), + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select first project and press Enter + await pilot.press("enter") + await pilot.pause(0.1) + + @pytest.mark.asyncio + async def test_escape_key_quits(self): + """Test that Escape key quits the application.""" + with tempfile.TemporaryDirectory() as temp_dir: + project1 = Path(temp_dir) / "project1" + project1.mkdir() + + app = ProjectSelector( + projects=[project1], + matching_projects=[], + archived_projects=set(), + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Press Escape to quit + await pilot.press("escape") + await pilot.pause(0.1) + + @pytest.mark.asyncio + async def test_archive_project_action(self): + """Test archiving a project deletes JSONL files.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + jsonl1 = project_path / "session-1.jsonl" + jsonl2 = project_path / "session-2.jsonl" + jsonl1.write_text('{"type":"user"}\n') + jsonl2.write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects=set(), + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # 
Select the project + app.selected_project_path = project_path + + # Both JSONL files should exist + assert jsonl1.exists() + assert jsonl2.exists() + + # Press 'a' to archive and then confirm + await pilot.press("a") + await pilot.pause(0.1) + await pilot.press("y") + await pilot.pause(0.1) + + # JSONL files should be deleted + assert not jsonl1.exists() + assert not jsonl2.exists() + + # Project should now be in archived set + assert project_path in app.archived_projects + + @pytest.mark.asyncio + async def test_archive_project_already_archived_shows_warning(self): + """Test archiving an already archived project shows warning.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects={project_path}, # Already archived + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the archived project + app.selected_project_path = project_path + + # Try to archive - should show warning + await pilot.press("a") + await pilot.pause(0.1) + + @pytest.mark.asyncio + async def test_delete_project_cache_only(self): + """Test deleting project cache only keeps JSONL files.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + jsonl = project_path / "session-1.jsonl" + jsonl.write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects=set(), + ) + + with patch.object(CacheManager, "clear_cache"): + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the project + app.selected_project_path = project_path + + # Press 'd' to delete and choose cache only + await pilot.press("d") + await pilot.pause(0.1) + await pilot.press("c") # Cache only + await pilot.pause(0.1) + + # JSONL file should still exist + assert jsonl.exists() + + 
@pytest.mark.asyncio + async def test_delete_project_both(self): + """Test deleting project cache and JSONL files.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + jsonl = project_path / "session-1.jsonl" + jsonl.write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects=set(), + ) + + with patch.object(CacheManager, "clear_cache"): + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the project + app.selected_project_path = project_path + + assert jsonl.exists() + + # Press 'd' to delete and choose both + await pilot.press("d") + await pilot.pause(0.1) + await pilot.press("b") # Both + await pilot.pause(0.1) + + # JSONL file should be deleted + assert not jsonl.exists() + + @pytest.mark.asyncio + async def test_restore_project_creates_directory(self): + """Test restoring a project creates directory if missing.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "deleted_project" + # Don't create the directory + + mock_session_data = { + "session-123": SessionCacheData( + session_id="session-123", + first_timestamp="2025-01-01T10:00:00Z", + last_timestamp="2025-01-01T10:01:00Z", + message_count=1, + first_user_message="Test", + total_input_tokens=10, + total_output_tokens=10, + ), + } + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects={project_path}, # Archived project + ) + + with ( + patch.object(CacheManager, "get_cached_project_data") as mock_cache, + patch.object( + CacheManager, + "export_session_to_jsonl", + return_value=['{"type":"user"}'], + ), + ): + mock_cache.return_value = Mock(sessions=mock_session_data) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the project + app.selected_project_path = project_path + + # Directory should not exist + assert not project_path.exists() + + 
# Press 'r' to restore and confirm + await pilot.press("r") + await pilot.pause(0.1) + await pilot.press("y") + await pilot.pause(0.1) + + # Directory should now exist + assert project_path.exists() + + @pytest.mark.asyncio + async def test_restore_project_not_archived_shows_warning(self): + """Test restoring a non-archived project shows warning.""" + with tempfile.TemporaryDirectory() as temp_dir: + project_path = Path(temp_dir) / "project1" + project_path.mkdir() + (project_path / "session-1.jsonl").write_text('{"type":"user"}\n') + + app = ProjectSelector( + projects=[project_path], + matching_projects=[], + archived_projects=set(), # Not archived + ) + + async with app.run_test() as pilot: + await pilot.pause(0.2) + + # Select the non-archived project + app.selected_project_path = project_path + + # Try to restore - should show warning + await pilot.press("r") + await pilot.pause(0.1) From 3101323add7b1a06da87e6f247562f44266c7204 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Sat, 10 Jan 2026 02:01:57 +0000 Subject: [PATCH 07/23] PR feedback + zlib compressed message.content --- claude_code_log/cache.py | 51 +++++---- claude_code_log/cli.py | 2 +- claude_code_log/converter.py | 103 +++++++++++++++++- .../migrations/001_initial_schema.sql | 4 +- claude_code_log/tui.py | 16 +-- test/test_cache.py | 75 ++++++++++++- test/test_cache_integration.py | 60 +++++++++- test/test_integration_realistic.py | 4 +- test/test_pagination.py | 49 +++++++++ 9 files changed, 321 insertions(+), 43 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 8e37d477..842c9165 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -3,8 +3,9 @@ import json import sqlite3 +import zlib from contextlib import contextmanager -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, Generator, List, Optional @@ -178,8 +179,8 @@ def __init__(self, project_path: Path, 
library_version: str): self.project_path = project_path self.library_version = library_version - # Database at parent level (projects_dir/cache.db) - self.db_path = project_path.parent / "cache.db" + # Database at parent level (projects_dir/claude-code-log-cache.db) + self.db_path = project_path.parent / "claude-code-log-cache.db" # Initialise database and ensure project exists self._init_database() @@ -287,7 +288,9 @@ def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, An "_leaf_uuid": None, "_level": None, "_operation": None, - "content": json.dumps(entry.model_dump()), + "content": zlib.compress( + json.dumps(entry.model_dump(), separators=(",", ":")).encode("utf-8") + ), } # Extract flattened usage for assistant messages @@ -321,7 +324,7 @@ def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, An def _deserialize_entry(self, row: sqlite3.Row) -> TranscriptEntry: """Convert SQLite row back to TranscriptEntry.""" - content_dict = json.loads(row["content"]) + content_dict = json.loads(zlib.decompress(row["content"]).decode("utf-8")) return create_transcript_entry(content_dict) def _get_file_id(self, jsonl_path: Path) -> Optional[int]: @@ -422,13 +425,22 @@ def load_cached_entries_filtered( params: List[Any] = [file_id] if from_dt: + # Normalize to UTC 'Z' format for consistent string comparison + # with stored timestamps (which use 'Z' suffix from JSONL) + if from_dt.tzinfo is None: + from_dt = from_dt.replace(tzinfo=timezone.utc) + from_bound = from_dt.strftime("%Y-%m-%dT%H:%M:%SZ") # Include entries with NULL timestamp (like summaries) OR within date range sql += " AND (timestamp IS NULL OR timestamp >= ?)" - params.append(from_dt.isoformat()) + params.append(from_bound) if to_dt: + # Normalize to UTC 'Z' format for consistent string comparison + if to_dt.tzinfo is None: + to_dt = to_dt.replace(tzinfo=timezone.utc) + to_bound = to_dt.strftime("%Y-%m-%dT%H:%M:%SZ") sql += " AND (timestamp IS NULL OR timestamp <= 
?)" - params.append(to_dt.isoformat()) + params.append(to_bound) sql += " ORDER BY timestamp NULLS LAST" @@ -997,18 +1009,8 @@ def export_session_to_jsonl(self, session_id: str) -> List[str]: (self._project_id, session_id), ).fetchall() - # Re-serialize to compact JSON format (no spaces after separators) - # to match original JSONL file format - result: List[str] = [] - for row in rows: - try: - parsed = json.loads(row["content"]) - compact = json.dumps(parsed, separators=(",", ":")) - result.append(compact) - except json.JSONDecodeError: - # If parsing fails, use original content - result.append(row["content"]) - return result + # Content is stored as compressed, compact JSON - just decompress + return [zlib.decompress(row["content"]).decode("utf-8") for row in rows] def load_session_entries(self, session_id: str) -> List[TranscriptEntry]: """Load transcript entries for a session from cache. @@ -1357,6 +1359,13 @@ def delete_session(self, session_id: str) -> bool: (self._project_id, session_id), ) + # Delete cached_files entry for this session's JSONL file + # File name pattern is {session_id}.jsonl + conn.execute( + "DELETE FROM cached_files WHERE project_id = ? AND file_name = ?", + (self._project_id, f"{session_id}.jsonl"), + ) + # Delete the session record conn.execute( "DELETE FROM sessions WHERE project_id = ? AND session_id = ?", @@ -1389,7 +1398,7 @@ def delete_project(self) -> bool: def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: """Get all projects from cache, indicating which are archived. - This is a standalone function that queries the cache.db directly + This is a standalone function that queries the cache database directly to find all project paths, without needing to instantiate CacheManager for each project. @@ -1400,7 +1409,7 @@ def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: List of (project_path, is_archived) tuples. is_archived is True if the project has no JSONL files but exists in cache. 
""" - db_path = projects_dir / "cache.db" + db_path = projects_dir / "claude-code-log-cache.db" if not db_path.exists(): return [] diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 19b4bb9d..0be197a3 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -303,7 +303,7 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: click.echo("Clearing caches for all projects...") # Delete the shared SQLite cache database - cache_db = input_path / "cache.db" + cache_db = input_path / "claude-code-log-cache.db" if cache_db.exists(): try: cache_db.unlink() diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 6b5606f5..e41a839b 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -576,6 +576,103 @@ def _assign_sessions_to_pages( return pages +def _build_session_data_from_messages( + messages: List[TranscriptEntry], +) -> Dict[str, SessionCacheData]: + """Build session data from messages when cache is unavailable. + + This is a fallback for pagination when get_cached_project_data() returns None. 
+ + Args: + messages: All messages (deduplicated) + + Returns: + Dict mapping session_id to SessionCacheData + """ + from .parser import extract_text_content + + # Pre-compute warmup session IDs to filter them out + warmup_session_ids = get_warmup_session_ids(messages) + + # Group messages by session + sessions: Dict[str, Dict[str, Any]] = {} + for message in messages: + if not hasattr(message, "sessionId") or isinstance( + message, SummaryTranscriptEntry + ): + continue + + session_id = getattr(message, "sessionId", "") + if not session_id or session_id in warmup_session_ids: + continue + + if session_id not in sessions: + sessions[session_id] = { + "first_timestamp": getattr(message, "timestamp", ""), + "last_timestamp": getattr(message, "timestamp", ""), + "message_count": 0, + "first_user_message": "", + "total_input_tokens": 0, + "total_output_tokens": 0, + "total_cache_creation_tokens": 0, + "total_cache_read_tokens": 0, + } + + sessions[session_id]["message_count"] += 1 + current_timestamp = getattr(message, "timestamp", "") + if current_timestamp: + sessions[session_id]["last_timestamp"] = current_timestamp + + # Get first user message for preview + if ( + isinstance(message, UserTranscriptEntry) + and not sessions[session_id]["first_user_message"] + and hasattr(message, "message") + ): + first_user_content = extract_text_content(message.message.content) + if should_use_as_session_starter(first_user_content): + sessions[session_id]["first_user_message"] = create_session_preview( + first_user_content + ) + + # Extract token usage from assistant messages + if isinstance(message, AssistantTranscriptEntry) and hasattr( + message, "message" + ): + msg_data = message.message + if hasattr(msg_data, "usage") and msg_data.usage: + usage = msg_data.usage + sessions[session_id]["total_input_tokens"] += ( + getattr(usage, "input_tokens", 0) or 0 + ) + sessions[session_id]["total_output_tokens"] += ( + getattr(usage, "output_tokens", 0) or 0 + ) + 
sessions[session_id]["total_cache_creation_tokens"] += ( + getattr(usage, "cache_creation_input_tokens", 0) or 0 + ) + sessions[session_id]["total_cache_read_tokens"] += ( + getattr(usage, "cache_read_input_tokens", 0) or 0 + ) + + # Convert to Dict[str, SessionCacheData] + result: Dict[str, SessionCacheData] = {} + for session_id, data in sessions.items(): + result[session_id] = SessionCacheData( + session_id=session_id, + first_timestamp=data["first_timestamp"], + last_timestamp=data["last_timestamp"], + message_count=data["message_count"], + first_user_message=data["first_user_message"], + total_input_tokens=data["total_input_tokens"], + total_output_tokens=data["total_output_tokens"], + total_cache_creation_tokens=data["total_cache_creation_tokens"], + total_cache_read_tokens=data["total_cache_read_tokens"], + ) + + return result + + def _generate_paginated_html( messages: List[TranscriptEntry], output_dir: Path, @@ -928,7 +1025,11 @@ def convert_jsonl_to( if use_pagination: # Use paginated HTML generation assert cache_manager is not None # Ensured by use_pagination condition - session_data = cached_data.sessions if cached_data else {} + # Use cached session data if available, otherwise build from messages + if cached_data is not None: + session_data = cached_data.sessions + else: + session_data = _build_session_data_from_messages(messages) output_path = _generate_paginated_html( messages, input_path, diff --git a/claude_code_log/migrations/001_initial_schema.sql b/claude_code_log/migrations/001_initial_schema.sql index f7c5946e..b90a6d6c 100644 --- a/claude_code_log/migrations/001_initial_schema.sql +++ b/claude_code_log/migrations/001_initial_schema.sql @@ -99,8 +99,8 @@ CREATE TABLE IF NOT EXISTS messages ( -- QueueOperationTranscriptEntry _operation TEXT, - -- Message content as JSON - content JSON NOT NULL, + -- Message content as compressed JSON (zlib) + content BLOB NOT NULL, FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE, FOREIGN KEY 
(file_id) REFERENCES cached_files(id) ON DELETE CASCADE diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 63c1d5f3..e1495e17 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -1036,8 +1036,9 @@ def load_sessions(self) -> None: try: self.populate_table() self.update_stats() - except Exception: - pass + except Exception as e: + # UI components may not be mounted yet during initialization + self.log.debug(f"Skipped UI update for archived project: {e}") return # Check if we need to rebuild cache by checking for modified files @@ -1069,8 +1070,9 @@ def load_sessions(self) -> None: else: self.sessions = {} - except Exception: - # Don't show notification during startup - just return + except Exception as e: + # Don't show notification during startup - log and return + self.log.debug(f"Cache building failed during startup: {e}") return # Only compute archived sessions if there are JSONL files to compare against @@ -1095,9 +1097,9 @@ def load_sessions(self) -> None: try: self.populate_table() self.update_stats() - except Exception: - # Not in app context, skip UI updates - pass + except Exception as e: + # UI components may not be mounted yet during initialization + self.log.debug(f"Skipped UI update after session load: {e}") def populate_table(self) -> None: """Populate the sessions table with session data.""" diff --git a/test/test_cache.py b/test/test_cache.py index 8bb4302d..2779f283 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -27,7 +27,7 @@ def temp_project_dir(): """Create a temporary project directory for testing.""" with tempfile.TemporaryDirectory() as temp_dir: - # Create project subdirectory so db_path (parent/cache.db) is unique per test + # Create project subdirectory so db_path (parent/claude-code-log-cache.db) is unique per test project_dir = Path(temp_dir) / "project" project_dir.mkdir() yield project_dir @@ -102,13 +102,16 @@ def test_initialization(self, temp_project_dir, mock_version): assert 
cache_manager.project_path == temp_project_dir assert cache_manager.library_version == mock_version # SQLite database should be created at parent level - assert cache_manager.db_path == temp_project_dir.parent / "cache.db" + assert ( + cache_manager.db_path + == temp_project_dir.parent / "claude-code-log-cache.db" + ) assert cache_manager.db_path.exists() def test_database_path(self, cache_manager, temp_project_dir): """Test that SQLite database is created at the correct location.""" - # Database should be at parent level (projects_dir/cache.db) - expected_db = temp_project_dir.parent / "cache.db" + # Database should be at parent level (projects_dir/claude-code-log-cache.db) + expected_db = temp_project_dir.parent / "claude-code-log-cache.db" assert cache_manager.db_path == expected_db assert expected_db.exists() @@ -260,6 +263,70 @@ def test_filtered_loading_with_dates(self, cache_manager, temp_project_dir): assert len(user_messages) == 1 assert "Early message" in str(user_messages[0].message.content) + def test_filtered_loading_with_z_suffix_boundary( + self, cache_manager, temp_project_dir + ): + """Test that timestamps with 'Z' suffix are correctly compared at day boundaries. + + This tests the edge case where a message at 23:59:59Z should be included + when filtering with to_date set to that day. Previously, the query used + isoformat() which produced '.999999' microseconds, and 'Z' > '.' in string + comparison caused incorrect exclusion. 
+ """ + entries = [ + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user1", + timestamp="2023-01-01T23:59:59Z", # End of day with Z suffix + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="End of day message")], + ), + ), + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user2", + timestamp="2023-01-02T00:00:01Z", # Start of next day + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="Next day message")], + ), + ), + ] + + jsonl_path = temp_project_dir / "test.jsonl" + jsonl_path.write_text("dummy content", encoding="utf-8") + + cache_manager.save_cached_entries(jsonl_path, entries) + + # Filter to only 2023-01-01 - should include the 23:59:59Z message + filtered = cache_manager.load_cached_entries_filtered( + jsonl_path, "2023-01-01", "2023-01-01" + ) + + assert filtered is not None + user_messages = [entry for entry in filtered if entry.type == "user"] + + # Should include only the end-of-day message, not the next day message + assert len(user_messages) == 1, ( + f"Expected 1 message from 2023-01-01, got {len(user_messages)}. " + "The 23:59:59Z message may have been incorrectly excluded due to " + "timestamp format mismatch (Z vs .999999 suffix)." 
+ ) + assert "End of day message" in str(user_messages[0].message.content) + def test_clear_cache(self, cache_manager, temp_project_dir, sample_entries): """Test cache clearing functionality.""" jsonl_path = temp_project_dir / "test.jsonl" diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index 6af5b721..68d9639f 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -96,7 +96,7 @@ def test_cli_no_cache_flag(self, setup_test_project): assert result1.exit_code == 0 # Check if SQLite cache was created at parent level - cache_db = project_dir.parent / "cache.db" + cache_db = project_dir.parent / "claude-code-log-cache.db" assert cache_db.exists() # Clear the cache @@ -161,7 +161,7 @@ def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): assert result.exit_code == 0 # Verify SQLite cache database created at projects level - cache_db = temp_projects_dir / "cache.db" + cache_db = temp_projects_dir / "claude-code-log-cache.db" assert cache_db.exists() # Verify cache data exists for each project @@ -202,7 +202,7 @@ def test_convert_jsonl_to_html_with_cache(self, setup_test_project): assert output1.exists() # Verify SQLite cache was created - cache_db = project_dir.parent / "cache.db" + cache_db = project_dir.parent / "claude-code-log-cache.db" assert cache_db.exists() # Verify cache has data @@ -253,7 +253,7 @@ def test_process_projects_hierarchy_with_cache( assert output1.exists() # Verify SQLite cache database was created - cache_db = temp_projects_dir / "cache.db" + cache_db = temp_projects_dir / "claude-code-log-cache.db" assert cache_db.exists() # Verify cache data exists for each project @@ -759,6 +759,56 @@ def test_delete_session_from_cache(self, temp_projects_dir, sample_jsonl_data): exported = cache_manager.export_session_to_jsonl("session-1") assert exported == [] + def test_delete_session_invalidates_file_cache( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that 
delete_session also removes cached_files entry. + + Previously, delete_session only removed from messages, html_cache, and + sessions tables but left cached_files intact. This caused is_file_cached() + to return True even though the session data was gone, leading to + load_cached_entries() returning an empty list instead of None. + """ + project_dir = temp_projects_dir / "delete-file-cache-test" + project_dir.mkdir() + + # Create JSONL file with session ID matching file name + session_id = "session-1" + jsonl_file = project_dir / f"{session_id}.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + # Verify file is cached before deletion + cache_manager = CacheManager(project_dir, "1.0.0") + assert cache_manager.is_file_cached(jsonl_file), ( + "File should be cached before deletion" + ) + entries_before = cache_manager.load_cached_entries(jsonl_file) + assert entries_before is not None and len(entries_before) > 0, ( + "Should load cached entries before deletion" + ) + + # Delete the session + result = cache_manager.delete_session(session_id) + assert result is True + + # Verify cached_files entry is also removed + assert not cache_manager.is_file_cached(jsonl_file), ( + "is_file_cached() should return False after delete_session() " + "because the cached_files entry should be removed" + ) + + # load_cached_entries should return None (not empty list) for uncached file + entries_after = cache_manager.load_cached_entries(jsonl_file) + assert entries_after is None, ( + "load_cached_entries() should return None after delete_session() " + "because the file is no longer considered cached" + ) + def test_delete_nonexistent_session(self, temp_projects_dir): """Test deleting a session that doesn't exist returns False.""" project_dir = temp_projects_dir / "delete-nonexistent" @@ -850,7 +900,7 @@ def 
test_get_all_cached_projects_empty_dir(self, temp_projects_dir): """Test get_all_cached_projects with no cache.""" from claude_code_log.cache import get_all_cached_projects - # No cache.db exists + # No claude-code-log-cache.db exists projects = get_all_cached_projects(temp_projects_dir) assert projects == [] diff --git a/test/test_integration_realistic.py b/test/test_integration_realistic.py index 1c051290..92580cae 100644 --- a/test/test_integration_realistic.py +++ b/test/test_integration_realistic.py @@ -246,7 +246,7 @@ def test_clear_cache_with_projects_dir(self, temp_projects_copy: Path) -> None: assert result.exit_code == 0 # Verify SQLite cache was created - cache_db = temp_projects_copy / "cache.db" + cache_db = temp_projects_copy / "claude-code-log-cache.db" assert cache_db.exists(), "SQLite cache should exist after processing" # Clear caches @@ -419,7 +419,7 @@ def test_cache_creation_all_projects(self, temp_projects_copy: Path) -> None: process_projects_hierarchy(temp_projects_copy) # Verify SQLite cache database was created - cache_db = temp_projects_copy / "cache.db" + cache_db = temp_projects_copy / "claude-code-log-cache.db" assert cache_db.exists(), "SQLite cache database should exist" for project_dir in temp_projects_copy.iterdir(): diff --git a/test/test_pagination.py b/test/test_pagination.py index fc9b0fd6..63285f60 100644 --- a/test/test_pagination.py +++ b/test/test_pagination.py @@ -685,3 +685,52 @@ def test_multi_page_last_has_hidden_next_link(self, temp_project_dir): ) assert "PAGINATION_NEXT_LINK_START" in page2 assert "last-page" in page2 + + +class TestPaginationFallbackWithoutCache: + """Tests for pagination when cache data is unavailable.""" + + def test_pagination_renders_messages_when_cache_unavailable(self, temp_project_dir): + """Pagination should render messages even when get_cached_project_data returns None. 
+ + This tests the fallback path where cached_data is None but pagination is triggered + because total_message_count exceeds page_size. + """ + from unittest.mock import patch + from claude_code_log.converter import convert_jsonl_to_html + from claude_code_log.cache import CacheManager + + # Create sessions with messages + for i, session_id in enumerate(["s1", "s2"]): + jsonl_file = temp_project_dir / f"{session_id}.jsonl" + messages = _create_session_messages(session_id, 20, f"2023-01-0{i + 1}") + with open(jsonl_file, "w", encoding="utf-8") as f: + for msg in messages: + f.write(json.dumps(msg) + "\n") + + # First pass: Build cache but then simulate cache unavailable + convert_jsonl_to_html(temp_project_dir, page_size=5000, silent=True) + + # Delete combined file to force regeneration + combined_path = temp_project_dir / "combined_transcripts.html" + if combined_path.exists(): + combined_path.unlink() + + # Patch get_cached_project_data to return None (simulating cache unavailable) + # but keep total_message_count high enough to trigger pagination + def mock_get_cached_project_data(self): + return None + + with patch.object( + CacheManager, "get_cached_project_data", mock_get_cached_project_data + ): + # Force pagination with small page_size + convert_jsonl_to_html(temp_project_dir, page_size=15, silent=True) + + # Verify the generated HTML contains actual messages, not empty content + page1_content = combined_path.read_text(encoding="utf-8") + + # The page should contain message content from the sessions + assert "Message 0 from user" in page1_content or "Response" in page1_content, ( + "Paginated HTML should contain messages when cache is unavailable" + ) From a9f337e894325c1e4f780398f3f37702be37a705 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 00:59:49 +0000 Subject: [PATCH 08/23] Make cache db path parametrised --- claude_code_log/cache.py | 56 +++++++++++++++++++++---- test/test_cache.py | 88 ++++++++++++++++++++++++++++++++++++++++ 2 
files changed, 137 insertions(+), 7 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 842c9165..220cc90c 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -2,6 +2,7 @@ """SQLite-based cache management for Claude Code Log.""" import json +import os import sqlite3 import zlib from contextlib import contextmanager @@ -163,24 +164,54 @@ def get_library_version() -> str: return "unknown" +# ========== Cache Path Configuration ========== + + +def get_cache_db_path(projects_dir: Path) -> Path: + """Get cache database path, respecting CLAUDE_CODE_LOG_CACHE_PATH env var. + + Priority: CLAUDE_CODE_LOG_CACHE_PATH env var > default location. + + Args: + projects_dir: Path to the projects directory (e.g., ~/.claude/projects) + + Returns: + Path to the SQLite cache database. + """ + env_path = os.getenv("CLAUDE_CODE_LOG_CACHE_PATH") + if env_path: + return Path(env_path) + return projects_dir / "claude-code-log-cache.db" + + # ========== Cache Manager ========== class CacheManager: """SQLite-based cache manager for Claude Code Log.""" - def __init__(self, project_path: Path, library_version: str): + def __init__( + self, + project_path: Path, + library_version: str, + db_path: Optional[Path] = None, + ): """Initialise cache manager for a project. Args: project_path: Path to the project directory containing JSONL files library_version: Current version of the library for cache invalidation + db_path: Optional explicit path to the cache database. If not provided, + uses CLAUDE_CODE_LOG_CACHE_PATH env var or default location. 
""" self.project_path = project_path self.library_version = library_version - # Database at parent level (projects_dir/claude-code-log-cache.db) - self.db_path = project_path.parent / "claude-code-log-cache.db" + # Priority: explicit db_path > env var > default location + if db_path: + self.db_path = db_path + else: + self.db_path = get_cache_db_path(project_path.parent) # Initialise database and ensure project exists self._init_database() @@ -1395,7 +1426,10 @@ def delete_project(self) -> bool: return True -def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: +def get_all_cached_projects( + projects_dir: Path, + db_path: Optional[Path] = None, +) -> List[tuple[str, bool]]: """Get all projects from cache, indicating which are archived. This is a standalone function that queries the cache database directly @@ -1404,19 +1438,26 @@ def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: Args: projects_dir: Path to the projects directory (e.g., ~/.claude/projects) + db_path: Optional explicit path to the cache database. If not provided, + uses CLAUDE_CODE_LOG_CACHE_PATH env var or default location. Returns: List of (project_path, is_archived) tuples. is_archived is True if the project has no JSONL files but exists in cache. 
""" - db_path = projects_dir / "claude-code-log-cache.db" - if not db_path.exists(): + # Priority: explicit db_path > env var > default location + if db_path: + actual_db_path = db_path + else: + actual_db_path = get_cache_db_path(projects_dir) + + if not actual_db_path.exists(): return [] result: List[tuple[str, bool]] = [] try: - conn = sqlite3.connect(db_path, timeout=30.0) + conn = sqlite3.connect(actual_db_path, timeout=30.0) conn.row_factory = sqlite3.Row try: rows = conn.execute( @@ -1450,5 +1491,6 @@ def get_all_cached_projects(projects_dir: Path) -> List[tuple[str, bool]]: "ProjectCache", "SessionCacheData", "get_all_cached_projects", + "get_cache_db_path", "get_library_version", ] diff --git a/test/test_cache.py b/test/test_cache.py index 2779f283..e2832ccd 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -702,3 +702,91 @@ def test_cache_directory_permissions(self, temp_project_dir, mock_version): cache_dir.chmod(0o755) except OSError: pass + + +class TestCachePathEnvVar: + """Test CLAUDE_CODE_LOG_CACHE_PATH environment variable.""" + + def test_default_path_without_env_var(self, tmp_path): + """Test default cache path when env var is not set.""" + project_dir = tmp_path / "project" + project_dir.mkdir() + + cache = CacheManager(project_dir, "1.0.0") + + # Default should be parent/claude-code-log-cache.db + expected_path = tmp_path / "claude-code-log-cache.db" + assert cache.db_path == expected_path + assert expected_path.exists() + + def test_env_var_overrides_default(self, tmp_path, monkeypatch): + """Test that CLAUDE_CODE_LOG_CACHE_PATH overrides default location.""" + custom_db = tmp_path / "custom-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(custom_db)) + + project_dir = tmp_path / "project" + project_dir.mkdir() + + cache = CacheManager(project_dir, "1.0.0") + assert cache.db_path == custom_db + assert custom_db.exists() + + def test_explicit_db_path_overrides_env_var(self, tmp_path, monkeypatch): + """Test that 
explicit db_path takes precedence over env var.""" + env_db = tmp_path / "env-cache.db" + explicit_db = tmp_path / "explicit-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(env_db)) + + project_dir = tmp_path / "project" + project_dir.mkdir() + + cache = CacheManager(project_dir, "1.0.0", db_path=explicit_db) + assert cache.db_path == explicit_db + assert explicit_db.exists() + assert not env_db.exists() + + def test_get_all_cached_projects_respects_env_var(self, tmp_path, monkeypatch): + """Test that get_all_cached_projects uses env var.""" + from claude_code_log.cache import get_all_cached_projects + + custom_db = tmp_path / "custom-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(custom_db)) + + projects_dir = tmp_path / "projects" + projects_dir.mkdir() + + # Create a project and cache it + project_dir = projects_dir / "test-project" + project_dir.mkdir() + cache = CacheManager(project_dir, "1.0.0") # Uses env var + assert cache.db_path == custom_db + + # get_all_cached_projects should also use the env var + projects = get_all_cached_projects(projects_dir) + assert len(projects) == 1 + assert projects[0][0] == str(project_dir) + + def test_get_all_cached_projects_explicit_db_path(self, tmp_path, monkeypatch): + """Test that get_all_cached_projects explicit db_path overrides env var.""" + from claude_code_log.cache import get_all_cached_projects + + env_db = tmp_path / "env-cache.db" + explicit_db = tmp_path / "explicit-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(env_db)) + + projects_dir = tmp_path / "projects" + projects_dir.mkdir() + project_dir = projects_dir / "test-project" + project_dir.mkdir() + + # Create cache using explicit path + cache = CacheManager(project_dir, "1.0.0", db_path=explicit_db) + assert cache.db_path == explicit_db + + # get_all_cached_projects with explicit path should find it + projects = get_all_cached_projects(projects_dir, db_path=explicit_db) + assert len(projects) == 1 + + # 
get_all_cached_projects without explicit path uses env var (empty db) + projects_env = get_all_cached_projects(projects_dir) + assert len(projects_env) == 0 # env_db doesn't have any projects From 479e71b26cfd8233016a8038fb5e0d527a0f04a8 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 12:11:32 +0000 Subject: [PATCH 09/23] Improve test coverage and db isolation --- claude_code_log/cli.py | 11 +- test/conftest.py | 41 +++ test/test_cache_integration.py | 145 +++++++-- test/test_cache_sqlite_integrity.py | 149 ++++++--- test/test_cli.py | 469 ++++++++++++++++++++++++++++ test/test_migrations.py | 367 ++++++++++++++++++++++ test/test_renderer_timings.py | 291 +++++++++++++++++ 7 files changed, 1386 insertions(+), 87 deletions(-) create mode 100644 test/test_cli.py create mode 100644 test/test_migrations.py create mode 100644 test/test_renderer_timings.py diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index 0be197a3..2c6e3afc 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -17,7 +17,12 @@ get_file_extension, process_projects_hierarchy, ) -from .cache import CacheManager, get_all_cached_projects, get_library_version +from .cache import ( + CacheManager, + get_all_cached_projects, + get_cache_db_path, + get_library_version, +) def get_default_projects_dir() -> Path: @@ -302,8 +307,8 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: # Clear cache for all project directories click.echo("Clearing caches for all projects...") - # Delete the shared SQLite cache database - cache_db = input_path / "claude-code-log-cache.db" + # Delete the SQLite cache database (respects CLAUDE_CODE_LOG_CACHE_PATH env var) + cache_db = get_cache_db_path(input_path) if cache_db.exists(): try: cache_db.unlink() diff --git a/test/conftest.py b/test/conftest.py index e158654f..7df5e9ba 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,15 +1,56 @@ """Pytest configuration and shared fixtures.""" from pathlib import Path 
+from typing import TYPE_CHECKING, Generator import pytest +if TYPE_CHECKING: + from claude_code_log.cache import CacheManager + from test.snapshot_serializers import ( NormalisedHTMLSerializer, NormalisedMarkdownSerializer, ) +# ========== Cache Test Fixtures ========== +# These fixtures use explicit db_path for true test isolation, +# enabling parallel test execution without database conflicts. + + +@pytest.fixture +def isolated_cache_dir(tmp_path: Path) -> Path: + """Create an isolated project directory with explicit db_path. + + This fixture ensures each test gets its own SQLite database, + enabling full parallel execution with pytest-xdist. + """ + project_dir = tmp_path / "project" + project_dir.mkdir() + return project_dir + + +@pytest.fixture +def isolated_db_path(tmp_path: Path) -> Path: + """Return an isolated database path for cache tests.""" + return tmp_path / "test-cache.db" + + +@pytest.fixture +def isolated_cache_manager( + isolated_cache_dir: Path, isolated_db_path: Path +) -> Generator["CacheManager", None, None]: + """Create a CacheManager with explicit db_path for test isolation. + + This fixture is preferred over the older temp_project_dir pattern + as it guarantees database isolation for parallel test execution. 
+ """ + from claude_code_log.cache import CacheManager + + yield CacheManager(isolated_cache_dir, "1.0.0-test", db_path=isolated_db_path) + + @pytest.fixture def test_data_dir() -> Path: """Return path to test data directory.""" diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index 68d9639f..ddccd39e 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -2,8 +2,8 @@ """Integration tests for cache functionality with CLI and converter.""" import json -import tempfile from pathlib import Path +from typing import Generator from unittest.mock import patch import pytest @@ -14,13 +14,39 @@ from claude_code_log.cache import CacheManager +class ProjectSetup: + """Container for test project setup data.""" + + def __init__(self, projects_dir: Path, db_path: Path): + self.projects_dir = projects_dir + self.db_path = db_path + + @pytest.fixture -def temp_projects_dir(): - """Create a temporary projects directory structure.""" - with tempfile.TemporaryDirectory() as temp_dir: - projects_dir = Path(temp_dir) / "projects" - projects_dir.mkdir() - yield projects_dir +def temp_projects_setup( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> Generator[ProjectSetup, None, None]: + """Create a temporary projects directory structure with isolated cache. + + Uses CLAUDE_CODE_LOG_CACHE_PATH env var for cache isolation, + enabling parallel test execution with pytest-xdist. + + Returns ProjectSetup with both projects_dir and db_path. 
+ """ + projects_dir = tmp_path / "projects" + projects_dir.mkdir() + + # Set env var to isolate cache for this test + isolated_db = tmp_path / "test-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(isolated_db)) + + yield ProjectSetup(projects_dir, isolated_db) + + +@pytest.fixture +def temp_projects_dir(temp_projects_setup: ProjectSetup) -> Path: + """Backward-compatible fixture returning just the projects dir.""" + return temp_projects_setup.projects_dir @pytest.fixture @@ -67,10 +93,57 @@ def sample_jsonl_data(): ] +class ProjectWithCache: + """Container for test project with cache info. + + Implements Path-like interface for backward compatibility with tests + that pass this directly to functions expecting Path objects. + """ + + def __init__(self, project_dir: Path, db_path: Path): + self.project_dir = project_dir + self.db_path = db_path + + # Path-like interface for backward compatibility + def __fspath__(self) -> str: + return str(self.project_dir) + + def __str__(self) -> str: + return str(self.project_dir) + + def __truediv__(self, other: str) -> Path: + return self.project_dir / other + + @property + def parent(self) -> Path: + return self.project_dir.parent + + def exists(self) -> bool: + return self.project_dir.exists() + + def is_dir(self) -> bool: + return self.project_dir.is_dir() + + def is_file(self) -> bool: + return self.project_dir.is_file() + + def glob(self, pattern: str): + return self.project_dir.glob(pattern) + + def iterdir(self): + return self.project_dir.iterdir() + + @property + def name(self) -> str: + return self.project_dir.name + + @pytest.fixture -def setup_test_project(temp_projects_dir, sample_jsonl_data): +def setup_test_project( + temp_projects_setup: ProjectSetup, sample_jsonl_data +) -> ProjectWithCache: """Set up a test project with JSONL files.""" - project_dir = temp_projects_dir / "test-project" + project_dir = temp_projects_setup.projects_dir / "test-project" project_dir.mkdir() # Create JSONL file @@ 
-79,15 +152,16 @@ def setup_test_project(temp_projects_dir, sample_jsonl_data): for entry in sample_jsonl_data: f.write(json.dumps(entry) + "\n") - return project_dir + return ProjectWithCache(project_dir, temp_projects_setup.db_path) class TestCacheIntegrationCLI: """Test cache integration with CLI commands.""" - def test_cli_no_cache_flag(self, setup_test_project): + def test_cli_no_cache_flag(self, setup_test_project: ProjectWithCache): """Test --no-cache flag disables caching.""" - project_dir = setup_test_project + project_dir = setup_test_project.project_dir + db_path = setup_test_project.db_path runner = CliRunner() @@ -95,9 +169,8 @@ def test_cli_no_cache_flag(self, setup_test_project): result1 = runner.invoke(main, [str(project_dir)]) assert result1.exit_code == 0 - # Check if SQLite cache was created at parent level - cache_db = project_dir.parent / "claude-code-log-cache.db" - assert cache_db.exists() + # Check if SQLite cache was created at the isolated location + assert db_path.exists() # Clear the cache runner.invoke(main, [str(project_dir), "--clear-cache"]) @@ -107,7 +180,7 @@ def test_cli_no_cache_flag(self, setup_test_project): assert result2.exit_code == 0 # Cache should be empty (project should not be populated) - cache_manager = CacheManager(project_dir, "1.0.0") + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) cached_data = cache_manager.get_cached_project_data() assert cached_data is not None assert cached_data.total_message_count == 0 @@ -138,8 +211,13 @@ def test_cli_clear_cache_flag(self, setup_test_project): assert cached_data is not None assert len(cached_data.cached_files) == 0 - def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): + def test_cli_all_projects_caching( + self, temp_projects_setup: ProjectSetup, sample_jsonl_data + ): """Test caching with --all-projects flag.""" + temp_projects_dir = temp_projects_setup.projects_dir + db_path = temp_projects_setup.db_path + # Create multiple 
projects for i in range(3): project_dir = temp_projects_dir / f"project-{i}" @@ -160,14 +238,13 @@ def test_cli_all_projects_caching(self, temp_projects_dir, sample_jsonl_data): result = runner.invoke(main, [str(temp_projects_dir), "--all-projects"]) assert result.exit_code == 0 - # Verify SQLite cache database created at projects level - cache_db = temp_projects_dir / "claude-code-log-cache.db" - assert cache_db.exists() + # Verify SQLite cache database created at isolated location + assert db_path.exists() # Verify cache data exists for each project for i in range(3): project_dir = temp_projects_dir / f"project-{i}" - cache_manager = CacheManager(project_dir, "1.0.0") + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) cached_data = cache_manager.get_cached_project_data() assert cached_data is not None assert len(cached_data.cached_files) >= 1 @@ -193,20 +270,22 @@ def test_cli_date_filtering_with_cache(self, setup_test_project): class TestCacheIntegrationConverter: """Test cache integration with converter functions.""" - def test_convert_jsonl_to_html_with_cache(self, setup_test_project): + def test_convert_jsonl_to_html_with_cache( + self, setup_test_project: ProjectWithCache + ): """Test converter uses cache when available.""" - project_dir = setup_test_project + project_dir = setup_test_project.project_dir + db_path = setup_test_project.db_path # First conversion (populate cache) output1 = convert_jsonl_to_html(input_path=project_dir, use_cache=True) assert output1.exists() - # Verify SQLite cache was created - cache_db = project_dir.parent / "claude-code-log-cache.db" - assert cache_db.exists() + # Verify SQLite cache was created at isolated location + assert db_path.exists() # Verify cache has data - cache_manager = CacheManager(project_dir, "1.0.0") + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) cached_data = cache_manager.get_cached_project_data() assert cached_data is not None assert len(cached_data.cached_files) 
>= 1 @@ -230,9 +309,12 @@ def test_convert_jsonl_to_html_no_cache(self, setup_test_project): assert cached_data.total_message_count == 0 def test_process_projects_hierarchy_with_cache( - self, temp_projects_dir, sample_jsonl_data + self, temp_projects_setup: ProjectSetup, sample_jsonl_data ): """Test project hierarchy processing uses cache effectively.""" + temp_projects_dir = temp_projects_setup.projects_dir + db_path = temp_projects_setup.db_path + # Create multiple projects for i in range(2): project_dir = temp_projects_dir / f"project-{i}" @@ -252,14 +334,13 @@ def test_process_projects_hierarchy_with_cache( ) assert output1.exists() - # Verify SQLite cache database was created - cache_db = temp_projects_dir / "claude-code-log-cache.db" - assert cache_db.exists() + # Verify SQLite cache database was created at isolated location + assert db_path.exists() # Verify cache data exists for each project for i in range(2): project_dir = temp_projects_dir / f"project-{i}" - cache_manager = CacheManager(project_dir, "1.0.0") + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) cached_data = cache_manager.get_cached_project_data() assert cached_data is not None assert len(cached_data.cached_files) >= 1 diff --git a/test/test_cache_sqlite_integrity.py b/test/test_cache_sqlite_integrity.py index 15d2ee3b..bc1c3e43 100644 --- a/test/test_cache_sqlite_integrity.py +++ b/test/test_cache_sqlite_integrity.py @@ -3,7 +3,6 @@ import json import sqlite3 -import tempfile import threading import time from pathlib import Path @@ -24,19 +23,13 @@ ) -@pytest.fixture -def temp_project_dir(): - """Create a temporary project directory.""" - with tempfile.TemporaryDirectory() as temp_dir: - project_dir = Path(temp_dir) / "test-project" - project_dir.mkdir() - yield project_dir +# Use conftest.py fixtures: isolated_cache_dir, isolated_db_path, isolated_cache_manager @pytest.fixture -def cache_manager(temp_project_dir): - """Create a cache manager for testing.""" - return 
CacheManager(temp_project_dir, "1.0.0") +def cache_manager(isolated_cache_dir: Path, isolated_db_path: Path) -> CacheManager: + """Create a cache manager with explicit db_path for test isolation.""" + return CacheManager(isolated_cache_dir, "1.0.0", db_path=isolated_db_path) @pytest.fixture @@ -92,13 +85,19 @@ class TestCascadeDelete: """Tests for cascade delete behaviour.""" def test_cascade_delete_project_removes_all_nested_records( - self, temp_project_dir, sample_user_entry, sample_assistant_entry + self, + isolated_cache_dir, + isolated_db_path, + sample_user_entry, + sample_assistant_entry, ): """Deleting project cascades to files, messages, sessions.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create a JSONL file with entries - jsonl_file = temp_project_dir / "test.jsonl" + jsonl_file = isolated_cache_dir / "test.jsonl" jsonl_file.write_text( json.dumps(sample_user_entry.model_dump()) + "\n" @@ -180,10 +179,12 @@ class TestTokenSumVerification: """Tests for token sum calculations.""" def test_session_token_totals_match_message_sums( - self, temp_project_dir, sample_assistant_entry + self, isolated_cache_dir, isolated_db_path, sample_assistant_entry ): """Session token totals equal sum of message tokens.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create multiple assistant entries with known token values entries = [] @@ -219,7 +220,7 @@ def test_session_token_totals_match_message_sums( total_output += 50 + i * 5 # Save entries - jsonl_file = temp_project_dir / "test.jsonl" + jsonl_file = isolated_cache_dir / "test.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -288,9 +289,13 @@ def test_cannot_insert_message_without_valid_project_id(self, cache_manager): class 
TestSerializationRoundTrip: """Tests for message serialization/deserialization.""" - def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): + def test_complex_message_types_roundtrip_correctly( + self, isolated_cache_dir, isolated_db_path + ): """Tool use, images, thinking content survive JSON serialization.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create entries with complex content types entries = [ @@ -372,7 +377,7 @@ def test_complex_message_types_roundtrip_correctly(self, temp_project_dir): ] # Save entries - jsonl_file = temp_project_dir / "complex.jsonl" + jsonl_file = isolated_cache_dir / "complex.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -453,9 +458,13 @@ def test_duplicate_session_id_in_project_fails(self, cache_manager): class TestTimestampOrdering: """Tests for message timestamp ordering.""" - def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry): + def test_messages_ordered_by_timestamp( + self, isolated_cache_dir, isolated_db_path, sample_user_entry + ): """Messages retrieved in timestamp order.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create entries with out-of-order timestamps entries = [] @@ -484,7 +493,7 @@ def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry ) entries.append(entry) - jsonl_file = temp_project_dir / "order.jsonl" + jsonl_file = isolated_cache_dir / "order.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -504,9 +513,13 @@ def test_messages_ordered_by_timestamp(self, temp_project_dir, sample_user_entry class TestNullTokenHandling: """Tests for NULL token value handling.""" - def 
test_null_tokens_handled_in_aggregates(self, temp_project_dir): + def test_null_tokens_handled_in_aggregates( + self, isolated_cache_dir, isolated_db_path + ): """NULL token values don't corrupt sums.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create mix of entries with and without tokens entries = [ @@ -548,7 +561,7 @@ def test_null_tokens_handled_in_aggregates(self, temp_project_dir): ), ] - jsonl_file = temp_project_dir / "mixed.jsonl" + jsonl_file = isolated_cache_dir / "mixed.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -577,13 +590,19 @@ class TestMessageFileRelationship: """Tests for message-file relationships.""" def test_cached_file_message_count_matches_actual( - self, temp_project_dir, sample_user_entry, sample_assistant_entry + self, + isolated_cache_dir, + isolated_db_path, + sample_user_entry, + sample_assistant_entry, ): """message_count column matches COUNT(*) FROM messages.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) entries = [sample_user_entry, sample_assistant_entry] - jsonl_file = temp_project_dir / "count.jsonl" + jsonl_file = isolated_cache_dir / "count.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -620,9 +639,11 @@ def test_wal_journal_mode_enabled(self, cache_manager): class TestConcurrentAccess: """Tests for concurrent database access.""" - def test_concurrent_readers_dont_block(self, temp_project_dir): + def test_concurrent_readers_dont_block(self, isolated_cache_dir, isolated_db_path): """Multiple readers can access simultaneously.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Add some data 
entry = UserTranscriptEntry( @@ -640,7 +661,7 @@ def test_concurrent_readers_dont_block(self, temp_project_dir): ), ) - jsonl_file = temp_project_dir / "concurrent.jsonl" + jsonl_file = isolated_cache_dir / "concurrent.jsonl" jsonl_file.write_text(json.dumps(entry.model_dump()), encoding="utf-8") cache_manager.save_cached_entries(jsonl_file, [entry]) @@ -649,7 +670,7 @@ def test_concurrent_readers_dont_block(self, temp_project_dir): def read_data(): try: - cm = CacheManager(temp_project_dir, "1.0.0") + cm = CacheManager(isolated_cache_dir, "1.0.0", db_path=isolated_db_path) data = cm.get_cached_project_data() results.append(data is not None) except Exception as e: @@ -669,9 +690,13 @@ def read_data(): class TestLargeDatasetPerformance: """Tests for performance with large datasets.""" - def test_query_performance_with_large_dataset(self, temp_project_dir): + def test_query_performance_with_large_dataset( + self, isolated_cache_dir, isolated_db_path + ): """Queries complete in reasonable time with large datasets.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create 1000 entries (reduced from 10k for test speed) entries = [] @@ -692,7 +717,7 @@ def test_query_performance_with_large_dataset(self, temp_project_dir): ) entries.append(entry) - jsonl_file = temp_project_dir / "large.jsonl" + jsonl_file = isolated_cache_dir / "large.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -713,9 +738,13 @@ def test_query_performance_with_large_dataset(self, temp_project_dir): class TestSessionBoundaryDetection: """Tests for session boundary correctness.""" - def test_sessions_contain_correct_messages(self, temp_project_dir): + def test_sessions_contain_correct_messages( + self, isolated_cache_dir, isolated_db_path + ): """Each session contains only its messages.""" - cache_manager = CacheManager(temp_project_dir, 
"1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create entries for multiple sessions entries = [] @@ -743,7 +772,7 @@ def test_sessions_contain_correct_messages(self, temp_project_dir): ) entries.append(entry) - jsonl_file = temp_project_dir / "sessions.jsonl" + jsonl_file = isolated_cache_dir / "sessions.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -766,13 +795,19 @@ class TestCacheStatsAccuracy: """Tests for cache statistics accuracy.""" def test_cache_stats_match_actual_counts( - self, temp_project_dir, sample_user_entry, sample_assistant_entry + self, + isolated_cache_dir, + isolated_db_path, + sample_user_entry, + sample_assistant_entry, ): """get_cache_stats() returns accurate data.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) entries = [sample_user_entry, sample_assistant_entry] - jsonl_file = temp_project_dir / "stats.jsonl" + jsonl_file = isolated_cache_dir / "stats.jsonl" jsonl_file.write_text( "\n".join(json.dumps(e.model_dump()) for e in entries), encoding="utf-8", @@ -814,9 +849,13 @@ def test_cache_stats_match_actual_counts( class TestWorkingDirectoryQuery: """Tests for working directory queries.""" - def test_get_working_directories_returns_distinct_cwds(self, temp_project_dir): + def test_get_working_directories_returns_distinct_cwds( + self, isolated_cache_dir, isolated_db_path + ): """get_working_directories() returns unique values.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) # Create sessions with duplicate cwds cache_manager.update_session_cache( @@ -861,11 +900,15 @@ def test_get_working_directories_returns_distinct_cwds(self, temp_project_dir): class TestFileModificationDetection: """Tests for file modification time 
detection.""" - def test_mtime_change_invalidates_cache(self, temp_project_dir, sample_user_entry): + def test_mtime_change_invalidates_cache( + self, isolated_cache_dir, isolated_db_path, sample_user_entry + ): """Changing file mtime marks cache as stale.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) - jsonl_file = temp_project_dir / "mtime.jsonl" + jsonl_file = isolated_cache_dir / "mtime.jsonl" jsonl_file.write_text( json.dumps(sample_user_entry.model_dump()), encoding="utf-8" ) @@ -887,9 +930,11 @@ def test_mtime_change_invalidates_cache(self, temp_project_dir, sample_user_entr class TestMigrationIntegrity: """Tests for migration system integrity.""" - def test_migration_checksum_stored(self, temp_project_dir): + def test_migration_checksum_stored(self, isolated_cache_dir, isolated_db_path): """Migration checksums are stored in _schema_version.""" - cache_manager = CacheManager(temp_project_dir, "1.0.0") + cache_manager = CacheManager( + isolated_cache_dir, "1.0.0", db_path=isolated_db_path + ) with cache_manager._get_connection() as conn: rows = conn.execute( @@ -902,10 +947,10 @@ def test_migration_checksum_stored(self, temp_project_dir): assert row["filename"].endswith(".sql") assert len(row["checksum"]) == 64 # SHA256 hex length - def test_migration_applied_only_once(self, temp_project_dir): + def test_migration_applied_only_once(self, isolated_cache_dir, isolated_db_path): """Migrations are not re-applied on subsequent runs.""" # First run - cm1 = CacheManager(temp_project_dir, "1.0.0") + cm1 = CacheManager(isolated_cache_dir, "1.0.0", db_path=isolated_db_path) with cm1._get_connection() as conn: initial_count = conn.execute( @@ -913,7 +958,7 @@ def test_migration_applied_only_once(self, temp_project_dir): ).fetchone()[0] # Second run - cm2 = CacheManager(temp_project_dir, "1.0.0") + cm2 = CacheManager(isolated_cache_dir, "1.0.0", 
db_path=isolated_db_path) with cm2._get_connection() as conn: final_count = conn.execute( diff --git a/test/test_cli.py b/test/test_cli.py new file mode 100644 index 00000000..320614e0 --- /dev/null +++ b/test/test_cli.py @@ -0,0 +1,469 @@ +#!/usr/bin/env python3 +"""Tests for CLI functionality and helper functions.""" + +import json +from pathlib import Path +from typing import Generator + +import pytest +from click.testing import CliRunner + +from claude_code_log.cli import ( + _clear_caches, + _clear_output_files, + _discover_projects, + get_default_projects_dir, + main, +) +from claude_code_log.cache import CacheManager + + +class ProjectsSetup: + """Container for test projects setup.""" + + def __init__(self, projects_dir: Path, db_path: Path): + self.projects_dir = projects_dir + self.db_path = db_path + + +@pytest.fixture +def cli_projects_setup( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> Generator[ProjectsSetup, None, None]: + """Create isolated projects setup for CLI tests.""" + projects_dir = tmp_path / "projects" + projects_dir.mkdir() + + # Set env var to isolate cache + isolated_db = tmp_path / "test-cache.db" + monkeypatch.setenv("CLAUDE_CODE_LOG_CACHE_PATH", str(isolated_db)) + + yield ProjectsSetup(projects_dir, isolated_db) + + +@pytest.fixture +def sample_jsonl_content() -> list[dict]: + """Sample JSONL data for tests.""" + return [ + { + "type": "user", + "uuid": "user-1", + "timestamp": "2023-01-01T10:00:00Z", + "sessionId": "session-1", + "version": "1.0.0", + "parentUuid": None, + "isSidechain": False, + "userType": "user", + "cwd": "/test", + "message": {"role": "user", "content": "Hello"}, + }, + { + "type": "assistant", + "uuid": "assistant-1", + "timestamp": "2023-01-01T10:01:00Z", + "sessionId": "session-1", + "version": "1.0.0", + "parentUuid": None, + "isSidechain": False, + "userType": "assistant", + "cwd": "/test", + "requestId": "req-1", + "message": { + "id": "msg-1", + "type": "message", + "role": "assistant", + 
"model": "claude-3", + "content": [{"type": "text", "text": "Hi there!"}], + "usage": {"input_tokens": 10, "output_tokens": 15}, + }, + }, + {"type": "summary", "summary": "A greeting", "leafUuid": "assistant-1"}, + ] + + +def create_project_with_jsonl( + projects_dir: Path, name: str, jsonl_data: list[dict] +) -> Path: + """Helper to create a project directory with JSONL file.""" + project_dir = projects_dir / name + project_dir.mkdir(exist_ok=True) + jsonl_file = project_dir / "session-1.jsonl" + with open(jsonl_file, "w") as f: + for entry in jsonl_data: + f.write(json.dumps(entry) + "\n") + return project_dir + + +class TestGetDefaultProjectsDir: + """Tests for get_default_projects_dir helper.""" + + def test_returns_expected_path(self): + """Default projects dir is ~/.claude/projects.""" + result = get_default_projects_dir() + assert result == Path.home() / ".claude" / "projects" + + +class TestDiscoverProjects: + """Tests for _discover_projects helper.""" + + def test_discovers_active_projects( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Finds directories with JSONL files.""" + projects_dir = cli_projects_setup.projects_dir + + # Create two active projects + create_project_with_jsonl(projects_dir, "project-1", sample_jsonl_content) + create_project_with_jsonl(projects_dir, "project-2", sample_jsonl_content) + + # Create an empty directory (not a project) + (projects_dir / "empty-dir").mkdir() + + project_dirs, archived = _discover_projects(projects_dir) + + assert len(project_dirs) == 2 + assert len(archived) == 0 + project_names = {p.name for p in project_dirs} + assert project_names == {"project-1", "project-2"} + + def test_discovers_archived_projects( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Finds archived projects from cache.""" + projects_dir = cli_projects_setup.projects_dir + db_path = cli_projects_setup.db_path + + # Create a project and cache it + project_dir = 
create_project_with_jsonl( + projects_dir, "my-project", sample_jsonl_content + ) + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) + + # Save entries to cache + from claude_code_log.converter import load_transcript + + jsonl_file = project_dir / "session-1.jsonl" + entries = load_transcript(jsonl_file, silent=True) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Delete the JSONL file to simulate archival + jsonl_file.unlink() + + project_dirs, archived = _discover_projects(projects_dir) + + assert len(project_dirs) == 1 + assert len(archived) == 1 + assert project_dir in archived + + def test_empty_directory(self, cli_projects_setup: ProjectsSetup): + """Empty projects directory returns empty lists.""" + project_dirs, archived = _discover_projects(cli_projects_setup.projects_dir) + assert project_dirs == [] + assert archived == set() + + +class TestClearCaches: + """Tests for _clear_caches helper.""" + + def test_clear_cache_single_project( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears cache for a single project.""" + projects_dir = cli_projects_setup.projects_dir + db_path = cli_projects_setup.db_path + + project_dir = create_project_with_jsonl( + projects_dir, "test-project", sample_jsonl_content + ) + + # Create cache + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) + from claude_code_log.converter import load_transcript + + jsonl_file = project_dir / "session-1.jsonl" + entries = load_transcript(jsonl_file, silent=True) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Verify cache has data + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert len(cached_data.cached_files) >= 1 + + # Clear cache + _clear_caches(project_dir, all_projects=False) + + # Verify cache is cleared + cache_manager2 = CacheManager(project_dir, "1.0.0", db_path=db_path) + cached_data2 = cache_manager2.get_cached_project_data() + 
assert cached_data2 is not None + assert len(cached_data2.cached_files) == 0 + + def test_clear_cache_all_projects( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears cache database for all projects.""" + projects_dir = cli_projects_setup.projects_dir + db_path = cli_projects_setup.db_path + + # Create multiple projects + for i in range(3): + create_project_with_jsonl( + projects_dir, f"project-{i}", sample_jsonl_content + ) + + # Create cache entries + for i in range(3): + project_dir = projects_dir / f"project-{i}" + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) + from claude_code_log.converter import load_transcript + + jsonl_file = project_dir / "session-1.jsonl" + entries = load_transcript(jsonl_file, silent=True) + cache_manager.save_cached_entries(jsonl_file, entries) + + # Verify cache exists + assert db_path.exists() + + # Clear all caches + _clear_caches(projects_dir, all_projects=True) + + # Database file should be deleted + assert not db_path.exists() + + def test_clear_cache_single_file_noop(self, tmp_path: Path): + """Clearing cache for single file has no effect.""" + # Create a single JSONL file (not in a project structure) + jsonl_file = tmp_path / "test.jsonl" + jsonl_file.write_text('{"type": "user"}') + + # Should complete without error + _clear_caches(jsonl_file, all_projects=False) + + +class TestClearOutputFiles: + """Tests for _clear_output_files helper.""" + + def test_clear_html_single_project( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears HTML files from single project.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + # Create some HTML files + (project_dir / "combined_transcripts.html").write_text("") + (project_dir / "session-1.html").write_text("") + + assert len(list(project_dir.glob("*.html"))) == 2 + + _clear_output_files(project_dir, 
all_projects=False, file_ext="html") + + assert len(list(project_dir.glob("*.html"))) == 0 + + def test_clear_html_all_projects( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears HTML files from all projects.""" + projects_dir = cli_projects_setup.projects_dir + + # Create projects with HTML files + for i in range(2): + project_dir = create_project_with_jsonl( + projects_dir, f"project-{i}", sample_jsonl_content + ) + (project_dir / "combined_transcripts.html").write_text("") + + # Create index file + (projects_dir / "index.html").write_text("") + + _clear_output_files(projects_dir, all_projects=True, file_ext="html") + + # All HTML files should be gone + assert not (projects_dir / "index.html").exists() + for i in range(2): + project_dir = projects_dir / f"project-{i}" + assert len(list(project_dir.glob("*.html"))) == 0 + + def test_clear_md_files( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Clears Markdown files.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + (project_dir / "combined_transcripts.md").write_text("# Test") + assert len(list(project_dir.glob("*.md"))) == 1 + + _clear_output_files(project_dir, all_projects=False, file_ext="md") + + assert len(list(project_dir.glob("*.md"))) == 0 + + def test_clear_no_files_to_remove(self, cli_projects_setup: ProjectsSetup): + """No error when no files to remove.""" + project_dir = cli_projects_setup.projects_dir / "empty-project" + project_dir.mkdir() + (project_dir / "test.jsonl").write_text('{"type": "user"}') + + # Should complete without error + _clear_output_files(project_dir, all_projects=False, file_ext="html") + + +class TestCLIMainCommand: + """Tests for main CLI command.""" + + def test_help_shows_options(self): + """Help shows all expected options.""" + runner = CliRunner() + result = runner.invoke(main, ["--help"]) + + assert result.exit_code 
== 0 + assert "--output" in result.output + assert "--all-projects" in result.output + assert "--clear-cache" in result.output + assert "--open-browser" in result.output + + def test_no_arguments_uses_default_or_cwd(self, monkeypatch: pytest.MonkeyPatch): + """Running without arguments attempts to find projects.""" + runner = CliRunner() + # Mock to avoid actual file system operations + result = runner.invoke(main, []) + # Should either succeed or fail gracefully (no crash) + assert result.exit_code in (0, 1) + + def test_clear_cache_flag( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--clear-cache flag clears cache.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + runner = CliRunner() + + # First run to create cache + result1 = runner.invoke(main, [str(project_dir)]) + assert result1.exit_code == 0 + + # Clear cache + result2 = runner.invoke(main, [str(project_dir), "--clear-cache"]) + assert result2.exit_code == 0 + assert "clearing" in result2.output.lower() or "clear" in result2.output.lower() + + def test_clear_html_flag( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--clear-html flag clears HTML files.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + runner = CliRunner() + + # Generate HTML + result1 = runner.invoke(main, [str(project_dir)]) + assert result1.exit_code == 0 + assert len(list(project_dir.glob("*.html"))) > 0 + + # Clear HTML + result2 = runner.invoke(main, [str(project_dir), "--clear-html"]) + assert result2.exit_code == 0 + assert len(list(project_dir.glob("*.html"))) == 0 + + def test_format_option_md( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--format md generates Markdown output.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, 
"test-project", sample_jsonl_content + ) + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir), "--format", "md"]) + + assert result.exit_code == 0 + assert len(list(project_dir.glob("*.md"))) > 0 + + def test_no_cache_flag( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--no-cache flag processes without caching.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + db_path = cli_projects_setup.db_path + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir), "--no-cache"]) + + assert result.exit_code == 0 + + # Cache should exist but be empty for this project + cache_manager = CacheManager(project_dir, "1.0.0", db_path=db_path) + cached_data = cache_manager.get_cached_project_data() + assert cached_data is not None + assert cached_data.total_message_count == 0 + + def test_nonexistent_path_error(self): + """Nonexistent path shows appropriate error.""" + runner = CliRunner() + result = runner.invoke(main, ["/nonexistent/path/to/file.jsonl"]) + + # Should fail gracefully + assert result.exit_code != 0 or "error" in result.output.lower() + + def test_output_option( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """--output option specifies output path.""" + project_dir = create_project_with_jsonl( + cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + output_path = cli_projects_setup.projects_dir / "custom_output.html" + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir), "--output", str(output_path)]) + + assert result.exit_code == 0 + assert output_path.exists() + + +class TestCLIErrorHandling: + """Tests for CLI error handling paths.""" + + def test_invalid_format_option( + self, cli_projects_setup: ProjectsSetup, sample_jsonl_content: list[dict] + ): + """Invalid format option shows error.""" + project_dir = create_project_with_jsonl( + 
cli_projects_setup.projects_dir, "test-project", sample_jsonl_content + ) + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir), "--format", "invalid"]) + + assert result.exit_code != 0 + + def test_empty_project_directory(self, cli_projects_setup: ProjectsSetup): + """Empty project directory handled gracefully.""" + project_dir = cli_projects_setup.projects_dir / "empty-project" + project_dir.mkdir() + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir)]) + + # Should complete (possibly with warning) + assert result.exit_code == 0 + + def test_malformed_jsonl_handled(self, cli_projects_setup: ProjectsSetup): + """Malformed JSONL handled gracefully.""" + project_dir = cli_projects_setup.projects_dir / "bad-project" + project_dir.mkdir() + (project_dir / "test.jsonl").write_text("not valid json\n{also: bad}") + + runner = CliRunner() + result = runner.invoke(main, [str(project_dir)]) + + # Should not crash + assert result.exit_code in (0, 1) diff --git a/test/test_migrations.py b/test/test_migrations.py new file mode 100644 index 00000000..97da2e94 --- /dev/null +++ b/test/test_migrations.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 +"""Tests for migration runner functionality.""" + +import sqlite3 +from pathlib import Path + +import pytest + +from claude_code_log.migrations.runner import ( + _compute_checksum, + _ensure_schema_version_table, + _parse_migration_number, + apply_migration, + get_applied_migrations, + get_available_migrations, + get_current_version, + get_pending_migrations, + run_migrations, + verify_migrations, +) + + +class TestParseMigrationNumber: + """Tests for migration filename parsing.""" + + def test_parses_standard_format(self): + """Parses standard migration filename.""" + assert _parse_migration_number("001_initial_schema.sql") == 1 + assert _parse_migration_number("002_add_column.sql") == 2 + assert _parse_migration_number("010_fix_bug.sql") == 10 + assert 
_parse_migration_number("100_big_change.sql") == 100 + + def test_handles_double_underscores(self): + """Handles filenames with multiple underscores.""" + assert _parse_migration_number("003_add_html_cache.sql") == 3 + + def test_invalid_format_raises_error(self): + """Invalid format raises ValueError.""" + with pytest.raises(ValueError): + _parse_migration_number("invalid.sql") + with pytest.raises(ValueError): + _parse_migration_number("no_number.sql") + with pytest.raises(ValueError): + _parse_migration_number("abc_name.sql") + + +class TestComputeChecksum: + """Tests for checksum computation.""" + + def test_consistent_checksum(self): + """Same content produces same checksum.""" + content = "CREATE TABLE test (id INTEGER);" + checksum1 = _compute_checksum(content) + checksum2 = _compute_checksum(content) + assert checksum1 == checksum2 + + def test_different_content_different_checksum(self): + """Different content produces different checksum.""" + checksum1 = _compute_checksum("CREATE TABLE test1;") + checksum2 = _compute_checksum("CREATE TABLE test2;") + assert checksum1 != checksum2 + + def test_checksum_is_sha256_hex(self): + """Checksum is 64-character hex string (SHA256).""" + checksum = _compute_checksum("test") + assert len(checksum) == 64 + assert all(c in "0123456789abcdef" for c in checksum) + + +class TestEnsureSchemaVersionTable: + """Tests for schema version table creation.""" + + def test_creates_table_if_not_exists(self, tmp_path: Path): + """Creates _schema_version table on fresh database.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + _ensure_schema_version_table(conn) + + # Verify table exists with correct columns + columns = conn.execute("PRAGMA table_info(_schema_version)").fetchall() + column_names = {col[1] for col in columns} + assert "version" in column_names + assert "filename" in column_names + assert "applied_at" in column_names + assert "checksum" in column_names + + conn.close() + + def 
test_upgrades_old_format_table(self, tmp_path: Path): + """Upgrades old format table (without checksum) to new format.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + # Create old format table (without checksum) + conn.execute(""" + CREATE TABLE _schema_version ( + version INTEGER PRIMARY KEY, + filename TEXT NOT NULL, + applied_at TEXT NOT NULL + ) + """) + conn.commit() + + # Call ensure - should upgrade + _ensure_schema_version_table(conn) + + # Verify new schema + columns = conn.execute("PRAGMA table_info(_schema_version)").fetchall() + column_names = {col[1] for col in columns} + assert "checksum" in column_names + + conn.close() + + +class TestGetAppliedMigrations: + """Tests for getting applied migrations.""" + + def test_empty_database_returns_empty_list(self, tmp_path: Path): + """Fresh database returns empty list.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + applied = get_applied_migrations(conn) + assert applied == [] + + conn.close() + + def test_returns_applied_migrations(self, tmp_path: Path): + """Returns list of applied migrations.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + # Insert some migration records + conn.execute( + "INSERT INTO _schema_version VALUES (1, '001_test.sql', '2024-01-01', 'abc')" + ) + conn.execute( + "INSERT INTO _schema_version VALUES (2, '002_test.sql', '2024-01-02', 'def')" + ) + conn.commit() + + applied = get_applied_migrations(conn) + assert len(applied) == 2 + assert applied[0] == (1, "abc") + assert applied[1] == (2, "def") + + conn.close() + + +class TestGetAvailableMigrations: + """Tests for getting available migrations.""" + + def test_returns_sql_files_in_order(self): + """Returns migration files sorted by version.""" + migrations = get_available_migrations() + + # Should have at least the initial migrations + assert len(migrations) >= 1 + + # Should be sorted by version + versions = [v for v, _ 
in migrations] + assert versions == sorted(versions) + + # All should be .sql files + for _, path in migrations: + assert path.suffix == ".sql" + + +class TestGetPendingMigrations: + """Tests for getting pending migrations.""" + + def test_all_pending_on_fresh_database(self, tmp_path: Path): + """All migrations pending on fresh database.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + pending = get_pending_migrations(conn) + available = get_available_migrations() + + assert len(pending) == len(available) + + conn.close() + + def test_none_pending_after_all_applied(self, tmp_path: Path): + """No migrations pending after all applied.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + # Run all migrations + run_migrations(db_path) + + # Reconnect and check + conn = sqlite3.connect(db_path) + pending = get_pending_migrations(conn) + assert len(pending) == 0 + + conn.close() + + +class TestApplyMigration: + """Tests for applying individual migrations.""" + + def test_applies_migration_and_records(self, tmp_path: Path): + """Applies migration and records in schema version.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + # Create a test migration file + migration_file = tmp_path / "001_test.sql" + migration_file.write_text("CREATE TABLE test_table (id INTEGER);") + + apply_migration(conn, 1, migration_file) + + # Verify table was created + tables = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='test_table'" + ).fetchall() + assert len(tables) == 1 + + # Verify migration was recorded + applied = get_applied_migrations(conn) + assert len(applied) == 1 + assert applied[0][0] == 1 + + conn.close() + + +class TestVerifyMigrations: + """Tests for migration verification.""" + + def test_no_warnings_for_unmodified_migrations(self, tmp_path: Path): + """No warnings when migrations haven't been 
modified.""" + db_path = tmp_path / "test.db" + + # Run migrations + run_migrations(db_path) + + conn = sqlite3.connect(db_path) + warnings = verify_migrations(conn) + + # Should have no warnings for unmodified migrations + assert warnings == [] + + conn.close() + + def test_warning_for_modified_migration(self, tmp_path: Path): + """Warning when migration file has been modified.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + # Insert a fake migration record with wrong checksum + conn.execute( + "INSERT INTO _schema_version VALUES (1, '001_initial_schema.sql', '2024-01-01', 'wrong_checksum')" + ) + conn.commit() + + warnings = verify_migrations(conn) + + # Should warn about modified migration + assert len(warnings) == 1 + assert "modified" in warnings[0].lower() + + conn.close() + + +class TestRunMigrations: + """Tests for running all migrations.""" + + def test_runs_all_pending_migrations(self, tmp_path: Path): + """Runs all pending migrations on fresh database.""" + db_path = tmp_path / "test.db" + + count = run_migrations(db_path) + + # Should have run at least the initial migrations + assert count >= 1 + + # Verify schema was created + conn = sqlite3.connect(db_path) + tables = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table'" + ).fetchall() + table_names = {t[0] for t in tables} + + # Should have core tables from initial migration + assert "projects" in table_names + assert "_schema_version" in table_names + + conn.close() + + def test_idempotent_multiple_runs(self, tmp_path: Path): + """Running multiple times is safe.""" + db_path = tmp_path / "test.db" + + count1 = run_migrations(db_path) + count2 = run_migrations(db_path) + + # First run applies migrations + assert count1 >= 1 + # Second run applies nothing (already applied) + assert count2 == 0 + + def test_creates_database_if_not_exists(self, tmp_path: Path): + """Creates database file if it doesn't exist.""" + db_path = 
tmp_path / "new_db.db" + assert not db_path.exists() + + run_migrations(db_path) + + assert db_path.exists() + + +class TestGetCurrentVersion: + """Tests for getting current schema version.""" + + def test_returns_zero_for_fresh_database(self, tmp_path: Path): + """Returns 0 for database with no migrations.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + + version = get_current_version(conn) + assert version == 0 + + conn.close() + + def test_returns_highest_version(self, tmp_path: Path): + """Returns highest applied migration version.""" + db_path = tmp_path / "test.db" + conn = sqlite3.connect(db_path) + _ensure_schema_version_table(conn) + + # Insert migrations out of order + conn.execute( + "INSERT INTO _schema_version VALUES (3, '003_test.sql', '2024-01-03', 'c')" + ) + conn.execute( + "INSERT INTO _schema_version VALUES (1, '001_test.sql', '2024-01-01', 'a')" + ) + conn.execute( + "INSERT INTO _schema_version VALUES (2, '002_test.sql', '2024-01-02', 'b')" + ) + conn.commit() + + version = get_current_version(conn) + assert version == 3 + + conn.close() + + def test_returns_version_after_real_migrations(self, tmp_path: Path): + """Returns correct version after running real migrations.""" + db_path = tmp_path / "test.db" + run_migrations(db_path) + + conn = sqlite3.connect(db_path) + version = get_current_version(conn) + + # Should match number of available migrations + available = get_available_migrations() + expected_version = max(v for v, _ in available) + assert version == expected_version + + conn.close() diff --git a/test/test_renderer_timings.py b/test/test_renderer_timings.py new file mode 100644 index 00000000..c5e91ec8 --- /dev/null +++ b/test/test_renderer_timings.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python3 +"""Tests for renderer timing utilities.""" + +import time + +import pytest + + +class TestDebugTimingFlag: + """Tests for DEBUG_TIMING environment variable.""" + + def test_debug_timing_disabled_by_default(self): + 
"""DEBUG_TIMING is False by default.""" + # Import with fresh module state + + # Note: We can't easily test the default since the module is already loaded + # This test just documents the expected default behavior + # The actual value depends on environment at import time + + def test_debug_timing_enabled_with_1(self, monkeypatch: pytest.MonkeyPatch): + """DEBUG_TIMING enabled with '1'.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + # Reimport to pick up env var + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + assert rt.DEBUG_TIMING is True + + def test_debug_timing_enabled_with_true(self, monkeypatch: pytest.MonkeyPatch): + """DEBUG_TIMING enabled with 'true'.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "true") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + assert rt.DEBUG_TIMING is True + + def test_debug_timing_enabled_with_yes(self, monkeypatch: pytest.MonkeyPatch): + """DEBUG_TIMING enabled with 'yes'.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "yes") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + assert rt.DEBUG_TIMING is True + + def test_debug_timing_case_insensitive(self, monkeypatch: pytest.MonkeyPatch): + """DEBUG_TIMING handles uppercase values.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "TRUE") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + assert rt.DEBUG_TIMING is True + + +class TestSetTimingVar: + """Tests for set_timing_var function.""" + + def test_sets_variable_when_enabled(self, monkeypatch: pytest.MonkeyPatch): + """Sets timing variable when DEBUG_TIMING enabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + rt._timing_data.clear() + + rt.set_timing_var("test_var", "test_value") + assert 
rt._timing_data.get("test_var") == "test_value" + + def test_ignores_when_disabled(self, monkeypatch: pytest.MonkeyPatch): + """Ignores set when DEBUG_TIMING disabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + rt._timing_data.clear() + + rt.set_timing_var("test_var", "test_value") + assert "test_var" not in rt._timing_data + + +class TestLogTiming: + """Tests for log_timing context manager.""" + + def test_logs_phase_timing_when_enabled( + self, monkeypatch: pytest.MonkeyPatch, capsys + ): + """Logs phase timing when DEBUG_TIMING enabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + with rt.log_timing("Test Phase"): + time.sleep(0.01) # Brief sleep to measure + + captured = capsys.readouterr() + assert "[TIMING]" in captured.out + assert "Test Phase" in captured.out + + def test_no_output_when_disabled(self, monkeypatch: pytest.MonkeyPatch, capsys): + """No output when DEBUG_TIMING disabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + with rt.log_timing("Test Phase"): + pass + + captured = capsys.readouterr() + assert "[TIMING]" not in captured.out + + def test_callable_phase_name(self, monkeypatch: pytest.MonkeyPatch, capsys): + """Supports callable for dynamic phase names.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + items = [1, 2, 3] + with rt.log_timing(lambda: f"Processing ({len(items)} items)"): + pass + + captured = capsys.readouterr() + assert "Processing (3 items)" in captured.out + + def test_shows_total_time_when_t_start_provided( + self, monkeypatch: pytest.MonkeyPatch, capsys + ): + """Shows total elapsed 
time when t_start provided.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + t_start = time.time() + time.sleep(0.01) + + with rt.log_timing("Test Phase", t_start=t_start): + pass + + captured = capsys.readouterr() + assert "total:" in captured.out + + +class TestTimingStat: + """Tests for timing_stat context manager.""" + + def test_tracks_operation_timing_when_enabled( + self, monkeypatch: pytest.MonkeyPatch + ): + """Tracks operation timing when DEBUG_TIMING enabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + rt._timing_data.clear() + rt._timing_data["_test_timings"] = [] + rt._timing_data["_current_msg_id"] = "msg-123" + + with rt.timing_stat("_test_timings"): + time.sleep(0.01) + + assert len(rt._timing_data["_test_timings"]) == 1 + duration, msg_id = rt._timing_data["_test_timings"][0] + assert duration >= 0.01 + assert msg_id == "msg-123" + + def test_no_tracking_when_disabled(self, monkeypatch: pytest.MonkeyPatch): + """No tracking when DEBUG_TIMING disabled.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + rt._timing_data.clear() + rt._timing_data["_test_timings"] = [] + + with rt.timing_stat("_test_timings"): + pass + + assert len(rt._timing_data["_test_timings"]) == 0 + + +class TestReportTimingStatistics: + """Tests for report_timing_statistics function.""" + + def test_reports_statistics(self, monkeypatch: pytest.MonkeyPatch, capsys): + """Reports timing statistics.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + timings = [ + (0.1, "msg-1"), + (0.2, "msg-2"), + (0.05, "msg-3"), + ] + + 
rt.report_timing_statistics([("Test Operation", timings)]) + + captured = capsys.readouterr() + assert "Test Operation" in captured.out + assert "Total operations: 3" in captured.out + assert "Total time:" in captured.out + assert "Slowest 10 operations" in captured.out + + def test_empty_timings_no_output(self, monkeypatch: pytest.MonkeyPatch, capsys): + """No output for empty timings.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + rt.report_timing_statistics([("Test Operation", [])]) + + captured = capsys.readouterr() + # Empty timings produce no output (the if timings: check) + assert "Test Operation" not in captured.out + + def test_sorts_by_duration_descending( + self, monkeypatch: pytest.MonkeyPatch, capsys + ): + """Slowest operations listed first.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + # Create timings in ascending order + timings = [ + (0.001, "msg-fast"), + (0.1, "msg-slow"), + (0.01, "msg-medium"), + ] + + rt.report_timing_statistics([("Test", timings)]) + + captured = capsys.readouterr() + # msg-slow should appear before msg-medium and msg-fast + slow_pos = captured.out.find("msg-slow") + medium_pos = captured.out.find("msg-medium") + fast_pos = captured.out.find("msg-fast") + assert slow_pos < medium_pos < fast_pos + + def test_limits_to_10_slowest(self, monkeypatch: pytest.MonkeyPatch, capsys): + """Only shows 10 slowest operations.""" + monkeypatch.setenv("CLAUDE_CODE_LOG_DEBUG_TIMING", "1") + + import importlib + import claude_code_log.renderer_timings as rt + + importlib.reload(rt) + + # Create 15 timings + timings = [(i * 0.001, f"msg-{i}") for i in range(15)] + + rt.report_timing_statistics([("Test", timings)]) + + captured = capsys.readouterr() + # Should only show 10 + assert captured.out.count("msg-") == 10 From 
4695f792738ff38a8a8f6bd72244f75abcf53323 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 14:06:04 +0000 Subject: [PATCH 10/23] CR PR feedback --- claude_code_log/cache.py | 53 ++++++++++---- claude_code_log/tui.py | 135 +++++++++++++++++++++++++--------- test/test_pagination.py | 152 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 296 insertions(+), 44 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 220cc90c..3a6ffc9a 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -492,11 +492,17 @@ def save_cached_entries( with self._get_connection() as conn: # Insert or update file record + # Use ON CONFLICT to preserve file ID and avoid cascade deletes on messages conn.execute( """ - INSERT OR REPLACE INTO cached_files + INSERT INTO cached_files (project_id, file_name, file_path, source_mtime, cached_mtime, message_count) VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT(project_id, file_name) DO UPDATE SET + file_path = excluded.file_path, + source_mtime = excluded.source_mtime, + cached_mtime = excluded.cached_mtime, + message_count = excluded.message_count """, ( self._project_id, @@ -552,12 +558,23 @@ def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> Non for session_id, data in session_data.items(): conn.execute( """ - INSERT OR REPLACE INTO sessions ( + INSERT INTO sessions ( project_id, session_id, summary, first_timestamp, last_timestamp, message_count, first_user_message, cwd, total_input_tokens, total_output_tokens, total_cache_creation_tokens, total_cache_read_tokens ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(project_id, session_id) DO UPDATE SET + summary = excluded.summary, + first_timestamp = excluded.first_timestamp, + last_timestamp = excluded.last_timestamp, + message_count = excluded.message_count, + first_user_message = excluded.first_user_message, + cwd = excluded.cwd, + total_input_tokens = excluded.total_input_tokens, + total_output_tokens = excluded.total_output_tokens, + total_cache_creation_tokens = excluded.total_cache_creation_tokens, + total_cache_read_tokens = excluded.total_cache_read_tokens """, ( self._project_id, @@ -1301,18 +1318,30 @@ def is_page_stale( # Check if any session on this page has changed with self._get_connection() as conn: - for session_id in page_data.session_ids: - row = conn.execute( - """SELECT message_count FROM sessions - WHERE project_id = ? AND session_id = ?""", - (self._project_id, session_id), - ).fetchone() + # Build placeholders for IN clause + placeholders = ",".join("?" for _ in page_data.session_ids) + params = [self._project_id, *page_data.session_ids] - if not row: - return True, "session_missing" + row = conn.execute( + f"""SELECT COUNT(*) as session_count, + COALESCE(SUM(message_count), 0) as total_messages, + MAX(last_timestamp) as max_timestamp + FROM sessions + WHERE project_id = ? 
AND session_id IN ({placeholders})""", + params, + ).fetchone() + + # Check if any sessions are missing + if row["session_count"] != len(page_data.session_ids): + return True, "session_missing" + + # Check if message count changed + if row["total_messages"] != page_data.message_count: + return True, "message_count_changed" - # We need to check if session content changed - # For now, just check if session exists + # Check if last timestamp changed (session content updated) + if row["max_timestamp"] != page_data.last_timestamp: + return True, "timestamp_changed" return False, "up_to_date" diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index e1495e17..ca8580e7 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -273,22 +273,46 @@ def _handle_archive_project_confirm(self, confirmed: bool | None) -> None: return project_path = self.selected_project_path - archived_count = 0 + + # Collect all JSONL files first + jsonl_files = list(project_path.glob("*.jsonl")) + if not jsonl_files: + self.notify("No sessions to archive", severity="warning") + return + + # Track successes and failures + succeeded: list[str] = [] + failed: list[tuple[str, str]] = [] # (filename, error message) # Delete all JSONL files in the project - for jsonl_file in project_path.glob("*.jsonl"): + for jsonl_file in jsonl_files: try: jsonl_file.unlink() - archived_count += 1 + succeeded.append(jsonl_file.name) except Exception as e: - self.notify( - f"Failed to delete {jsonl_file.name}: {e}", severity="error" - ) + failed.append((jsonl_file.name, str(e))) + + # Report results clearly + total = len(jsonl_files) + if failed: + # Show detailed failure information + failed_names = ", ".join(f[0] for f in failed[:3]) + if len(failed) > 3: + failed_names += f" and {len(failed) - 3} more" + self.notify( + f"Archive incomplete: {len(succeeded)}/{total} sessions deleted. 
" + f"Failed: {failed_names}", + severity="error", + ) + else: + self.notify(f"Archived {len(succeeded)} sessions") - if archived_count > 0: - self.notify(f"Archived {archived_count} sessions") - # Add to archived projects set + # Only mark as fully archived if ALL files were deleted + if not failed and succeeded: self.archived_projects.add(project_path) + + # Always refresh to show current state + if succeeded: self.populate_table() def action_delete_project(self) -> None: @@ -318,13 +342,29 @@ def _handle_delete_project_confirm(self, result: Optional[str]) -> None: cache_manager.clear_cache() # If deleting both, also delete JSONL files + file_delete_failed = False if result == "both": - for jsonl_file in project_path.glob("*.jsonl"): - try: - jsonl_file.unlink() - except Exception as e: + jsonl_files = list(project_path.glob("*.jsonl")) + if jsonl_files: + succeeded: list[str] = [] + failed: list[tuple[str, str]] = [] + + for jsonl_file in jsonl_files: + try: + jsonl_file.unlink() + succeeded.append(jsonl_file.name) + except Exception as e: + failed.append((jsonl_file.name, str(e))) + + if failed: + file_delete_failed = True + failed_names = ", ".join(f[0] for f in failed[:3]) + if len(failed) > 3: + failed_names += f" and {len(failed) - 3} more" self.notify( - f"Failed to delete {jsonl_file.name}: {e}", severity="error" + f"Cache deleted but {len(failed)}/{len(jsonl_files)} " + f"session files failed to delete: {failed_names}", + severity="error", ) # Remove from projects list @@ -335,7 +375,8 @@ def _handle_delete_project_confirm(self, result: Optional[str]) -> None: if project_path in self.archived_projects: self.archived_projects.discard(project_path) - self.notify(f"Deleted project: {project_path.name}") + if not file_delete_failed: + self.notify(f"Deleted project: {project_path.name}") self.selected_project_path = None self.populate_table() @@ -371,26 +412,56 @@ def _handle_restore_project_confirm(self, confirmed: bool | None) -> None: # Ensure project 
directory exists project_path.mkdir(parents=True, exist_ok=True) - restored_count = 0 - for session_id in project_cache.sessions: + # Identify sessions that need restoration (don't already exist as files) + sessions_to_restore = [ + session_id + for session_id in project_cache.sessions + if not (project_path / f"{session_id}.jsonl").exists() + ] + + if not sessions_to_restore: + self.notify("All sessions already exist as files", severity="warning") + return + + # Track successes and failures + succeeded: list[str] = [] + failed: list[tuple[str, str]] = [] # (session_id, error message) + + for session_id in sessions_to_restore: jsonl_path = project_path / f"{session_id}.jsonl" - if not jsonl_path.exists(): - try: - messages = cache_manager.export_session_to_jsonl(session_id) - if messages: - with open(jsonl_path, "w", encoding="utf-8") as f: - for msg in messages: - f.write(msg + "\n") - restored_count += 1 - except Exception as e: - self.notify( - f"Failed to restore {session_id}: {e}", severity="error" - ) + try: + messages = cache_manager.export_session_to_jsonl(session_id) + if messages: + with open(jsonl_path, "w", encoding="utf-8") as f: + for msg in messages: + f.write(msg + "\n") + succeeded.append(session_id) + else: + failed.append((session_id, "No messages found in cache")) + except Exception as e: + failed.append((session_id, str(e))) + + # Report results clearly + total = len(sessions_to_restore) + if failed: + # Show detailed failure information + failed_ids = ", ".join(f[0][:8] for f in failed[:3]) # Truncate UUIDs + if len(failed) > 3: + failed_ids += f" and {len(failed) - 3} more" + self.notify( + f"Restore incomplete: {len(succeeded)}/{total} sessions restored. 
" + f"Failed: {failed_ids}", + severity="error", + ) + else: + self.notify(f"Restored {len(succeeded)} sessions") - if restored_count > 0: - self.notify(f"Restored {restored_count} sessions") - # Remove from archived projects set + # Only mark as fully restored if ALL sessions were restored + if not failed and succeeded: self.archived_projects.discard(project_path) + + # Always refresh to show current state + if succeeded: self.populate_table() diff --git a/test/test_pagination.py b/test/test_pagination.py index 63285f60..417c5785 100644 --- a/test/test_pagination.py +++ b/test/test_pagination.py @@ -273,6 +273,158 @@ def test_is_page_stale_page_size_changed(self, cache_manager): assert is_stale is True assert "page_size" in reason.lower() or "size" in reason.lower() + def test_is_page_stale_session_missing(self, cache_manager, temp_project_dir): + """is_page_stale should return True when a session is missing from sessions table.""" + # Create page cache entry referencing session "s1" + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + # Create the HTML file so it passes the file existence check + (temp_project_dir / "combined_transcripts.html").write_text("") + + # Don't add session "s1" to sessions table - it should be detected as missing + # Mock is_html_outdated to skip HTML version check (tested separately) + with patch("claude_code_log.renderer.is_html_outdated", return_value=False): + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is True + assert "session_missing" in reason + + def test_is_page_stale_message_count_changed(self, cache_manager, temp_project_dir): + """is_page_stale should return True when session message 
count has changed.""" + # Create page cache entry with message_count=1000 + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, # Page expects 1000 messages + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + # Create the HTML file + (temp_project_dir / "combined_transcripts.html").write_text("") + + # Add session with different message count + cache_manager.update_session_cache( + { + "s1": SessionCacheData( + session_id="s1", + message_count=1500, # Different from page's 1000 + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + first_user_message="Test", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + } + ) + + # Mock is_html_outdated to skip HTML version check (tested separately) + with patch("claude_code_log.renderer.is_html_outdated", return_value=False): + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is True + assert "message_count" in reason + + def test_is_page_stale_timestamp_changed(self, cache_manager, temp_project_dir): + """is_page_stale should return True when session last_timestamp has changed.""" + # Create page cache entry + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", # Page expects this timestamp + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + # Create the HTML file + (temp_project_dir / "combined_transcripts.html").write_text("") + + # Add session with same message_count but different 
last_timestamp + cache_manager.update_session_cache( + { + "s1": SessionCacheData( + session_id="s1", + message_count=1000, # Same as page + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T12:00:00Z", # Different timestamp + first_user_message="Test", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + } + ) + + # Mock is_html_outdated to skip HTML version check (tested separately) + with patch("claude_code_log.renderer.is_html_outdated", return_value=False): + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is True + assert "timestamp" in reason + + def test_is_page_stale_up_to_date(self, cache_manager, temp_project_dir): + """is_page_stale should return False when page matches session data.""" + # Create page cache entry + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=5000, + session_ids=["s1"], + message_count=1000, + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + # Create the HTML file + (temp_project_dir / "combined_transcripts.html").write_text("") + + # Add session with matching data + cache_manager.update_session_cache( + { + "s1": SessionCacheData( + session_id="s1", + message_count=1000, # Same as page + first_timestamp="2023-01-01T10:00:00Z", + last_timestamp="2023-01-01T11:00:00Z", # Same as page + first_user_message="Test", + total_input_tokens=100, + total_output_tokens=50, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + } + ) + + # Mock is_html_outdated to skip HTML version check (tested separately) + with patch("claude_code_log.renderer.is_html_outdated", return_value=False): + is_stale, reason = cache_manager.is_page_stale(1, 5000) + assert is_stale is False + assert "up_to_date" in reason + def 
test_invalidate_all_pages(self, cache_manager): """invalidate_all_pages should remove all page cache entries.""" cache_manager.update_page_cache( From dd4c5969dadb9e4186a3c372b94f9d6f0ea3e86f Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 15:32:26 +0000 Subject: [PATCH 11/23] CR PR feedback --- claude_code_log/cache.py | 7 ++++ test/test_cache_integration.py | 65 ++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 3a6ffc9a..3c53d770 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -1419,6 +1419,13 @@ def delete_session(self, session_id: str) -> bool: (self._project_id, session_id), ) + # Delete page_sessions entries referencing this session + conn.execute( + """DELETE FROM page_sessions WHERE session_id = ? + AND page_id IN (SELECT id FROM html_pages WHERE project_id = ?)""", + (session_id, self._project_id), + ) + # Delete cached_files entry for this session's JSONL file # File name pattern is {session_id}.jsonl conn.execute( diff --git a/test/test_cache_integration.py b/test/test_cache_integration.py index ddccd39e..385ccadc 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -890,6 +890,71 @@ def test_delete_session_invalidates_file_cache( "because the file is no longer considered cached" ) + def test_delete_session_removes_page_sessions( + self, temp_projects_dir, sample_jsonl_data + ): + """Test that delete_session removes page_sessions entries. + + When a session is part of a paginated combined transcript, deleting + the session should also remove its entry from the page_sessions table. 
+ """ + project_dir = temp_projects_dir / "delete-page-sessions-test" + project_dir.mkdir() + + session_id = "session-1" + jsonl_file = project_dir / f"{session_id}.jsonl" + with open(jsonl_file, "w") as f: + for entry in sample_jsonl_data: + f.write(json.dumps(entry) + "\n") + + # Process to populate cache + convert_jsonl_to_html(input_path=project_dir, use_cache=True) + + cache_manager = CacheManager(project_dir, "1.0.0") + + # Add page cache entry with this session + cache_manager.update_page_cache( + page_number=1, + html_path="combined_transcripts.html", + page_size_config=50, + session_ids=[session_id], + message_count=5, + first_timestamp="2024-01-01T00:00:00Z", + last_timestamp="2024-01-01T01:00:00Z", + total_input_tokens=100, + total_output_tokens=200, + total_cache_creation_tokens=0, + total_cache_read_tokens=0, + ) + + # Verify page has the session + page_data = cache_manager.get_page_data(1) + assert page_data is not None + assert session_id in page_data.session_ids + + # Delete the session + result = cache_manager.delete_session(session_id) + assert result is True + + # Verify page_sessions entry is removed + # The page itself still exists, but the session mapping should be gone + import sqlite3 + + conn = sqlite3.connect(cache_manager.db_path) + conn.row_factory = sqlite3.Row + try: + row = conn.execute( + """SELECT COUNT(*) as cnt FROM page_sessions ps + JOIN html_pages hp ON ps.page_id = hp.id + WHERE hp.project_id = ? 
AND ps.session_id = ?""", + (cache_manager._project_id, session_id), + ).fetchone() + assert row["cnt"] == 0, ( + "page_sessions entry should be removed after delete_session()" + ) + finally: + conn.close() + def test_delete_nonexistent_session(self, temp_projects_dir): """Test deleting a session that doesn't exist returns False.""" project_dir = temp_projects_dir / "delete-nonexistent" From f8a074a5fefc2be51f31c86affe358cd6b2d3c56 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 12 Jan 2026 19:49:54 +0000 Subject: [PATCH 12/23] CR PR feedback - take 3 --- claude_code_log/cache.py | 55 ++++++++++++++++------------------ test/test_cache_integration.py | 17 ++++++----- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 3c53d770..ebc4363a 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -2,6 +2,7 @@ """SQLite-based cache management for Claude Code Log.""" import json +import logging import os import sqlite3 import zlib @@ -24,6 +25,8 @@ UserTranscriptEntry, ) +logger = logging.getLogger(__name__) + # ========== Data Models ========== @@ -442,14 +445,7 @@ def load_cached_entries_filtered( if to_date: to_dt = dateparser.parse(to_date) if to_dt: - if to_date in ["today", "yesterday"] or "days ago" in to_date: - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) - else: - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) + to_dt = to_dt.replace(hour=23, minute=59, second=59, microsecond=999999) # Build query with SQL-based filtering sql = "SELECT content FROM messages WHERE file_id = ?" 
@@ -525,26 +521,27 @@ def save_cached_entries( conn.execute("DELETE FROM messages WHERE file_id = ?", (file_id,)) # Insert all entries in a batch - for entry in entries: - serialized = self._serialize_entry(entry, file_id) - conn.execute( - """ - INSERT INTO messages ( - project_id, file_id, type, timestamp, session_id, - _uuid, _parent_uuid, _is_sidechain, _user_type, _cwd, _version, - _is_meta, _agent_id, _request_id, - input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, - _leaf_uuid, _level, _operation, content - ) VALUES ( - :project_id, :file_id, :type, :timestamp, :session_id, - :_uuid, :_parent_uuid, :_is_sidechain, :_user_type, :_cwd, :_version, - :_is_meta, :_agent_id, :_request_id, - :input_tokens, :output_tokens, :cache_creation_tokens, :cache_read_tokens, - :_leaf_uuid, :_level, :_operation, :content - ) - """, - serialized, + serialized_entries = [ + self._serialize_entry(entry, file_id) for entry in entries + ] + conn.executemany( + """ + INSERT INTO messages ( + project_id, file_id, type, timestamp, session_id, + _uuid, _parent_uuid, _is_sidechain, _user_type, _cwd, _version, + _is_meta, _agent_id, _request_id, + input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, + _leaf_uuid, _level, _operation, content + ) VALUES ( + :project_id, :file_id, :type, :timestamp, :session_id, + :_uuid, :_parent_uuid, :_is_sidechain, :_user_type, :_cwd, :_version, + :_is_meta, :_agent_id, :_request_id, + :input_tokens, :output_tokens, :cache_creation_tokens, :cache_read_tokens, + :_leaf_uuid, :_level, :_operation, :content ) + """, + serialized_entries, + ) self._update_last_updated(conn) conn.commit() @@ -1513,8 +1510,8 @@ def get_all_cached_projects( result.append((row["project_path"], is_archived)) finally: conn.close() - except Exception: - pass + except (sqlite3.Error, OSError) as e: + logger.debug("Failed to read cached projects from %s: %s", actual_db_path, e) return result diff --git a/test/test_cache_integration.py 
b/test/test_cache_integration.py index 385ccadc..5e633766 100644 --- a/test/test_cache_integration.py +++ b/test/test_cache_integration.py @@ -475,19 +475,20 @@ def test_mixed_cached_and_uncached_files( def test_cache_corruption_recovery(self, setup_test_project): """Test recovery from corrupted cache files.""" - project_dir = setup_test_project + project_with_cache = setup_test_project + project_dir = project_with_cache.project_dir + db_path = project_with_cache.db_path # Create initial cache convert_jsonl_to_html(input_path=project_dir, use_cache=True) - # Corrupt cache file - cache_dir = project_dir / "cache" - cache_files = list(cache_dir.glob("*.json")) - if cache_files: - cache_file = [f for f in cache_files if f.name != "index.json"][0] - cache_file.write_text("corrupted json data", encoding="utf-8") + # Corrupt SQLite database + assert db_path.exists() + with open(db_path, "r+b") as f: + f.seek(100) # Skip SQLite header + f.write(b"corrupted data here") - # Should recover gracefully + # Should recover gracefully (recreates database) output = convert_jsonl_to_html(input_path=project_dir, use_cache=True) assert output.exists() From 53157d2a794e54f65e5aaac6a82898d68110cc4d Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Tue, 13 Jan 2026 20:44:40 +0000 Subject: [PATCH 13/23] ty update + fix sloppy types --- claude_code_log/converter.py | 4 +-- claude_code_log/factories/user_factory.py | 4 +-- claude_code_log/html/renderer_code.py | 29 ++++++++-------- claude_code_log/html/tool_formatters.py | 5 +-- claude_code_log/html/utils.py | 24 +++++++++----- claude_code_log/renderer.py | 8 ++--- pyproject.toml | 6 +++- stubs/pygments/__init__.pyi | 5 +++ stubs/pygments/formatter.pyi | 7 ++++ stubs/pygments/formatters/__init__.pyi | 16 +++++++++ stubs/pygments/lexer.pyi | 7 ++++ stubs/pygments/lexers/__init__.pyi | 20 ++++++++++++ stubs/pygments/util.pyi | 6 ++++ test/test_cache.py | 2 +- test/test_index_timezone.py | 8 ++--- test/test_message_types.py | 6 ++-- uv.lock 
| 40 +++++++++++------------ 17 files changed, 137 insertions(+), 60 deletions(-) create mode 100644 stubs/pygments/__init__.pyi create mode 100644 stubs/pygments/formatter.pyi create mode 100644 stubs/pygments/formatters/__init__.pyi create mode 100644 stubs/pygments/lexer.pyi create mode 100644 stubs/pygments/lexers/__init__.pyi create mode 100644 stubs/pygments/util.pyi diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index e41a839b..464163b5 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from pathlib import Path import traceback -from typing import Any, Dict, List, Optional, TYPE_CHECKING +from typing import Any, Dict, List, Optional, TYPE_CHECKING, cast import dateparser @@ -190,7 +190,7 @@ def load_transcript( isinstance(tool_use_result, dict) and "agentId" in tool_use_result ): - agent_id_value = tool_use_result.get("agentId") # type: ignore[reportUnknownVariableType, reportUnknownMemberType] + agent_id_value = cast(Any, tool_use_result).get("agentId") if isinstance(agent_id_value, str): agent_ids.add(agent_id_value) # Copy agentId to top level for Pydantic to preserve diff --git a/claude_code_log/factories/user_factory.py b/claude_code_log/factories/user_factory.py index f14a4e4d..02cb01fa 100644 --- a/claude_code_log/factories/user_factory.py +++ b/claude_code_log/factories/user_factory.py @@ -445,7 +445,7 @@ def create_user_message( for item in content_list: # Check for text content if hasattr(item, "text"): - item_text: str = getattr(item, "text") # type: ignore[assignment] + item_text: str = getattr(item, "text") if ide_content := create_ide_notification_content(item_text): # Add IDE notification item first @@ -462,6 +462,6 @@ def create_user_message( items.append(item) elif hasattr(item, "source") and getattr(item, "type", None) == "image": # Duck-typed image content - convert to our Pydantic model - 
items.append(ImageContent.model_validate(item.model_dump())) # type: ignore[union-attr] + items.append(ImageContent.model_validate(item.model_dump())) return UserTextMessage(items=items, meta=meta) diff --git a/claude_code_log/html/renderer_code.py b/claude_code_log/html/renderer_code.py index 7a633d45..8878d82b 100644 --- a/claude_code_log/html/renderer_code.py +++ b/claude_code_log/html/renderer_code.py @@ -12,10 +12,12 @@ import re from typing import Callable, Optional -from pygments import highlight # type: ignore[reportUnknownVariableType] -from pygments.lexers import TextLexer, get_lexer_by_name, get_all_lexers # type: ignore[reportUnknownVariableType] -from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType] -from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType] +from pygments import highlight +from pygments.lexer import Lexer +from pygments.lexers import TextLexer, get_lexer_by_name, get_all_lexers +from pygments.formatter import Formatter +from pygments.formatters import HtmlFormatter +from pygments.util import ClassNotFound from ..renderer_timings import timing_stat @@ -49,7 +51,7 @@ def _init_lexer_caches() -> tuple[dict[str, str], dict[str, str]]: extension_cache: dict[str, str] = {} # Use public API: get_all_lexers() returns (name, aliases, patterns, mimetypes) tuples - for name, aliases, patterns, mimetypes in get_all_lexers(): # type: ignore[reportUnknownVariableType] + for _name, aliases, patterns, _mimetypes in get_all_lexers(): if aliases and patterns: # Use first alias as the lexer name lexer_alias = aliases[0] @@ -93,6 +95,9 @@ def highlight_code_with_pygments( # Get basename for matching (patterns are like "*.py") basename = os.path.basename(file_path).lower() + # Default to plain text lexer + lexer: Lexer = TextLexer() + try: # OPTIMIZATION: Try fast extension lookup first (O(1) dict lookup) lexer_alias = None @@ -107,18 +112,16 @@ def highlight_code_with_pygments( lexer_alias = 
lex_alias break - # Get lexer or use TextLexer as fallback + # Get lexer based on file extension # Note: stripall=False preserves leading whitespace (important for code indentation) if lexer_alias: - lexer = get_lexer_by_name(lexer_alias, stripall=False) # type: ignore[reportUnknownVariableType] - else: - lexer = TextLexer() # type: ignore[reportUnknownVariableType] + lexer = get_lexer_by_name(lexer_alias, stripall=False) except ClassNotFound: - # Fall back to plain text lexer - lexer = TextLexer() # type: ignore[reportUnknownVariableType] + # Fall back to plain text lexer (already set as default) + pass # Create formatter with line numbers in table format - formatter = HtmlFormatter( # type: ignore[reportUnknownVariableType] + formatter: Formatter = HtmlFormatter( linenos="table" if show_linenos else False, cssclass="highlight", wrapcode=True, @@ -127,7 +130,7 @@ def highlight_code_with_pygments( # Highlight the code with timing if enabled with timing_stat("_pygments_timings"): - return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + return str(highlight(code, lexer, formatter)) def truncate_highlighted_preview(highlighted_html: str, max_lines: int) -> str: diff --git a/claude_code_log/html/tool_formatters.py b/claude_code_log/html/tool_formatters.py index 6ce93fe3..2b99d352 100644 --- a/claude_code_log/html/tool_formatters.py +++ b/claude_code_log/html/tool_formatters.py @@ -542,7 +542,7 @@ def render_params_table(params: dict[str, Any]) -> str: # If value is structured (dict/list), render as JSON if isinstance(value, (dict, list)): try: - formatted_value = json.dumps(value, indent=2, ensure_ascii=False) # type: ignore[arg-type] + formatted_value = json.dumps(value, indent=2, ensure_ascii=False) escaped_value = escape_html(formatted_value) # Make long structured values collapsible @@ -559,7 +559,8 @@ def render_params_table(params: dict[str, Any]) -> str: f"
{escaped_value}
" ) except (TypeError, ValueError): - escaped_value = escape_html(str(value)) # type: ignore[arg-type] + # Fallback: convert to string when JSON serialization fails + escaped_value = escape_html(str(cast(object, value))) value_html = escaped_value else: # Simple value, render as-is (or collapsible if long) diff --git a/claude_code_log/html/utils.py b/claude_code_log/html/utils.py index 8822ab3e..613bab43 100644 --- a/claude_code_log/html/utils.py +++ b/claude_code_log/html/utils.py @@ -200,10 +200,12 @@ def escape_html(text: str) -> str: def _create_pygments_plugin() -> Any: """Create a mistune plugin that uses Pygments for code block syntax highlighting.""" - from pygments import highlight # type: ignore[reportUnknownVariableType] - from pygments.lexers import get_lexer_by_name, TextLexer # type: ignore[reportUnknownVariableType] - from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType] - from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType] + from pygments import highlight + from pygments.lexer import Lexer + from pygments.lexers import get_lexer_by_name, TextLexer + from pygments.formatter import Formatter + from pygments.formatters import HtmlFormatter + from pygments.util import ClassNotFound def plugin_pygments(md: Any) -> None: """Plugin to add Pygments syntax highlighting to code blocks.""" @@ -214,19 +216,21 @@ def block_code(code: str, info: Optional[str] = None) -> str: if info: # Language hint provided, use Pygments lang = info.split()[0] if info else "" + # Default to plain text lexer + lexer: Lexer = TextLexer() try: - lexer = get_lexer_by_name(lang, stripall=False) # type: ignore[reportUnknownVariableType] + lexer = get_lexer_by_name(lang, stripall=False) except ClassNotFound: - lexer = TextLexer() # type: ignore[reportUnknownVariableType] + pass # Already have default - formatter = HtmlFormatter( # type: ignore[reportUnknownVariableType] + formatter: Formatter = HtmlFormatter( 
linenos=False, # No line numbers in markdown code blocks cssclass="highlight", wrapcode=True, ) # Track Pygments timing if enabled with timing_stat("_pygments_timings"): - return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + return str(highlight(code, lexer, formatter)) else: # No language hint, use default rendering return original_render(code, info) @@ -438,5 +442,7 @@ def get_template_environment() -> Environment: autoescape=select_autoescape(["html", "xml"]), ) # Add custom filters/functions - env.globals["starts_with_emoji"] = starts_with_emoji # type: ignore[index] + # Cast to Any to bypass Jinja2's overly strict globals type + globals_dict: Any = env.globals + globals_dict["starts_with_emoji"] = starts_with_emoji return env diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py index 6eb2f1af..e54594a2 100644 --- a/claude_code_log/renderer.py +++ b/claude_code_log/renderer.py @@ -1531,7 +1531,7 @@ def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]: content = message.content message_content = content if isinstance(content, list) else [] else: - message_content = message.message.content # type: ignore[union-attr] + message_content = message.message.content text_content = extract_text_content(message_content) @@ -1735,16 +1735,16 @@ def _render_messages( ) effective_type = "user" else: - message_content = message.message.content # type: ignore + message_content = message.message.content meta = create_meta(message) effective_type = message_type # Chunk content: regular items (text/image) accumulate, special items (tool/thinking) separate if isinstance(message_content, list): - chunks = chunk_message_content(message_content) # type: ignore[arg-type] + chunks = chunk_message_content(message_content) else: # String content - wrap in list with single TextContent - content_str: str = message_content.strip() if message_content else "" # type: ignore[union-attr] + content_str: str = 
message_content.strip() if message_content else "" if content_str: chunks: list[ContentChunk] = [ [TextContent(type="text", text=content_str)] # pyright: ignore[reportUnknownArgumentType] diff --git a/pyproject.toml b/pyproject.toml index dc27cb5b..ef3d53a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,10 @@ markers = [ "benchmark: Performance benchmarks that output to GitHub Job Summary", ] +[tool.ty.environment] +# Use custom stubs for untyped libraries +extra-paths = ["stubs"] + [tool.pyright] # Pyright configuration with strict settings include = ["claude_code_log"] # TODO: , "test" @@ -112,7 +116,7 @@ dev = [ "pytest-xdist[psutil]>=3.6.1", "pyright>=1.1.350", "vulture>=2.14", - "ty>=0.0.1a12", + "ty>=0.0.11", "pytest-playwright>=0.7.0", "syrupy>=5.0.0", ] diff --git a/stubs/pygments/__init__.pyi b/stubs/pygments/__init__.pyi new file mode 100644 index 00000000..06d21785 --- /dev/null +++ b/stubs/pygments/__init__.pyi @@ -0,0 +1,5 @@ +"""Type stubs for pygments - minimal stubs for functions used in this project.""" + +from typing import Any + +def highlight(code: str, lexer: Any, formatter: Any, outfile: Any = None) -> str: ... diff --git a/stubs/pygments/formatter.pyi b/stubs/pygments/formatter.pyi new file mode 100644 index 00000000..fdf8cc00 --- /dev/null +++ b/stubs/pygments/formatter.pyi @@ -0,0 +1,7 @@ +"""Type stubs for pygments.formatter - base formatter class.""" + +from typing import Any + +class Formatter: + """Base class for formatters.""" + def __init__(self, **options: Any) -> None: ... 
diff --git a/stubs/pygments/formatters/__init__.pyi b/stubs/pygments/formatters/__init__.pyi new file mode 100644 index 00000000..8f181ab2 --- /dev/null +++ b/stubs/pygments/formatters/__init__.pyi @@ -0,0 +1,16 @@ +"""Type stubs for pygments.formatters - minimal stubs for functions used in this project.""" + +from typing import Any, Literal + +from ..formatter import Formatter + +class HtmlFormatter(Formatter): + """HTML formatter for syntax highlighted code.""" + def __init__( + self, + linenos: bool | Literal["table", "inline"] = False, + cssclass: str = "highlight", + wrapcode: bool = False, + linenostart: int = 1, + **options: Any, + ) -> None: ... diff --git a/stubs/pygments/lexer.pyi b/stubs/pygments/lexer.pyi new file mode 100644 index 00000000..16f50b37 --- /dev/null +++ b/stubs/pygments/lexer.pyi @@ -0,0 +1,7 @@ +"""Type stubs for pygments.lexer - base lexer class.""" + +from typing import Any + +class Lexer: + """Base class for lexers.""" + def __init__(self, **options: Any) -> None: ... diff --git a/stubs/pygments/lexers/__init__.pyi b/stubs/pygments/lexers/__init__.pyi new file mode 100644 index 00000000..ad20345f --- /dev/null +++ b/stubs/pygments/lexers/__init__.pyi @@ -0,0 +1,20 @@ +"""Type stubs for pygments.lexers - minimal stubs for functions used in this project.""" + +from typing import Any, Iterator + +from ..lexer import Lexer + +class TextLexer(Lexer): + """Plain text lexer.""" + def __init__(self, **options: Any) -> None: ... + +def get_lexer_by_name(name: str, **options: Any) -> Lexer: ... +def get_all_lexers() -> Iterator[ + tuple[str, tuple[str, ...], tuple[str, ...], tuple[str, ...]] +]: + """Get all registered lexers. + + Returns: + Iterator of (name, aliases, patterns, mimetypes) tuples + """ + ... 
diff --git a/stubs/pygments/util.pyi b/stubs/pygments/util.pyi new file mode 100644 index 00000000..a2d52e07 --- /dev/null +++ b/stubs/pygments/util.pyi @@ -0,0 +1,6 @@ +"""Type stubs for pygments.util - minimal stubs for functions used in this project.""" + +class ClassNotFound(Exception): + """Exception raised when a lexer or formatter class is not found.""" + + ... diff --git a/test/test_cache.py b/test/test_cache.py index e2832ccd..ee843ba6 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -695,7 +695,7 @@ def test_cache_directory_permissions(self, temp_project_dir, mock_version): assert cache_manager is not None except PermissionError: # If we get permission errors, just skip this test - return pytest.skip("Cannot test permissions on this system") # type: ignore[misc] + pytest.skip("Cannot test permissions on this system") finally: # Restore permissions try: diff --git a/test/test_index_timezone.py b/test/test_index_timezone.py index 3bdee10d..7bfdbc86 100644 --- a/test/test_index_timezone.py +++ b/test/test_index_timezone.py @@ -11,7 +11,7 @@ def test_index_timezone_conversion(page): index_path = Path.home() / ".claude" / "projects" / "index.html" if not index_path.exists(): - pytest.skip("Index file not found") # type: ignore[call-non-callable] + pytest.skip("Index file not found") # Load the page page.goto(f"file://{index_path}") @@ -23,7 +23,7 @@ def test_index_timezone_conversion(page): timestamp_elements = page.query_selector_all(".timestamp[data-timestamp]") if len(timestamp_elements) == 0: - pytest.skip("No timestamps found in index page") # type: ignore[call-non-callable] + pytest.skip("No timestamps found in index page") # Get the first timestamp element first_timestamp = timestamp_elements[0] @@ -68,7 +68,7 @@ def test_session_navigation_timezone_conversion(page): test_html_path = Path("/tmp/test_output_tz.html") if not test_html_path.exists(): - pytest.skip("Test HTML file not found") # type: ignore[call-non-callable] + pytest.skip("Test 
HTML file not found") # Load the page page.goto(f"file://{test_html_path}") @@ -82,7 +82,7 @@ def test_session_navigation_timezone_conversion(page): ) if len(session_timestamps) == 0: - pytest.skip("No session navigation timestamps found") # type: ignore[call-non-callable] + pytest.skip("No session navigation timestamps found") # Get the first session timestamp first_session_ts = session_timestamps[0] diff --git a/test/test_message_types.py b/test/test_message_types.py index 9be0e4c9..78f385a9 100644 --- a/test/test_message_types.py +++ b/test/test_message_types.py @@ -4,6 +4,8 @@ import json import tempfile from pathlib import Path + +from pytest import CaptureFixture from claude_code_log.converter import load_transcript from claude_code_log.html.renderer import generate_html from claude_code_log.models import QueueOperationTranscriptEntry @@ -148,7 +150,7 @@ def test_queue_operation_type_support(): test_file_path.unlink() -def test_load_transcript_missing_file_returns_empty_list(capsys): +def test_load_transcript_missing_file_returns_empty_list(capsys: CaptureFixture[str]): """Test that load_transcript handles missing files gracefully. 
This handles the race condition where a file exists when globbed but @@ -188,6 +190,6 @@ def test_load_transcript_missing_file_silent_mode(): if __name__ == "__main__": test_summary_type_support() test_queue_operation_type_support() - test_load_transcript_missing_file_returns_empty_list(None) # type: ignore + # test_load_transcript_missing_file_returns_empty_list requires pytest's capsys fixture test_load_transcript_missing_file_silent_mode() print("\n✅ All message type tests passed!") diff --git a/uv.lock b/uv.lock index df5825e1..ed571709 100644 --- a/uv.lock +++ b/uv.lock @@ -175,7 +175,7 @@ dev = [ { name = "pytest-xdist", extras = ["psutil"], specifier = ">=3.6.1" }, { name = "ruff", specifier = ">=0.11.2" }, { name = "syrupy", specifier = ">=5.0.0" }, - { name = "ty", specifier = ">=0.0.1a12" }, + { name = "ty", specifier = ">=0.0.11" }, { name = "vulture", specifier = ">=2.14" }, ] @@ -1252,27 +1252,27 @@ wheels = [ [[package]] name = "ty" -version = "0.0.1a29" +version = "0.0.11" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/e1/1a75c95fbb284954b2f6fbbf7fbf1d35f531f50ebe93b23cf53145d1bc1d/ty-0.0.1a29.tar.gz", hash = "sha256:43bb55fd467a057880d62ad4bbb048223fd4fba7d8e4d7d5372a0f4881da83fe", size = 4624122, upload-time = "2025-11-28T20:23:51.728Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bc/45/5ae578480168d4b3c08cf8e5eac3caf8eb7acdb1a06a9bed7519564bd9b4/ty-0.0.11.tar.gz", hash = "sha256:ebcbc7d646847cb6610de1da4ffc849d8b800e29fd1e9ebb81ba8f3fbac88c25", size = 4920340, upload-time = "2026-01-09T21:06:01.592Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/49/35034e045ef27ebf220de742c092b1982902740c3ca151ad2072035df77a/ty-0.0.1a29-py3-none-linux_armv6l.whl", hash = "sha256:0276e8e0779046d464dec8415c240cc76b22e22c8c22c227dec2d79395f037be", size = 9581368, upload-time = "2025-11-28T20:24:07.099Z" }, - { url = 
"https://files.pythonhosted.org/packages/c4/76/350ab2592984907a7ed4a887b4e041ce4afe002ca0dff796c81e06b66e1d/ty-0.0.1a29-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4a47afe9be12667ff521a0f2ed5a0873fb85c1f8330a6680e0e3366f016e3e42", size = 9371567, upload-time = "2025-11-28T20:23:49.634Z" }, - { url = "https://files.pythonhosted.org/packages/7d/f6/27977a0206c9914a2b2be5a96c155cd38cf976492388b58ad09e14c42050/ty-0.0.1a29-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c0041f1c36fac2099bc61aa8cdad18d890ceb15544ba33f522f9967372fb3b82", size = 8889114, upload-time = "2025-11-28T20:23:42.492Z" }, - { url = "https://files.pythonhosted.org/packages/34/1b/4f3c20ae1dac6cdc1c42f020a9fd37733f695bff13c4759ba4d84d1dcd51/ty-0.0.1a29-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46ed30fafdde93968f033bbdd3576f068ebe979c65fd2fcc166b1cff00097e5c", size = 9172880, upload-time = "2025-11-28T20:23:53.906Z" }, - { url = "https://files.pythonhosted.org/packages/fb/76/a671e3e560f37a3f82979637ec362d66363a94e5f23c99cf4f16a9fc737b/ty-0.0.1a29-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3814a7bd8b38d761ea621bf9ae7d1d38a7dde514d9f0e07fb2e70ea5aeeea0f5", size = 9377768, upload-time = "2025-11-28T20:24:11.365Z" }, - { url = "https://files.pythonhosted.org/packages/2b/f3/44b03bb0d96451c473a15bb2ab1dbd828b73f2f3c98f8991c8ac2f8a8083/ty-0.0.1a29-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:466f9eedee4fe17f6d1da352e5174374c935facf3dd4a6c6d301891864ac1797", size = 9756864, upload-time = "2025-11-28T20:24:03.523Z" }, - { url = "https://files.pythonhosted.org/packages/0f/75/056a750c4db3326825e0ba009b018892fff47b56efee8e648e01410f9199/ty-0.0.1a29-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:dc47493923c850ebc3a545b4d738da9924361cc9a9c67fbf4b49786462c5998e", size = 10390516, upload-time = "2025-11-28T20:23:44.891Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/1b/e2a764f84cf71ccb65671ce7678fa787d1a73bfffa4804f443c642c691aa/ty-0.0.1a29-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91acf7c35f655c1cb38b7029dc09330fccbc5d18fffb0ecbd7f218518d5fb8d3", size = 10135269, upload-time = "2025-11-28T20:23:59.584Z" }, - { url = "https://files.pythonhosted.org/packages/c9/46/6a7db14e584bd1b3da6b21a02190e218ddc3720a5b699b56039d142c4674/ty-0.0.1a29-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3260e3524d038c08234d77c0b8aecd8c12096ec6df153960591dedbd5688078", size = 10161544, upload-time = "2025-11-28T20:23:47.514Z" }, - { url = "https://files.pythonhosted.org/packages/52/9e/04dfd308788117fe04cc6fe85612ea2945d852c1c9c80150f5aae0d7fe0c/ty-0.0.1a29-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fc361cc48f901727a5f7a05cede0102cd4d1eba4aebc6269cb8ec7db23e6c86", size = 9706468, upload-time = "2025-11-28T20:24:13.565Z" }, - { url = "https://files.pythonhosted.org/packages/3f/86/3c0e21b7d7a4f721f5eb35542ed672c790ef3c0570f5665ff26bad4f3c69/ty-0.0.1a29-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8ef39318e49f0cf7f7d1de79003c0939749dc0bb5e3a54c8c3a15d0c6950dc8f", size = 9142491, upload-time = "2025-11-28T20:24:01.311Z" }, - { url = "https://files.pythonhosted.org/packages/0e/a8/55ce8472174efe1d53a6f25c8e325894e121471ecf4332957c941a503cef/ty-0.0.1a29-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:c6f4288b96d37d714542f2e8475d3822be427ebc573a85fc374c1eda7f0381fc", size = 9405392, upload-time = "2025-11-28T20:23:56.368Z" }, - { url = "https://files.pythonhosted.org/packages/46/a6/2889a049257b0dd5c41ee0ca4c0081959b46184338ed378743f45c3d997d/ty-0.0.1a29-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e0fb272452129ba2cd1445a596a4a85c94ec52cb58fb800ed19a3056d8aa84d5", size = 9516865, upload-time = "2025-11-28T20:24:05.247Z" }, - { url = 
"https://files.pythonhosted.org/packages/50/78/35e5bdce73d9f631a14cb838b024377a5c7fcc73a2254a993e9060247d52/ty-0.0.1a29-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:fb6d3ac94a95e86d6f3bc9e39b8e7a3e300be4224b1ac7984ccb3136dfa41d77", size = 9815887, upload-time = "2025-11-28T20:24:08.895Z" }, - { url = "https://files.pythonhosted.org/packages/95/f9/6bb402efa8ad252d5e6b39eeb2a920ef85792c9602617bf391df7c40313a/ty-0.0.1a29-py3-none-win32.whl", hash = "sha256:fb4df9f8bf401a42019526c0da72e26de1b9dab3188d1df59ec6ecbd15edce4a", size = 9029263, upload-time = "2025-11-28T20:24:18.057Z" }, - { url = "https://files.pythonhosted.org/packages/be/f0/3e314ee1a369eba776f3f8e9fac535b9703127097b7e52de5aba025d5c99/ty-0.0.1a29-py3-none-win_amd64.whl", hash = "sha256:3908a8b12616c52520bc7dc1a14732c3b86181125b1326444fc37049d2a20c37", size = 9875790, upload-time = "2025-11-28T20:24:15.884Z" }, - { url = "https://files.pythonhosted.org/packages/af/c2/37d81529242602cd486cd112a93312874948d276515e5fb0718b0f99758d/ty-0.0.1a29-py3-none-win_arm64.whl", hash = "sha256:999ae9077f153fd1804b840d21d904850b9047e361a6a67da8d06dadf94a189a", size = 9373509, upload-time = "2025-11-28T20:24:19.941Z" }, + { url = "https://files.pythonhosted.org/packages/0f/34/b1d05cdcd01589a8d2e63011e0a1e24dcefdc2a09d024fee3e27755963f6/ty-0.0.11-py3-none-linux_armv6l.whl", hash = "sha256:68f0b8d07b0a2ea7ec63a08ba2624f853e4f9fa1a06fce47fb453fa279dead5a", size = 9521748, upload-time = "2026-01-09T21:06:13.221Z" }, + { url = "https://files.pythonhosted.org/packages/43/21/f52d93f4b3784b91bfbcabd01b84dc82128f3a9de178536bcf82968f3367/ty-0.0.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:cbf82d7ef0618e9ae3cc3c37c33abcfa302c9b3e3b8ff11d71076f98481cb1a8", size = 9454903, upload-time = "2026-01-09T21:06:42.363Z" }, + { url = "https://files.pythonhosted.org/packages/ad/01/3a563dba8b1255e474c35e1c3810b7589e81ae8c41df401b6a37c8e2cde9/ty-0.0.11-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:121987c906e02264c3b511b95cb9f8a3cdd66f3283b8bbab678ca3525652e304", size = 8823417, upload-time = "2026-01-09T21:06:26.315Z" }, + { url = "https://files.pythonhosted.org/packages/6f/b1/99b87222c05d3a28fb7bbfb85df4efdde8cb6764a24c1b138f3a615283dd/ty-0.0.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:999390b6cc045fe5e1b3da1c2c9ae8e8c0def23b69455e7c9191ba9ffd747023", size = 9290785, upload-time = "2026-01-09T21:05:59.028Z" }, + { url = "https://files.pythonhosted.org/packages/3d/9f/598809a8fff2194f907ba6de07ac3d7b7788342592d8f8b98b1b50c2fb49/ty-0.0.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed504d78eb613c49be3c848f236b345b6c13dc6bcfc4b202790a60a97e1d8f35", size = 9359392, upload-time = "2026-01-09T21:06:37.459Z" }, + { url = "https://files.pythonhosted.org/packages/71/3e/aeea2a97b38f3dcd9f8224bf83609848efa4bc2f484085508165567daa7b/ty-0.0.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7fedc8b43cc8a9991e0034dd205f957a8380dd29bfce36f2a35b5d321636dfd9", size = 9852973, upload-time = "2026-01-09T21:06:21.245Z" }, + { url = "https://files.pythonhosted.org/packages/72/40/86173116995e38f954811a86339ac4c00a2d8058cc245d3e4903bc4a132c/ty-0.0.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:0808bdfb7efe09881bf70249b85b0498fb8b75fbb036ce251c496c20adb10075", size = 10796113, upload-time = "2026-01-09T21:06:16.034Z" }, + { url = "https://files.pythonhosted.org/packages/69/71/97c92c401dacae9baa3696163ebe8371635ebf34ba9fda781110d0124857/ty-0.0.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:07185b3e38b18c562056dfbc35fb51d866f872977ea1ebcd64ca24a001b5b4f1", size = 10432137, upload-time = "2026-01-09T21:06:07.498Z" }, + { url = "https://files.pythonhosted.org/packages/18/10/9ab43f3cfc5f7792f6bc97620f54d0a0a81ef700be84ea7f6be330936a99/ty-0.0.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:b5c72f1ada8eb5be984502a600f71d1a3099e12fb6f3c0607aaba2f86f0e9d80", size = 10240520, upload-time = "2026-01-09T21:06:34.823Z" }, + { url = "https://files.pythonhosted.org/packages/74/18/8dd4fe6df1fd66f3e83b4798eddb1d8482d9d9b105f25099b76703402ebb/ty-0.0.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25f88e8789072830348cb59b761d5ced70642ed5600673b4bf6a849af71eca8b", size = 9973340, upload-time = "2026-01-09T21:06:39.657Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0b/fb2301450cf8f2d7164944d6e1e659cac9ec7021556cc173d54947cf8ef4/ty-0.0.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f370e1047a62dcedcd06e2b27e1f0b16c7f8ea2361d9070fcbf0d0d69baaa192", size = 9262101, upload-time = "2026-01-09T21:06:28.989Z" }, + { url = "https://files.pythonhosted.org/packages/f7/8c/d6374af023541072dee1c8bcfe8242669363a670b7619e6fffcc7415a995/ty-0.0.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:52be34047ed6177bfcef9247459a767ec03d775714855e262bca1fb015895e8a", size = 9382756, upload-time = "2026-01-09T21:06:24.097Z" }, + { url = "https://files.pythonhosted.org/packages/0d/44/edd1e63ffa8d49d720c475c2c1c779084e5efe50493afdc261938705d10a/ty-0.0.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b9e5762ccb3778779378020b8d78f936b3f52ea83f18785319cceba3ae85d8e6", size = 9553944, upload-time = "2026-01-09T21:06:18.426Z" }, + { url = "https://files.pythonhosted.org/packages/35/cd/4afdb0d182d23d07ff287740c4954cc6dde5c3aed150ec3f2a1d72b00f71/ty-0.0.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e9334646ee3095e778e3dbc45fdb2bddfc16acc7804283830ad84991ece16dd7", size = 10060365, upload-time = "2026-01-09T21:06:45.083Z" }, + { url = "https://files.pythonhosted.org/packages/d1/94/a009ad9d8b359933cfea8721c689c0331189be28650d74dcc6add4d5bb09/ty-0.0.11-py3-none-win32.whl", hash = "sha256:44cfb7bb2d6784bd7ffe7b5d9ea90851d9c4723729c50b5f0732d4b9a2013cfc", size = 9040448, upload-time = "2026-01-09T21:06:32.241Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/04/5a5dfd0aec0ea99ead1e824ee6e347fb623c464da7886aa1e3660fb0f36c/ty-0.0.11-py3-none-win_amd64.whl", hash = "sha256:1bb205db92715d4a13343bfd5b0c59ce8c0ca0daa34fb220ec9120fc66ccbda7", size = 9780112, upload-time = "2026-01-09T21:06:04.69Z" }, + { url = "https://files.pythonhosted.org/packages/ad/07/47d4fccd7bcf5eea1c634d518d6cb233f535a85d0b63fcd66815759e2fa0/ty-0.0.11-py3-none-win_arm64.whl", hash = "sha256:4688bd87b2dc5c85da277bda78daba14af2e66f3dda4d98f3604e3de75519eba", size = 9194038, upload-time = "2026-01-09T21:06:10.152Z" }, ] [[package]] From 4ef769f16670c4dc7ddc2bf4bda351ea34efa542 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Tue, 13 Jan 2026 23:38:32 +0000 Subject: [PATCH 14/23] TUI Markdown viewer pagination and tweaks --- claude_code_log/tui.py | 164 ++++++++++++++++++++- test/test_tui.py | 326 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 486 insertions(+), 4 deletions(-) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index ca8580e7..4536e00c 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -4,7 +4,7 @@ import os import webbrowser from datetime import datetime -from pathlib import Path +from pathlib import Path, PurePath from typing import Any, ClassVar, List, Optional, cast from textual.app import App, ComposeResult @@ -465,9 +465,44 @@ def _handle_restore_project_confirm(self, confirmed: bool | None) -> None: self.populate_table() +class SafeMarkdownViewer(MarkdownViewer): + """MarkdownViewer that handles link clicks safely. + + Intercepts link clicks to prevent crashes from file/external links + while still allowing anchor navigation for ToC. + """ + + async def go(self, location: str | PurePath) -> None: + """Navigate to a new location - intercept non-anchor links. 
+ + Override parent's go() method to handle links appropriately: + - Anchor links (#section): allow default scrolling + - HTTP/HTTPS URLs: open in browser + - Relative file links: show warning (not supported) + """ + location_str = str(location) + + if location_str.startswith("#"): + # Anchor link - allow default scroll behaviour + await super().go(location) + elif location_str.startswith(("http://", "https://")): + # External URL - open in browser + webbrowser.open(location_str) + self.notify(f"Opening in browser: {location_str[:50]}...") + else: + # Relative file link - not supported in embedded viewer + self.notify( + "File links not supported in embedded viewer", + severity="warning", + ) + + class MarkdownViewerScreen(ModalScreen[None]): """Modal screen for viewing Markdown content with table of contents.""" + # Character-based pagination - ~50KB per page for good scroll performance + PAGE_SIZE_CHARS = 50_000 + CSS = """ MarkdownViewerScreen { align: center middle; @@ -498,6 +533,14 @@ class MarkdownViewerScreen(ModalScreen[None]): max-width: 60; } + #pagination-controls { + dock: top; + height: 1; + background: $warning; + color: $text; + text-align: center; + } + #md-footer { dock: bottom; height: 1; @@ -510,20 +553,89 @@ class MarkdownViewerScreen(ModalScreen[None]): BINDINGS: ClassVar[list[BindingType]] = [ Binding("escape", "dismiss", "Close", show=True), Binding("q", "dismiss", "Close", show=False), + Binding("t", "toggle_toc", "Toggle ToC"), + Binding("n", "next_page", "Next page"), + Binding("right", "next_page", "Next page", show=False), + Binding("p", "prev_page", "Prev page"), + Binding("left", "prev_page", "Prev page", show=False), ] def __init__(self, content: str, title: str = "Markdown Viewer") -> None: super().__init__() self.md_content = content self.md_title = title + self._pages = self._split_into_pages(content) + self._current_page = 0 + self._is_paginated = len(self._pages) > 1 + + def _split_into_pages(self, content: str) -> list[str]: + 
"""Split markdown content into pages by character count. + + Splits at section boundaries (## ) when possible to avoid + cutting mid-section, but will split within sections if + a single section exceeds PAGE_SIZE_CHARS. + """ + import re + + if len(content) <= self.PAGE_SIZE_CHARS: + return [content] + + pages: list[str] = [] + current_page = "" + + # Split by level 2 headings, keeping the delimiter + sections = re.split(r"(\n(?=## ))", content) + + for section in sections: + if not section: + continue + + # If adding this section exceeds page size + if len(current_page) + len(section) > self.PAGE_SIZE_CHARS: + # If current page has content, save it + if current_page.strip(): + pages.append(current_page) + current_page = "" + + # If section itself exceeds page size, split it by lines + if len(section) > self.PAGE_SIZE_CHARS: + lines = section.split("\n") + for line in lines: + if len(current_page) + len(line) + 1 > self.PAGE_SIZE_CHARS: + if current_page.strip(): + pages.append(current_page) + current_page = line + "\n" + else: + current_page += line + "\n" + else: + current_page = section + else: + current_page += section + + # Don't forget the last page + if current_page.strip(): + pages.append(current_page) + + return pages if pages else [content] def compose(self) -> ComposeResult: with Container(id="md-container"): yield Static(self.md_title, id="md-header") - yield MarkdownViewer( - self.md_content, id="md-viewer", show_table_of_contents=True + if self._is_paginated: + yield Static( + f"Page {self._current_page + 1}/{len(self._pages)} | " + "← or p: prev | → or n: next", + id="pagination-controls", + ) + yield SafeMarkdownViewer( + self._pages[self._current_page], + id="md-viewer", + show_table_of_contents=True, ) - yield Static("Press ESC or q to close | t: toggle ToC", id="md-footer") + footer_text = "Press ESC or q to close | t: toggle ToC" + if self._is_paginated: + footer_text += " | n/p: navigate pages" + yield Static(footer_text, id="md-footer") def 
on_mount(self) -> None: """Customize ToC tree after mount.""" @@ -586,6 +698,50 @@ def _clean_toc_labels(self, node: Any) -> None: async def action_dismiss(self, result: None = None) -> None: self.dismiss(result) + def action_toggle_toc(self) -> None: + """Toggle table of contents visibility.""" + viewer = self.query_one("#md-viewer", MarkdownViewer) + viewer.show_table_of_contents = not viewer.show_table_of_contents + + def action_next_page(self) -> None: + """Navigate to next page (if paginated).""" + if not self._is_paginated: + return + if self._current_page < len(self._pages) - 1: + self._current_page += 1 + if self.is_mounted: + self._update_viewer_content() + + def action_prev_page(self) -> None: + """Navigate to previous page (if paginated).""" + if not self._is_paginated: + return + if self._current_page > 0: + self._current_page -= 1 + if self.is_mounted: + self._update_viewer_content() + + def _update_viewer_content(self) -> None: + """Update the markdown viewer with current page content.""" + try: + # Update pagination controls + controls = self.query_one("#pagination-controls", Static) + controls.update( + f"Page {self._current_page + 1}/{len(self._pages)} | ← or p: prev | → or n: next" + ) + + # Update the markdown content directly + viewer = self.query_one("#md-viewer", SafeMarkdownViewer) + viewer.document.update(self._pages[self._current_page]) + + # Scroll to top of content + viewer.scroll_home(animate=False) + + # Re-customize ToC after content loads + self.call_later(self._customize_toc_tree) + except Exception as e: + self.notify(f"Error updating page: {e}", severity="error") + class ArchiveConfirmScreen(ModalScreen[bool]): """Modal screen for confirming session archiving (delete JSONL, keep cache).""" diff --git a/test/test_tui.py b/test/test_tui.py index 9009a490..0bb26dcc 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -1933,3 +1933,329 @@ async def test_restore_project_not_archived_shows_warning(self): # Try to restore - should show 
@pytest.mark.tui
class TestMarkdownViewerScreen:
    """Tests for the MarkdownViewerScreen modal."""

    @pytest.mark.asyncio
    async def test_toc_toggle_binding_exists(self):
        """The screen declares a 't' key binding for toggling the ToC."""
        from claude_code_log.tui import MarkdownViewerScreen

        keys = [
            b.key if hasattr(b, "key") else b[0] for b in MarkdownViewerScreen.BINDINGS
        ]
        assert "t" in keys, "Should have 't' binding for ToC toggle"

    @pytest.mark.asyncio
    async def test_toc_toggle_action_toggles_visibility(self):
        """Pressing 't' flips ToC visibility back and forth."""
        from claude_code_log.tui import MarkdownViewerScreen
        from textual.app import App
        from textual.widgets import MarkdownViewer

        content = "# Heading 1\n\nSome content\n\n## Heading 2\n\nMore content"
        screen = MarkdownViewerScreen(content, "Test Title")

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()
        async with app.run_test() as pilot:
            app.push_screen(screen)
            await pilot.pause(0.3)

            viewer = screen.query_one("#md-viewer", MarkdownViewer)

            # ToC starts visible, and each 't' press inverts it.
            assert viewer.show_table_of_contents is True

            await pilot.press("t")
            await pilot.pause(0.1)
            assert viewer.show_table_of_contents is False

            await pilot.press("t")
            await pilot.pause(0.1)
            assert viewer.show_table_of_contents is True

    @pytest.mark.asyncio
    async def test_safe_markdown_viewer_overrides_go(self):
        """SafeMarkdownViewer must provide its own `go`, distinct from the parent's."""
        from claude_code_log.tui import SafeMarkdownViewer
        from textual.widgets import MarkdownViewer

        assert "go" in SafeMarkdownViewer.__dict__, "Should override go method"
        assert SafeMarkdownViewer.go is not MarkdownViewer.go

    @pytest.mark.asyncio
    async def test_file_link_click_does_not_crash(self):
        """Clicking a file link notifies the user instead of crashing the screen."""
        from claude_code_log.tui import MarkdownViewerScreen, SafeMarkdownViewer
        from textual.app import App
        from textual.widgets.markdown import Markdown

        content = "# Test\n\n[Back to combined](combined_transcripts.md)"
        screen = MarkdownViewerScreen(content, "Link Test")

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()
        notifications = []

        async with app.run_test() as pilot:
            app.push_screen(screen)
            await pilot.pause(0.3)

            # Wrap notify on the viewer (where the notification originates)
            # so we can record every message it shows.
            viewer = screen.query_one("#md-viewer", SafeMarkdownViewer)
            original_notify = viewer.notify

            def tracking_notify(
                message: str,
                *,
                title: str = "",
                severity: str = "information",
                timeout: float | None = None,
                markup: bool = True,
            ) -> None:
                notifications.append(str(message))
                original_notify(
                    message,
                    title=title,
                    severity=severity,  # type: ignore[arg-type]
                    timeout=timeout,
                    markup=markup,
                )

            viewer.notify = tracking_notify  # type: ignore[method-assign]

            # Simulate the link click by posting the event directly.
            markdown_widget = viewer.query_one(Markdown)
            markdown_widget.post_message(
                Markdown.LinkClicked(markdown_widget, "combined_transcripts.md")
            )
            await pilot.pause(0.2)

            # Screen survived and the user was told the link isn't supported.
            assert screen.is_mounted
            assert len(notifications) > 0
            assert any("not supported" in n.lower() for n in notifications)

    @pytest.mark.asyncio
    async def test_http_link_opens_browser(self):
        """HTTP(S) links are handed off to the system browser."""
        from claude_code_log.tui import MarkdownViewerScreen, SafeMarkdownViewer
        from textual.app import App
        from textual.widgets.markdown import Markdown

        content = "# Test\n\n[Example](https://example.com)"
        screen = MarkdownViewerScreen(content, "Link Test")

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()

        with patch("claude_code_log.tui.webbrowser.open") as mock_open:
            async with app.run_test() as pilot:
                app.push_screen(screen)
                await pilot.pause(0.3)

                viewer = screen.query_one("#md-viewer", SafeMarkdownViewer)
                markdown_widget = viewer.query_one(Markdown)
                markdown_widget.post_message(
                    Markdown.LinkClicked(markdown_widget, "https://example.com")
                )
                await pilot.pause(0.2)

            # At least one call expected (event propagation may trigger two).
            mock_open.assert_called_with("https://example.com")
            assert mock_open.call_count >= 1


@pytest.mark.tui
class TestMarkdownViewerPagination:
    """Tests for pagination in MarkdownViewerScreen."""

    @staticmethod
    def _build_large_content() -> str:
        """Build markdown roughly 2.5x PAGE_SIZE_CHARS so pagination triggers.

        Uses line breaks so the pagination algorithm can split cleanly.
        """
        from claude_code_log.tui import MarkdownViewerScreen

        filler = "Content line with some text here.\n"
        repeat = int(MarkdownViewerScreen.PAGE_SIZE_CHARS * 2.5 / len(filler))
        return "# Large Session\n\n" + (filler * repeat)

    @pytest.mark.asyncio
    async def test_pagination_constants_defined(self):
        """The screen exposes a positive PAGE_SIZE_CHARS constant."""
        from claude_code_log.tui import MarkdownViewerScreen

        assert hasattr(MarkdownViewerScreen, "PAGE_SIZE_CHARS"), (
            "Should have PAGE_SIZE_CHARS constant"
        )
        assert MarkdownViewerScreen.PAGE_SIZE_CHARS > 0

    @pytest.mark.asyncio
    async def test_small_content_no_pagination(self):
        """Small content mounts without any pagination controls."""
        from claude_code_log.tui import MarkdownViewerScreen
        from textual.app import App

        screen = MarkdownViewerScreen("# Small\n\nJust a bit of content.", "Small Test")

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()
        async with app.run_test() as pilot:
            app.push_screen(screen)
            await pilot.pause(0.3)

            try:
                screen.query_one("#pagination-controls")
                assert False, "Small content should not show pagination controls"
            except NoMatches:
                pass  # Expected - no pagination for small content

    @pytest.mark.asyncio
    async def test_large_content_shows_pagination(self):
        """Large content is paginated and renders pagination controls."""
        from claude_code_log.tui import MarkdownViewerScreen
        from textual.app import App

        screen = MarkdownViewerScreen(self._build_large_content(), "Large Test")

        # Pagination state is computed at construction (checked without UI for speed).
        assert screen._is_paginated
        assert len(screen._pages) >= 2

        class TestApp(App):
            def compose(self):
                yield from []

        app = TestApp()
        async with app.run_test() as pilot:
            app.push_screen(screen)
            await pilot.pause(0.5)

            controls = screen.query_one("#pagination-controls")
            assert controls is not None

    @pytest.mark.asyncio
    async def test_pagination_bindings_exist(self):
        """n/p and arrow keys are bound for page navigation."""
        from claude_code_log.tui import MarkdownViewerScreen

        keys = [
            b.key if hasattr(b, "key") else b[0] for b in MarkdownViewerScreen.BINDINGS
        ]
        assert "n" in keys, "Should have 'n' binding for next page"
        assert "p" in keys, "Should have 'p' binding for previous page"
        assert "right" in keys, "Should have 'right' arrow binding for next page"
        assert "left" in keys, "Should have 'left' arrow binding for prev page"

    @pytest.mark.asyncio
    async def test_next_page_action_updates_state(self):
        """action_next_page advances the internal page counter."""
        from claude_code_log.tui import MarkdownViewerScreen

        screen = MarkdownViewerScreen(self._build_large_content(), "Pagination Test")

        assert screen._current_page == 0
        assert screen._is_paginated
        assert len(screen._pages) >= 3, f"Expected 3+ pages, got {len(screen._pages)}"

        # Drive the action directly (no UI needed).
        screen.action_next_page()
        assert screen._current_page == 1

        screen.action_next_page()
        assert screen._current_page == 2

    @pytest.mark.asyncio
    async def test_prev_page_action_updates_state(self):
        """action_prev_page steps back through previously visited pages."""
        from claude_code_log.tui import MarkdownViewerScreen

        screen = MarkdownViewerScreen(self._build_large_content(), "Pagination Test")
        assert len(screen._pages) >= 3, f"Expected 3+ pages, got {len(screen._pages)}"

        screen.action_next_page()
        screen.action_next_page()
        assert screen._current_page == 2

        screen.action_prev_page()
        assert screen._current_page == 1

        screen.action_prev_page()
        assert screen._current_page == 0

    @pytest.mark.asyncio
    async def test_page_boundaries_respected(self):
        """Navigation clamps at the first and last page."""
        from claude_code_log.tui import MarkdownViewerScreen

        screen = MarkdownViewerScreen(self._build_large_content(), "Pagination Test")

        # Prev on the first page is a no-op.
        assert screen._current_page == 0
        screen.action_prev_page()
        assert screen._current_page == 0

        # Overshoot past the end; position must clamp to the last page.
        total_pages = len(screen._pages)
        for _ in range(total_pages + 5):
            screen.action_next_page()
        assert screen._current_page == total_pages - 1

        screen.action_next_page()
        assert screen._current_page == total_pages - 1
(ValueError, AttributeError): return "Unknown" diff --git a/test/test_tui.py b/test/test_tui.py index 0bb26dcc..dc45ba24 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -560,9 +560,15 @@ def test_format_timestamp(self, temp_project_dir): """Test timestamp formatting.""" app = SessionBrowser(temp_project_dir) - # Test valid timestamp + # Test valid timestamp (default long format includes year) formatted = app.format_timestamp("2025-01-01T10:00:00Z") - assert formatted == "01-01 10:00" + assert formatted == "2025-01-01 10:00" + + # Test short format (no year) + formatted_short = app.format_timestamp( + "2025-01-01T10:00:00Z", short_format=True + ) + assert formatted_short == "01-01 10:00" # Test date only formatted_date = app.format_timestamp("2025-01-01T10:00:00Z", date_only=True) @@ -1691,11 +1697,15 @@ async def test_enter_key_selects_project(self): with tempfile.TemporaryDirectory() as temp_dir: project1 = Path(temp_dir) / "project1" project1.mkdir() - (project1 / "session-1.jsonl").write_text('{"type":"user"}\n') + (project1 / "session-1.jsonl").write_text( + '{"type":"user"}\n', encoding="utf-8" + ) project2 = Path(temp_dir) / "project2" project2.mkdir() - (project2 / "session-2.jsonl").write_text('{"type":"user"}\n') + (project2 / "session-2.jsonl").write_text( + '{"type":"user"}\n', encoding="utf-8" + ) app = ProjectSelector( projects=[project1, project2], @@ -1738,8 +1748,8 @@ async def test_archive_project_action(self): project_path.mkdir() jsonl1 = project_path / "session-1.jsonl" jsonl2 = project_path / "session-2.jsonl" - jsonl1.write_text('{"type":"user"}\n') - jsonl2.write_text('{"type":"user"}\n') + jsonl1.write_text('{"type":"user"}\n', encoding="utf-8") + jsonl2.write_text('{"type":"user"}\n', encoding="utf-8") app = ProjectSelector( projects=[project_path], @@ -1800,7 +1810,7 @@ async def test_delete_project_cache_only(self): project_path = Path(temp_dir) / "project1" project_path.mkdir() jsonl = project_path / "session-1.jsonl" - 
jsonl.write_text('{"type":"user"}\n') + jsonl.write_text('{"type":"user"}\n', encoding="utf-8") app = ProjectSelector( projects=[project_path], @@ -1831,7 +1841,7 @@ async def test_delete_project_both(self): project_path = Path(temp_dir) / "project1" project_path.mkdir() jsonl = project_path / "session-1.jsonl" - jsonl.write_text('{"type":"user"}\n') + jsonl.write_text('{"type":"user"}\n', encoding="utf-8") app = ProjectSelector( projects=[project_path], @@ -1916,7 +1926,9 @@ async def test_restore_project_not_archived_shows_warning(self): with tempfile.TemporaryDirectory() as temp_dir: project_path = Path(temp_dir) / "project1" project_path.mkdir() - (project_path / "session-1.jsonl").write_text('{"type":"user"}\n') + (project_path / "session-1.jsonl").write_text( + '{"type":"user"}\n', encoding="utf-8" + ) app = ProjectSelector( projects=[project_path], From 2d071c69b4b9783b56c199398f70ae3e8fccc369 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 19 Jan 2026 00:28:42 +0000 Subject: [PATCH 16/23] Last little fixes --- claude_code_log/cache.py | 40 +++++++++- claude_code_log/converter.py | 12 ++- test/test_cache.py | 143 +++++++++++++++++++++++++++++++++++ 3 files changed, 193 insertions(+), 2 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index ebc4363a..924121cf 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -4,6 +4,7 @@ import json import logging import os +import re import sqlite3 import zlib from contextlib import contextmanager @@ -294,13 +295,50 @@ def _update_last_updated(self, conn: sqlite3.Connection) -> None: (datetime.now().isoformat(), self._project_id), ) + def _normalize_timestamp(self, timestamp: Optional[str]) -> Optional[str]: + """Normalize timestamp to consistent format for reliable string comparison. 
+ + Converts various ISO 8601 formats to a canonical form: + - Strips fractional seconds (e.g., '.875368') + - Normalizes timezone to 'Z' suffix + + This ensures lexicographic string comparison works correctly in SQL queries. + Without normalization, '2023-01-01T10:00:00.5Z' < '2023-01-01T10:00:00Z' + because '.' < 'Z' in ASCII, even though the first is 500ms later. + + Args: + timestamp: ISO 8601 timestamp string, or None + + Returns: + Normalized timestamp in 'YYYY-MM-DDTHH:MM:SSZ' format, or None + """ + if timestamp is None: + return None + + # Pattern matches: YYYY-MM-DDTHH:MM:SS followed by optional fractional seconds + # and timezone (Z or +HH:MM or +HH or +HHMM) + match = re.match( + r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})" # Base datetime + r"(?:\.\d+)?" # Optional fractional seconds (discard) + r"(?:Z|[+-]\d{2}:?\d{0,2})?$", # Optional timezone + timestamp, + ) + + if match: + # Return just the base datetime with Z suffix + return match.group(1) + "Z" + + # If pattern doesn't match, return original (shouldn't happen with valid data) + return timestamp + def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, Any]: """Convert TranscriptEntry to dict for SQLite insertion.""" + raw_timestamp = getattr(entry, "timestamp", None) base: Dict[str, Any] = { "project_id": self._project_id, "file_id": file_id, "type": entry.type, - "timestamp": getattr(entry, "timestamp", None), + "timestamp": self._normalize_timestamp(raw_timestamp), "session_id": getattr(entry, "sessionId", None), "_uuid": getattr(entry, "uuid", None), "_parent_uuid": getattr(entry, "parentUuid", None), diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index 464163b5..b22aa7bf 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -1027,7 +1027,17 @@ def convert_jsonl_to( assert cache_manager is not None # Ensured by use_pagination condition # Use cached session data if available, otherwise build from messages if cached_data is 
not None: - session_data = cached_data.sessions + warmup_session_ids = get_warmup_session_ids(messages) + current_session_ids: set[str] = set() + for message in messages: + session_id = getattr(message, "sessionId", "") + if session_id and session_id not in warmup_session_ids: + current_session_ids.add(session_id) + session_data = { + session_id: session_cache + for session_id, session_cache in cached_data.sessions.items() + if session_id in current_session_ids + } else: session_data = _build_session_data_from_messages(messages) output_path = _generate_paginated_html( diff --git a/test/test_cache.py b/test/test_cache.py index ee843ba6..71a57b34 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -327,6 +327,149 @@ def test_filtered_loading_with_z_suffix_boundary( ) assert "End of day message" in str(user_messages[0].message.content) + def test_filtered_loading_with_mixed_timestamp_formats( + self, cache_manager, temp_project_dir + ): + """Test filtering with mixed timestamp formats (with/without fractional seconds). + + This tests the bug where timestamps like '2023-01-01T10:00:00.875368Z' + were incorrectly compared against filter bounds like '2023-01-01T10:00:00Z'. + String comparison fails because '.' < 'Z' alphabetically, causing the + timestamp with microseconds to be incorrectly excluded even though it's + actually 875ms AFTER the filter bound. 
+ """ + entries = [ + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user1", + timestamp="2023-01-01T10:00:00Z", # No fractional seconds + type="user", + message=UserMessageModel( + role="user", + content=[ + TextContent(type="text", text="Message without microseconds") + ], + ), + ), + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user2", + timestamp="2023-01-01T10:00:00.875368Z", # With microseconds - same second + type="user", + message=UserMessageModel( + role="user", + content=[ + TextContent(type="text", text="Message with microseconds") + ], + ), + ), + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user3", + timestamp="2023-01-01T10:00:01.123456Z", # Next second with microseconds + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="Message next second")], + ), + ), + ] + + jsonl_path = temp_project_dir / "test.jsonl" + jsonl_path.write_text("dummy content", encoding="utf-8") + + cache_manager.save_cached_entries(jsonl_path, entries) + + # Filter with from_date at exactly 10:00:00 - should include ALL messages + # The bug would cause the microsecond messages to be excluded because + # '2023-01-01T10:00:00.875368Z' < '2023-01-01T10:00:00Z' in string comparison + filtered = cache_manager.load_cached_entries_filtered( + jsonl_path, "2023-01-01 10:00:00", "2023-01-01 10:00:01" + ) + + assert filtered is not None + user_messages = [entry for entry in filtered if entry.type == "user"] + + # All 3 messages should be included + assert len(user_messages) == 3, ( + f"Expected 3 messages, got {len(user_messages)}. " + "Messages with fractional seconds may have been incorrectly excluded " + "due to string comparison where '.' 
< 'Z'." + ) + + def test_timestamp_ordering_with_mixed_formats( + self, cache_manager, temp_project_dir + ): + """Test that timestamps are correctly ordered regardless of format. + + Without normalization, ORDER BY timestamp would sort: + - '2023-01-01T10:00:00.5Z' BEFORE '2023-01-01T10:00:00Z' + because '.' < 'Z' in ASCII, even though .5 seconds is AFTER 0 seconds. + """ + entries = [ + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user1", + timestamp="2023-01-01T10:00:00.500000Z", # 500ms into the second + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="Second message (500ms)")], + ), + ), + UserTranscriptEntry( + parentUuid=None, + isSidechain=False, + userType="user", + cwd="/test", + sessionId="session1", + version="1.0.0", + uuid="user2", + timestamp="2023-01-01T10:00:00Z", # Start of the second + type="user", + message=UserMessageModel( + role="user", + content=[TextContent(type="text", text="First message (0ms)")], + ), + ), + ] + + jsonl_path = temp_project_dir / "test.jsonl" + jsonl_path.write_text("dummy content", encoding="utf-8") + + cache_manager.save_cached_entries(jsonl_path, entries) + + # Load all entries - they should be in timestamp order + loaded = cache_manager.load_cached_entries(jsonl_path) + + assert loaded is not None + user_messages = [entry for entry in loaded if entry.type == "user"] + + # With normalization to second precision, both messages have the same + # normalized timestamp, so order may vary. The key thing is that the + # filtering works correctly - ordering within the same second is less critical. 
+ assert len(user_messages) == 2 + def test_clear_cache(self, cache_manager, temp_project_dir, sample_entries): """Test cache clearing functionality.""" jsonl_path = temp_project_dir / "test.jsonl" From 99312262528cc62529a333c1bfa4f046df04ac70 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Mon, 19 Jan 2026 23:54:41 +0100 Subject: [PATCH 17/23] Fix 'h' action with relative --projects-dir path Resolve project_path to absolute at SessionBrowser init so file:// URLs work correctly in the browser regardless of how the path was specified. Co-Authored-By: Claude Opus 4.5 --- claude_code_log/tui.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 79c9e1b6..8e9cb718 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -1212,9 +1212,9 @@ def __init__(self, project_path: Path, is_archived: bool = False): """Initialize the session browser with a project path.""" super().__init__() self.theme = "gruvbox" - self.project_path = project_path + self.project_path = project_path.resolve() self.is_archived_project = is_archived - self.cache_manager = CacheManager(project_path, get_library_version()) + self.cache_manager = CacheManager(self.project_path, get_library_version()) self.sessions = {} self.archived_sessions = {} From 6878843098d96799d33302cc8528c6639e32df29 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Tue, 20 Jan 2026 23:09:08 +0100 Subject: [PATCH 18/23] Fix Markdown viewer focus/scroll behavior with TOC - Preserve scroll position when toggling TOC visibility - Tab/Shift+Tab switch focus between TOC and content without scrolling - Hide TOC: focus moves to content, position preserved - Show TOC: focus moves to outline tree - Use on_key handler to intercept Tab before child widgets consume it - Focus Tree widget inside TOC and document inside viewer (not containers) Co-Authored-By: Claude Opus 4.5 --- claude_code_log/tui.py | 63 ++++++++++++++++++++++++++++++++++++++++-- 1 
file changed, 60 insertions(+), 3 deletions(-) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 8e9cb718..8c0387f5 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -554,6 +554,8 @@ class MarkdownViewerScreen(ModalScreen[None]): Binding("escape", "dismiss", "Close", show=True), Binding("q", "dismiss", "Close", show=False), Binding("t", "toggle_toc", "Toggle ToC"), + Binding("tab", "switch_focus", "Switch focus", show=False, priority=True), + Binding("shift+tab", "switch_focus", "Switch focus", show=False, priority=True), Binding("n", "next_page", "Next page"), Binding("right", "next_page", "Next page", show=False), Binding("p", "prev_page", "Prev page"), @@ -698,10 +700,65 @@ def _clean_toc_labels(self, node: Any) -> None: async def action_dismiss(self, result: None = None) -> None: self.dismiss(result) + def on_key(self, event: Any) -> None: + """Intercept Tab keys to handle focus switching without scroll.""" + if event.key in ("tab", "shift+tab"): + event.prevent_default() + event.stop() + self.action_switch_focus() + + def _focus_viewer_content(self, viewer: SafeMarkdownViewer) -> None: + """Focus the viewer's document content without scrolling.""" + # MarkdownViewer is a container; focus its document widget + try: + viewer.document.focus(scroll_visible=False) + except Exception: + viewer.focus(scroll_visible=False) + def action_toggle_toc(self) -> None: - """Toggle table of contents visibility.""" - viewer = self.query_one("#md-viewer", MarkdownViewer) - viewer.show_table_of_contents = not viewer.show_table_of_contents + """Toggle table of contents visibility, preserving scroll position.""" + viewer = self.query_one("#md-viewer", SafeMarkdownViewer) + scroll_y = viewer.scroll_y + will_show_toc = not viewer.show_table_of_contents + viewer.show_table_of_contents = will_show_toc + + def restore_and_focus() -> None: + viewer.scroll_to(y=scroll_y, animate=False) + if will_show_toc: + # Focus the Tree inside TOC when showing + try: 
+ toc = viewer.table_of_contents + tree = cast("Tree[Any]", toc.query_one(Tree)) + tree.focus(scroll_visible=False) + except Exception: + pass + else: + # Focus the document content when hiding TOC + self._focus_viewer_content(viewer) + + self.call_later(restore_and_focus) + + def action_switch_focus(self) -> None: + """Switch focus between TOC and content without scrolling.""" + viewer = self.query_one("#md-viewer", SafeMarkdownViewer) + if not viewer.show_table_of_contents: + # TOC hidden, just focus the document + self._focus_viewer_content(viewer) + return + + try: + toc = viewer.table_of_contents + # Get the Tree widget inside the TOC + tree = cast("Tree[Any]", toc.query_one(Tree)) + if tree.has_focus: + # Currently in TOC tree, switch to document + self._focus_viewer_content(viewer) + else: + # Currently in document, switch to TOC tree + tree.focus(scroll_visible=False) + except Exception as e: + self.notify(f"Focus switch error: {e}", severity="warning") + self._focus_viewer_content(viewer) def action_next_page(self) -> None: """Navigate to next page (if paginated).""" From 2aa677406941620e2bc79d37b5374f54f84a6878 Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Tue, 20 Jan 2026 23:26:54 +0100 Subject: [PATCH 19/23] Fix test_init assertion for Windows short paths Compare resolved paths since SessionBrowser.project_path is now resolved. Fixes Windows CI where temp paths use 8.3 short names (RUNNER~1). 
Co-Authored-By: Claude Opus 4.5 --- test/test_tui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_tui.py b/test/test_tui.py index dc45ba24..8e78f6cb 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -115,7 +115,8 @@ class TestSessionBrowser: def test_init(self, temp_project_dir): """Test SessionBrowser initialization.""" app = SessionBrowser(temp_project_dir) - assert app.project_path == temp_project_dir + # SessionBrowser resolves path, so compare resolved paths + assert app.project_path == temp_project_dir.resolve() assert isinstance(app.cache_manager, CacheManager) assert app.sessions == {} assert app.selected_session_id is None From afb2f8090a378743e8c3bd8d80eb424ca1de1a4c Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Tue, 20 Jan 2026 23:57:36 +0100 Subject: [PATCH 20/23] Fix Markdown viewer maximize and focus behavior - Override action_maximize to maximize entire MarkdownViewer, not children - Set ALLOW_MAXIMIZE=True on SafeMarkdownViewer - Make document focusable for proper keyboard navigation - Fix test_init for Windows short path resolution Co-Authored-By: Claude Opus 4.5 --- claude_code_log/tui.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py index 8c0387f5..1eb1e233 100644 --- a/claude_code_log/tui.py +++ b/claude_code_log/tui.py @@ -472,6 +472,14 @@ class SafeMarkdownViewer(MarkdownViewer): while still allowing anchor navigation for ToC. """ + # Allow maximizing the viewer (screen will redirect children to this) + ALLOW_MAXIMIZE = True + + def on_mount(self) -> None: + """Configure document for proper keyboard navigation.""" + # Enable focus on the document so keys work after focus changes + self.document.can_focus = True + async def go(self, location: str | PurePath) -> None: """Navigate to a new location - intercept non-anchor links. 
@@ -700,6 +708,14 @@ def _clean_toc_labels(self, node: Any) -> None: async def action_dismiss(self, result: None = None) -> None: self.dismiss(result) + def action_maximize(self) -> None: + """Maximize the MarkdownViewer (not individual children).""" + try: + viewer = self.query_one("#md-viewer", SafeMarkdownViewer) + self.maximize(viewer) + except Exception: + pass + def on_key(self, event: Any) -> None: """Intercept Tab keys to handle focus switching without scroll.""" if event.key in ("tab", "shift+tab"): From ed0dc10611b19b68eac3265efa6319234ccc371d Mon Sep 17 00:00:00 2001 From: Christian Boos Date: Wed, 21 Jan 2026 00:57:42 +0100 Subject: [PATCH 21/23] Fix test_export_action_with_selection for Windows short paths Use resolved path when checking browser call since SessionBrowser resolves project_path internally. Co-Authored-By: Claude Opus 4.5 --- test/test_tui.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_tui.py b/test/test_tui.py index 8e78f6cb..6e37f2b8 100644 --- a/test/test_tui.py +++ b/test/test_tui.py @@ -410,7 +410,8 @@ async def test_export_action_with_selection(self, temp_project_dir): app.action_export_selected() # Check that browser was opened with the session HTML file - expected_file = temp_project_dir / "session-session-123.html" + # Use resolved path since SessionBrowser resolves project_path + expected_file = temp_project_dir.resolve() / "session-session-123.html" mock_browser.assert_called_once_with(f"file://{expected_file}") @pytest.mark.asyncio From 449bae40e6c5047088eae9b03c310305d50b3e69 Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Mon, 19 Jan 2026 14:37:42 +0000 Subject: [PATCH 22/23] Fix confusing get_page_size_config method --- claude_code_log/cache.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 924121cf..67109652 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -1123,7 +1123,10 @@ def 
load_session_entries(self, session_id: str) -> List[TranscriptEntry]: # ========== Page Cache Methods (Pagination) ========== def get_page_size_config(self) -> Optional[int]: - """Get the configured page size from the most recent page, if any.""" + """Get the configured page size, if any pages exist. + + All pages in a project share the same page_size_config value. + """ if self._project_id is None: return None @@ -1131,7 +1134,6 @@ def get_page_size_config(self) -> Optional[int]: row = conn.execute( """SELECT page_size_config FROM html_pages WHERE project_id = ? - ORDER BY page_number ASC LIMIT 1""", (self._project_id,), ).fetchone() From 2b04a14dc2e0b870b1f4c5493f19eafeccbe545d Mon Sep 17 00:00:00 2001 From: Daniel Demmel Date: Wed, 21 Jan 2026 23:36:23 +0000 Subject: [PATCH 23/23] Ignore progress updates + prevent massive lines overwhelming the terminal buffer --- claude_code_log/converter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index b22aa7bf..d05791a6 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -212,13 +212,15 @@ def load_transcript( entry_type in [ "file-history-snapshot", # Internal Claude Code file backup metadata + "progress", # Real-time progress updates (hook_progress, bash_progress) ] ): # Silently skip internal message types we don't render pass else: + display_line = line[:1000] + "..." if len(line) > 1000 else line print( - f"Line {line_no} of {jsonl_path} is not a recognised message type: {line}" + f"Line {line_no} of {jsonl_path} is not a recognised message type: {display_line}" ) except json.JSONDecodeError as e: print(