diff --git a/claude_code_log/cache.py b/claude_code_log/cache.py index 3f5d43b7..67109652 100644 --- a/claude_code_log/cache.py +++ b/claude_code_log/cache.py @@ -1,14 +1,35 @@ #!/usr/bin/env python3 -"""Cache management for Claude Code Log to improve performance.""" +"""SQLite-based cache management for Claude Code Log.""" import json +import logging +import os +import re +import sqlite3 +import zlib +from contextlib import contextmanager +from datetime import datetime, timezone from pathlib import Path -from typing import Any, Optional, cast -from datetime import datetime -from pydantic import BaseModel +from typing import Any, Dict, Generator, List, Optional + from packaging import version +from pydantic import BaseModel + +from .factories import create_transcript_entry +from .migrations.runner import run_migrations +from .models import ( + AssistantTranscriptEntry, + QueueOperationTranscriptEntry, + SummaryTranscriptEntry, + SystemTranscriptEntry, + TranscriptEntry, + UserTranscriptEntry, +) + +logger = logging.getLogger(__name__) -from .models import TranscriptEntry + +# ========== Data Models ========== class CachedFileInfo(BaseModel): @@ -37,6 +58,38 @@ class SessionCacheData(BaseModel): total_cache_read_tokens: int = 0 +class HtmlCacheEntry(BaseModel): + """Information about a generated HTML file.""" + + html_path: str # e.g., "session-abc123.html" or "combined_transcripts.html" + generated_at: str # ISO timestamp when HTML was generated + source_session_id: Optional[str] = ( + None # session_id for individual files, None for combined + ) + message_count: int = 0 # for sanity checking + library_version: str # which version generated it + + +class PageCacheData(BaseModel): + """Information about a paginated combined transcript page.""" + + page_number: int + html_path: str # e.g., "combined_transcripts.html" or "combined_transcripts_2.html" + page_size_config: int # the --page-size value used + message_count: int # total messages on this page + session_ids: 
List[str] # sessions on this page, in order + first_session_id: str + last_session_id: str + first_timestamp: Optional[str] = None + last_timestamp: Optional[str] = None + total_input_tokens: int = 0 + total_output_tokens: int = 0 + total_cache_creation_tokens: int = 0 + total_cache_read_tokens: int = 0 + generated_at: str # ISO timestamp when page was generated + library_version: str + + class ProjectCache(BaseModel): """Project-level cache index structure for index.json.""" @@ -66,275 +119,516 @@ class ProjectCache(BaseModel): latest_timestamp: str = "" +# ========== Helper Functions ========== + + +def get_library_version() -> str: + """Get the current library version from package metadata or pyproject.toml.""" + # First try to get version from installed package metadata + try: + from importlib.metadata import version as get_version + + return get_version("claude-code-log") + except Exception: + # Package not installed or other error, continue to file-based detection + pass + + # Second approach: Use importlib.resources for more robust package location detection + try: + from importlib import resources + import toml + + # Get the package directory and navigate to parent for pyproject.toml + package_files = resources.files("claude_code_log") + # Convert to Path to access parent reliably + package_root = Path(str(package_files)).parent + pyproject_path = package_root / "pyproject.toml" + + if pyproject_path.exists(): + with open(pyproject_path, "r", encoding="utf-8") as f: + pyproject_data = toml.load(f) + return pyproject_data.get("project", {}).get("version", "unknown") + except Exception: + pass + + # Final fallback: Try to read from pyproject.toml using file-relative path + try: + import toml + + project_root = Path(__file__).parent.parent + pyproject_path = project_root / "pyproject.toml" + + if pyproject_path.exists(): + with open(pyproject_path, "r", encoding="utf-8") as f: + pyproject_data = toml.load(f) + return pyproject_data.get("project", 
{}).get("version", "unknown") + except Exception: + pass + + return "unknown" + + +# ========== Cache Path Configuration ========== + + +def get_cache_db_path(projects_dir: Path) -> Path: + """Get cache database path, respecting CLAUDE_CODE_LOG_CACHE_PATH env var. + + Priority: CLAUDE_CODE_LOG_CACHE_PATH env var > default location. + + Args: + projects_dir: Path to the projects directory (e.g., ~/.claude/projects) + + Returns: + Path to the SQLite cache database. + """ + env_path = os.getenv("CLAUDE_CODE_LOG_CACHE_PATH") + if env_path: + return Path(env_path) + return projects_dir / "claude-code-log-cache.db" + + +# ========== Cache Manager ========== + + class CacheManager: - """Manages cache operations for a project directory.""" + """SQLite-based cache manager for Claude Code Log.""" - def __init__(self, project_path: Path, library_version: str): - """Initialize cache manager for a project. + def __init__( + self, + project_path: Path, + library_version: str, + db_path: Optional[Path] = None, + ): + """Initialise cache manager for a project. Args: project_path: Path to the project directory containing JSONL files library_version: Current version of the library for cache invalidation + db_path: Optional explicit path to the cache database. If not provided, + uses CLAUDE_CODE_LOG_CACHE_PATH env var or default location. 
""" self.project_path = project_path self.library_version = library_version - self.cache_dir = project_path / "cache" - self.index_file = self.cache_dir / "index.json" - - # Ensure cache directory exists - self.cache_dir.mkdir(exist_ok=True) - - # Load existing cache index if available - self._project_cache: Optional[ProjectCache] = None - self._load_project_cache() - - def _load_project_cache(self) -> None: - """Load the project cache index from disk.""" - if self.index_file.exists(): - try: - with open(self.index_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - self._project_cache = ProjectCache.model_validate(cache_data) - - # Check if cache version is compatible with current library version - if not self._is_cache_version_compatible(self._project_cache.version): + + # Priority: explicit db_path > env var > default location + if db_path: + self.db_path = db_path + else: + self.db_path = get_cache_db_path(project_path.parent) + + # Initialise database and ensure project exists + self._init_database() + self._project_id: Optional[int] = None + self._ensure_project_exists() + + @contextmanager + def _get_connection(self) -> Generator[sqlite3.Connection, None, None]: + """Get a database connection with proper settings.""" + conn = sqlite3.connect(self.db_path, timeout=30.0) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + conn.execute("PRAGMA journal_mode = WAL") + try: + yield conn + finally: + conn.close() + + def _init_database(self) -> None: + """Create schema if needed using migration runner.""" + # Run any pending migrations + run_migrations(self.db_path) + + def _ensure_project_exists(self) -> None: + """Ensure project record exists and get its ID.""" + project_path_str = str(self.project_path) + + with self._get_connection() as conn: + row = conn.execute( + "SELECT id, version FROM projects WHERE project_path = ?", + (project_path_str,), + ).fetchone() + + if row: + self._project_id = row["id"] + cached_version = 
row["version"] + + # Check version compatibility + if not self._is_cache_version_compatible(cached_version): print( - f"Cache version incompatible: {self._project_cache.version} -> {self.library_version}, invalidating cache" + f"Cache version incompatible: {cached_version} -> {self.library_version}, invalidating cache" ) - self.clear_cache() - self._project_cache = None - except Exception as e: - print(f"Warning: Failed to load cache index, will rebuild: {e}") - self._project_cache = None - - # Initialize empty cache if none exists - if self._project_cache is None: - self._project_cache = ProjectCache( - version=self.library_version, - cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(self.project_path), - cached_files={}, - sessions={}, - ) + self._clear_project_data(conn) + self._project_id = self._create_project(conn) + else: + self._project_id = self._create_project(conn) + + conn.commit() + + def _create_project(self, conn: sqlite3.Connection) -> int: + """Create a new project record.""" + now = datetime.now().isoformat() + cursor = conn.execute( + """ + INSERT INTO projects (project_path, version, cache_created, last_updated) + VALUES (?, ?, ?, ?) 
+ """, + (str(self.project_path), self.library_version, now, now), + ) + return cursor.lastrowid or 0 + + def _clear_project_data(self, conn: sqlite3.Connection) -> None: + """Clear all data for the current project.""" + if self._project_id is None: + return + + # Cascade delete will handle messages and files + conn.execute("DELETE FROM projects WHERE id = ?", (self._project_id,)) - def _save_project_cache(self) -> None: - """Save the project cache index to disk.""" - if self._project_cache is None: + def _update_last_updated(self, conn: sqlite3.Connection) -> None: + """Update the last_updated timestamp for the project.""" + if self._project_id is None: return - self._project_cache.last_updated = datetime.now().isoformat() + conn.execute( + "UPDATE projects SET last_updated = ? WHERE id = ?", + (datetime.now().isoformat(), self._project_id), + ) + + def _normalize_timestamp(self, timestamp: Optional[str]) -> Optional[str]: + """Normalize timestamp to consistent format for reliable string comparison. + + Converts various ISO 8601 formats to a canonical form: + - Strips fractional seconds (e.g., '.875368') + - Normalizes timezone to 'Z' suffix + + This ensures lexicographic string comparison works correctly in SQL queries. + Without normalization, '2023-01-01T10:00:00.5Z' < '2023-01-01T10:00:00Z' + because '.' < 'Z' in ASCII, even though the first is 500ms later. + + Args: + timestamp: ISO 8601 timestamp string, or None + + Returns: + Normalized timestamp in 'YYYY-MM-DDTHH:MM:SSZ' format, or None + """ + if timestamp is None: + return None + + # Pattern matches: YYYY-MM-DDTHH:MM:SS followed by optional fractional seconds + # and timezone (Z or +HH:MM or +HH or +HHMM) + match = re.match( + r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})" # Base datetime + r"(?:\.\d+)?" 
# Optional fractional seconds (discard) + r"(?:Z|[+-]\d{2}:?\d{0,2})?$", # Optional timezone + timestamp, + ) + + if match: + # Return just the base datetime with Z suffix + return match.group(1) + "Z" + + # If pattern doesn't match, return original (shouldn't happen with valid data) + return timestamp + + def _serialize_entry(self, entry: TranscriptEntry, file_id: int) -> Dict[str, Any]: + """Convert TranscriptEntry to dict for SQLite insertion.""" + raw_timestamp = getattr(entry, "timestamp", None) + base: Dict[str, Any] = { + "project_id": self._project_id, + "file_id": file_id, + "type": entry.type, + "timestamp": self._normalize_timestamp(raw_timestamp), + "session_id": getattr(entry, "sessionId", None), + "_uuid": getattr(entry, "uuid", None), + "_parent_uuid": getattr(entry, "parentUuid", None), + "_is_sidechain": 1 if getattr(entry, "isSidechain", False) else 0, + "_user_type": getattr(entry, "userType", None), + "_cwd": getattr(entry, "cwd", None), + "_version": getattr(entry, "version", None), + "_is_meta": ( + 1 + if getattr(entry, "isMeta", None) is True + else (0 if getattr(entry, "isMeta", None) is False else None) + ), + "_agent_id": getattr(entry, "agentId", None), + "_request_id": None, + "input_tokens": None, + "output_tokens": None, + "cache_creation_tokens": None, + "cache_read_tokens": None, + "_leaf_uuid": None, + "_level": None, + "_operation": None, + "content": zlib.compress( + json.dumps(entry.model_dump(), separators=(",", ":")).encode("utf-8") + ), + } + + # Extract flattened usage for assistant messages + if isinstance(entry, AssistantTranscriptEntry): + base["_request_id"] = entry.requestId + if entry.message and entry.message.usage: + usage = entry.message.usage + base["input_tokens"] = usage.input_tokens + base["output_tokens"] = usage.output_tokens + base["cache_creation_tokens"] = usage.cache_creation_input_tokens + base["cache_read_tokens"] = usage.cache_read_input_tokens + + # User entry specific + if isinstance(entry, 
UserTranscriptEntry): + if entry.agentId: + base["_agent_id"] = entry.agentId + + # Summary specific + if isinstance(entry, SummaryTranscriptEntry): + base["_leaf_uuid"] = entry.leafUuid + + # System specific + if isinstance(entry, SystemTranscriptEntry): + base["_level"] = entry.level + + # Queue-operation specific + if isinstance(entry, QueueOperationTranscriptEntry): + base["_operation"] = entry.operation + + return base + + def _deserialize_entry(self, row: sqlite3.Row) -> TranscriptEntry: + """Convert SQLite row back to TranscriptEntry.""" + content_dict = json.loads(zlib.decompress(row["content"]).decode("utf-8")) + return create_transcript_entry(content_dict) + + def _get_file_id(self, jsonl_path: Path) -> Optional[int]: + """Get the file ID for a JSONL file.""" + if self._project_id is None: + return None - with open(self.index_file, "w", encoding="utf-8") as f: - json.dump(self._project_cache.model_dump(), f, indent=2) + with self._get_connection() as conn: + row = conn.execute( + "SELECT id FROM cached_files WHERE project_id = ? AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() - def _get_cache_file_path(self, jsonl_path: Path) -> Path: - """Get the cache file path for a given JSONL file.""" - return self.cache_dir / f"{jsonl_path.stem}.json" + return row["id"] if row else None def is_file_cached(self, jsonl_path: Path) -> bool: """Check if a JSONL file has a valid cache entry.""" - if self._project_cache is None: + if self._project_id is None: return False - file_key = jsonl_path.name - if file_key not in self._project_cache.cached_files: + if not jsonl_path.exists(): return False - # Check if source file exists and modification time matches - if not jsonl_path.exists(): + with self._get_connection() as conn: + row = conn.execute( + "SELECT source_mtime FROM cached_files WHERE project_id = ? 
AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() + + if not row: return False - cached_info = self._project_cache.cached_files[file_key] source_mtime = jsonl_path.stat().st_mtime + cached_mtime = row["source_mtime"] - # Cache is valid if modification times match and cache file exists - cache_file = self._get_cache_file_path(jsonl_path) - return ( - abs(source_mtime - cached_info.source_mtime) < 1.0 and cache_file.exists() - ) + # Cache is valid if modification times match (within 1 second tolerance) + return abs(source_mtime - cached_mtime) < 1.0 - def load_cached_entries(self, jsonl_path: Path) -> Optional[list[TranscriptEntry]]: + def load_cached_entries(self, jsonl_path: Path) -> Optional[List[TranscriptEntry]]: """Load cached transcript entries for a JSONL file.""" if not self.is_file_cached(jsonl_path): return None - cache_file = self._get_cache_file_path(jsonl_path) - try: - with open(cache_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) - - # Expect timestamp-keyed format - flatten all entries - entries_data: list[dict[str, Any]] = [] - for timestamp_entries in cache_data.values(): - if isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - entries_data.extend(cast(list[dict[str, Any]], timestamp_entries)) + file_id = self._get_file_id(jsonl_path) + if file_id is None: + return None - # Deserialize back to TranscriptEntry objects - from .factories import create_transcript_entry + with self._get_connection() as conn: + rows = conn.execute( + "SELECT content FROM messages WHERE file_id = ? 
ORDER BY timestamp NULLS LAST", + (file_id,), + ).fetchall() - entries = [ - create_transcript_entry(entry_dict) for entry_dict in entries_data - ] - return entries - except Exception as e: - print(f"Warning: Failed to load cached entries from {cache_file}: {e}") - return None + return [self._deserialize_entry(row) for row in rows] def load_cached_entries_filtered( self, jsonl_path: Path, from_date: Optional[str], to_date: Optional[str] - ) -> Optional[list[TranscriptEntry]]: - """Load cached entries with efficient timestamp-based filtering.""" + ) -> Optional[List[TranscriptEntry]]: + """Load cached entries with SQL-based timestamp filtering.""" if not self.is_file_cached(jsonl_path): return None - cache_file = self._get_cache_file_path(jsonl_path) - try: - with open(cache_file, "r", encoding="utf-8") as f: - cache_data = json.load(f) + # If no date filtering needed, fall back to regular loading + if not from_date and not to_date: + return self.load_cached_entries(jsonl_path) - # If no date filtering needed, fall back to regular loading - if not from_date and not to_date: - return self.load_cached_entries(jsonl_path) + file_id = self._get_file_id(jsonl_path) + if file_id is None: + return None - # Parse date filters - from .parser import parse_timestamp - import dateparser + # Parse dates + import dateparser + + from_dt = None + to_dt = None + + if from_date: + from_dt = dateparser.parse(from_date) + if from_dt and ( + from_date in ["today", "yesterday"] or "days ago" in from_date + ): + from_dt = from_dt.replace(hour=0, minute=0, second=0, microsecond=0) + + if to_date: + to_dt = dateparser.parse(to_date) + if to_dt: + to_dt = to_dt.replace(hour=23, minute=59, second=59, microsecond=999999) + + # Build query with SQL-based filtering + sql = "SELECT content FROM messages WHERE file_id = ?" 
+ params: List[Any] = [file_id] + + if from_dt: + # Normalize to UTC 'Z' format for consistent string comparison + # with stored timestamps (which use 'Z' suffix from JSONL) + if from_dt.tzinfo is None: + from_dt = from_dt.replace(tzinfo=timezone.utc) + from_bound = from_dt.strftime("%Y-%m-%dT%H:%M:%SZ") + # Include entries with NULL timestamp (like summaries) OR within date range + sql += " AND (timestamp IS NULL OR timestamp >= ?)" + params.append(from_bound) + + if to_dt: + # Normalize to UTC 'Z' format for consistent string comparison + if to_dt.tzinfo is None: + to_dt = to_dt.replace(tzinfo=timezone.utc) + to_bound = to_dt.strftime("%Y-%m-%dT%H:%M:%SZ") + sql += " AND (timestamp IS NULL OR timestamp <= ?)" + params.append(to_bound) + + sql += " ORDER BY timestamp NULLS LAST" + + with self._get_connection() as conn: + rows = conn.execute(sql, params).fetchall() + + return [self._deserialize_entry(row) for row in rows] - from_dt = None - to_dt = None + def save_cached_entries( + self, jsonl_path: Path, entries: List[TranscriptEntry] + ) -> None: + """Save parsed transcript entries to cache.""" + if self._project_id is None: + return - if from_date: - from_dt = dateparser.parse(from_date) - if from_dt and ( - from_date in ["today", "yesterday"] or "days ago" in from_date - ): - from_dt = from_dt.replace(hour=0, minute=0, second=0, microsecond=0) - - if to_date: - to_dt = dateparser.parse(to_date) - if to_dt: - if to_date in ["today", "yesterday"] or "days ago" in to_date: - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) - else: - # For simple date strings like "2023-01-01", set to end of day - to_dt = to_dt.replace( - hour=23, minute=59, second=59, microsecond=999999 - ) - - # Filter entries by timestamp - filtered_entries_data: list[dict[str, Any]] = [] - - for timestamp_key, timestamp_entries in cache_data.items(): - if timestamp_key == "_no_timestamp": - # Always include entries without timestamps (like summaries) - if 
isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - filtered_entries_data.extend( - cast(list[dict[str, Any]], timestamp_entries) - ) - else: - # Check if timestamp falls within range - message_dt = parse_timestamp(timestamp_key) - if message_dt: - # Convert to naive datetime for comparison - if message_dt.tzinfo: - message_dt = message_dt.replace(tzinfo=None) - - # Apply date filtering - if from_dt and message_dt < from_dt: - continue - if to_dt and message_dt > to_dt: - continue - - if isinstance(timestamp_entries, list): - # Type cast to ensure Pyright knows this is list[dict[str, Any]] - filtered_entries_data.extend( - cast(list[dict[str, Any]], timestamp_entries) - ) - - # Deserialize filtered entries - from .factories import create_transcript_entry - - entries = [ - create_transcript_entry(entry_dict) - for entry_dict in filtered_entries_data - ] - return entries - except Exception as e: - print( - f"Warning: Failed to load filtered cached entries from {cache_file}: {e}" + source_mtime = jsonl_path.stat().st_mtime + cached_mtime = datetime.now().timestamp() + + with self._get_connection() as conn: + # Insert or update file record + # Use ON CONFLICT to preserve file ID and avoid cascade deletes on messages + conn.execute( + """ + INSERT INTO cached_files + (project_id, file_name, file_path, source_mtime, cached_mtime, message_count) + VALUES (?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(project_id, file_name) DO UPDATE SET + file_path = excluded.file_path, + source_mtime = excluded.source_mtime, + cached_mtime = excluded.cached_mtime, + message_count = excluded.message_count + """, + ( + self._project_id, + jsonl_path.name, + str(jsonl_path), + source_mtime, + cached_mtime, + len(entries), + ), ) - return None - def save_cached_entries( - self, jsonl_path: Path, entries: list[TranscriptEntry] - ) -> None: - """Save parsed transcript entries to cache with timestamp-based structure.""" - cache_file = self._get_cache_file_path(jsonl_path) + # Get the file ID + row = conn.execute( + "SELECT id FROM cached_files WHERE project_id = ? AND file_name = ?", + (self._project_id, jsonl_path.name), + ).fetchone() + file_id = row["id"] - try: - # Create timestamp-keyed cache structure for efficient date filtering - cache_data: dict[str, Any] = {} - - for entry in entries: - # Get timestamp - use empty string as fallback for entries without timestamps - timestamp = ( - getattr(entry, "timestamp", "") - if hasattr(entry, "timestamp") - else "" - ) - if not timestamp: - # Use a special key for entries without timestamps (like summaries) - timestamp = "_no_timestamp" - - # Store entry data under timestamp - if timestamp not in cache_data: - cache_data[timestamp] = [] - - cache_data[timestamp].append(entry.model_dump()) - - with open(cache_file, "w", encoding="utf-8") as f: - json.dump(cache_data, f, indent=2) - - # Update cache index - if self._project_cache is not None: - source_mtime = jsonl_path.stat().st_mtime - cached_mtime = cache_file.stat().st_mtime - - # Extract session IDs from entries - session_ids: list[str] = [] - for entry in entries: - if hasattr(entry, "sessionId"): - session_id = getattr(entry, "sessionId", "") - if session_id: - session_ids.append(session_id) - session_ids = list(set(session_ids)) # Remove duplicates - - self._project_cache.cached_files[jsonl_path.name] = CachedFileInfo( - file_path=str(jsonl_path), - 
source_mtime=source_mtime, - cached_mtime=cached_mtime, - message_count=len(entries), - session_ids=session_ids, + # Delete existing messages for this file + conn.execute("DELETE FROM messages WHERE file_id = ?", (file_id,)) + + # Insert all entries in a batch + serialized_entries = [ + self._serialize_entry(entry, file_id) for entry in entries + ] + conn.executemany( + """ + INSERT INTO messages ( + project_id, file_id, type, timestamp, session_id, + _uuid, _parent_uuid, _is_sidechain, _user_type, _cwd, _version, + _is_meta, _agent_id, _request_id, + input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, + _leaf_uuid, _level, _operation, content + ) VALUES ( + :project_id, :file_id, :type, :timestamp, :session_id, + :_uuid, :_parent_uuid, :_is_sidechain, :_user_type, :_cwd, :_version, + :_is_meta, :_agent_id, :_request_id, + :input_tokens, :output_tokens, :cache_creation_tokens, :cache_read_tokens, + :_leaf_uuid, :_level, :_operation, :content ) + """, + serialized_entries, + ) - self._save_project_cache() - except Exception as e: - print(f"Warning: Failed to save cached entries to {cache_file}: {e}") + self._update_last_updated(conn) + conn.commit() - def update_session_cache(self, session_data: dict[str, SessionCacheData]) -> None: + def update_session_cache(self, session_data: Dict[str, SessionCacheData]) -> None: """Update cached session information.""" - if self._project_cache is None: + if self._project_id is None: return - self._project_cache.sessions.update( - {session_id: data for session_id, data in session_data.items()} - ) - self._save_project_cache() + with self._get_connection() as conn: + for session_id, data in session_data.items(): + conn.execute( + """ + INSERT INTO sessions ( + project_id, session_id, summary, first_timestamp, last_timestamp, + message_count, first_user_message, cwd, + total_input_tokens, total_output_tokens, + total_cache_creation_tokens, total_cache_read_tokens + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(project_id, session_id) DO UPDATE SET + summary = excluded.summary, + first_timestamp = excluded.first_timestamp, + last_timestamp = excluded.last_timestamp, + message_count = excluded.message_count, + first_user_message = excluded.first_user_message, + cwd = excluded.cwd, + total_input_tokens = excluded.total_input_tokens, + total_output_tokens = excluded.total_output_tokens, + total_cache_creation_tokens = excluded.total_cache_creation_tokens, + total_cache_read_tokens = excluded.total_cache_read_tokens + """, + ( + self._project_id, + session_id, + data.summary, + data.first_timestamp, + data.last_timestamp, + data.message_count, + data.first_user_message, + data.cwd, + data.total_input_tokens, + data.total_output_tokens, + data.total_cache_creation_tokens, + data.total_cache_read_tokens, + ), + ) + + self._update_last_updated(conn) + conn.commit() def update_project_aggregates( self, @@ -347,79 +641,151 @@ def update_project_aggregates( latest_timestamp: str, ) -> None: """Update project-level aggregate information.""" - if self._project_cache is None: + if self._project_id is None: return - self._project_cache.total_message_count = total_message_count - self._project_cache.total_input_tokens = total_input_tokens - self._project_cache.total_output_tokens = total_output_tokens - self._project_cache.total_cache_creation_tokens = total_cache_creation_tokens - self._project_cache.total_cache_read_tokens = total_cache_read_tokens - self._project_cache.earliest_timestamp = earliest_timestamp - self._project_cache.latest_timestamp = latest_timestamp + with self._get_connection() as conn: + conn.execute( + """ + UPDATE projects SET + total_message_count = ?, + total_input_tokens = ?, + total_output_tokens = ?, + total_cache_creation_tokens = ?, + total_cache_read_tokens = ?, + earliest_timestamp = ?, + latest_timestamp = ?, + last_updated = ? + WHERE id = ? 
+ """, + ( + total_message_count, + total_input_tokens, + total_output_tokens, + total_cache_creation_tokens, + total_cache_read_tokens, + earliest_timestamp, + latest_timestamp, + datetime.now().isoformat(), + self._project_id, + ), + ) + conn.commit() - self._save_project_cache() + def get_working_directories(self) -> List[str]: + """Get list of working directories associated with this project. - def update_working_directories(self, working_directories: list[str]) -> None: - """Update the list of working directories associated with this project.""" - if self._project_cache is None: - return - - self._project_cache.working_directories = working_directories - self._save_project_cache() + Queries distinct cwd values from sessions table. + """ + if self._project_id is None: + return [] - def get_modified_files(self, jsonl_files: list[Path]) -> list[Path]: - """Get list of JSONL files that need to be reprocessed.""" - modified_files: list[Path] = [] + with self._get_connection() as conn: + rows = conn.execute( + "SELECT DISTINCT cwd FROM sessions WHERE project_id = ? 
AND cwd IS NOT NULL", + (self._project_id,), + ).fetchall() - for jsonl_file in jsonl_files: - if not self.is_file_cached(jsonl_file): - modified_files.append(jsonl_file) + return [row["cwd"] for row in rows] - return modified_files + def get_modified_files(self, jsonl_files: List[Path]) -> List[Path]: + """Get list of JSONL files that need to be reprocessed.""" + return [ + jsonl_file + for jsonl_file in jsonl_files + if not self.is_file_cached(jsonl_file) + ] def get_cached_project_data(self) -> Optional[ProjectCache]: """Get the cached project data if available.""" - return self._project_cache + if self._project_id is None: + return None - def clear_cache(self) -> None: - """Clear all cache files and reset the project cache.""" - if self.cache_dir.exists(): - for cache_file in self.cache_dir.glob("*.json"): - if cache_file.name != "index.json": # Don't delete the index file here - try: - cache_file.unlink() - except Exception as e: - print(f"Warning: Failed to delete cache file {cache_file}: {e}") - - if self.index_file.exists(): - try: - self.index_file.unlink() - except Exception as e: - print(f"Warning: Failed to delete cache index {self.index_file}: {e}") - - # Reset the project cache - self._project_cache = ProjectCache( - version=self.library_version, - cache_created=datetime.now().isoformat(), - last_updated=datetime.now().isoformat(), - project_path=str(self.project_path), - cached_files={}, - sessions={}, + with self._get_connection() as conn: + # Get project data + project_row = conn.execute( + "SELECT * FROM projects WHERE id = ?", (self._project_id,) + ).fetchone() + + if not project_row: + return None + + # Get cached files + file_rows = conn.execute( + "SELECT * FROM cached_files WHERE project_id = ?", (self._project_id,) + ).fetchall() + + cached_files: Dict[str, CachedFileInfo] = {} + for row in file_rows: + # Get session IDs for this file from messages + session_rows = conn.execute( + "SELECT DISTINCT session_id FROM messages WHERE file_id = ? 
AND session_id IS NOT NULL", + (row["id"],), + ).fetchall() + session_ids = [r["session_id"] for r in session_rows] + + cached_files[row["file_name"]] = CachedFileInfo( + file_path=row["file_path"], + source_mtime=row["source_mtime"], + cached_mtime=row["cached_mtime"], + message_count=row["message_count"], + session_ids=session_ids, + ) + + # Get sessions + session_rows = conn.execute( + "SELECT * FROM sessions WHERE project_id = ?", (self._project_id,) + ).fetchall() + + sessions: Dict[str, SessionCacheData] = {} + for row in session_rows: + sessions[row["session_id"]] = SessionCacheData( + session_id=row["session_id"], + summary=row["summary"], + first_timestamp=row["first_timestamp"], + last_timestamp=row["last_timestamp"], + message_count=row["message_count"], + first_user_message=row["first_user_message"], + cwd=row["cwd"], + total_input_tokens=row["total_input_tokens"], + total_output_tokens=row["total_output_tokens"], + total_cache_creation_tokens=row["total_cache_creation_tokens"], + total_cache_read_tokens=row["total_cache_read_tokens"], + ) + + return ProjectCache( + version=project_row["version"], + cache_created=project_row["cache_created"], + last_updated=project_row["last_updated"], + project_path=project_row["project_path"], + cached_files=cached_files, + total_message_count=project_row["total_message_count"], + total_input_tokens=project_row["total_input_tokens"], + total_output_tokens=project_row["total_output_tokens"], + total_cache_creation_tokens=project_row["total_cache_creation_tokens"], + total_cache_read_tokens=project_row["total_cache_read_tokens"], + sessions=sessions, + working_directories=self.get_working_directories(), + earliest_timestamp=project_row["earliest_timestamp"], + latest_timestamp=project_row["latest_timestamp"], ) - def _is_cache_version_compatible(self, cache_version: str) -> bool: - """Check if a cache version is compatible with the current library version. 
+ def clear_cache(self) -> None: + """Clear all cache data for this project.""" + if self._project_id is None: + return - This uses a compatibility matrix to determine if cache invalidation is needed. - Only breaking changes require cache invalidation, not every version bump. - """ + with self._get_connection() as conn: + self._clear_project_data(conn) + self._project_id = self._create_project(conn) + conn.commit() + + def _is_cache_version_compatible(self, cache_version: str) -> bool: + """Check if a cache version is compatible with the current library version.""" if cache_version == self.library_version: return True # Define compatibility rules - # Format: "cache_version": "minimum_library_version_required" - # If cache version is older than the minimum required, it needs invalidation breaking_changes: dict[str, str] = { # 0.9.0 introduced _compact_ide_tags_for_preview() which transforms # first_user_message to use emoji indicators instead of raw IDE tags @@ -429,83 +795,775 @@ def _is_cache_version_compatible(self, cache_version: str) -> bool: cache_ver = version.parse(cache_version) current_ver = version.parse(self.library_version) - # Check if cache version requires invalidation due to breaking changes for breaking_version_pattern, min_required in breaking_changes.items(): min_required_ver = version.parse(min_required) - # If current version is at or above the minimum required for this breaking change if current_ver >= min_required_ver: - # Check if cache version is affected by this breaking change if breaking_version_pattern.endswith(".x"): - # Pattern like "0.2.x" matches any 0.2.* version major_minor = breaking_version_pattern[:-2] if str(cache_ver).startswith(major_minor): return False else: - # Exact version or version comparison breaking_ver = version.parse(breaking_version_pattern) if cache_ver <= breaking_ver: return False - # If no breaking changes affect this cache version, it's compatible return True - def get_cache_stats(self) -> dict[str, Any]: + 
def get_cache_stats(self) -> Dict[str, Any]: """Get cache statistics for reporting.""" - if self._project_cache is None: + if self._project_id is None: + return {"cache_enabled": False} + + with self._get_connection() as conn: + project_row = conn.execute( + "SELECT * FROM projects WHERE id = ?", (self._project_id,) + ).fetchone() + + file_count = conn.execute( + "SELECT COUNT(*) as cnt FROM cached_files WHERE project_id = ?", + (self._project_id,), + ).fetchone() + + session_count = conn.execute( + "SELECT COUNT(*) as cnt FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchone() + + if not project_row: return {"cache_enabled": False} return { "cache_enabled": True, - "cached_files_count": len(self._project_cache.cached_files), - "total_cached_messages": self._project_cache.total_message_count, - "total_sessions": len(self._project_cache.sessions), - "cache_created": self._project_cache.cache_created, - "last_updated": self._project_cache.last_updated, + "cached_files_count": file_count["cnt"] if file_count else 0, + "total_cached_messages": project_row["total_message_count"], + "total_sessions": session_count["cnt"] if session_count else 0, + "cache_created": project_row["cache_created"], + "last_updated": project_row["last_updated"], } + # ========== HTML Cache Methods ========== -def get_library_version() -> str: - """Get the current library version from package metadata or pyproject.toml.""" - # First try to get version from installed package metadata - try: - from importlib.metadata import version + def get_html_cache(self, html_path: str) -> Optional[HtmlCacheEntry]: + """Get HTML cache entry for a given path.""" + if self._project_id is None: + return None - return version("claude-code-log") - except Exception: - # Package not installed or other error, continue to file-based detection - pass + with self._get_connection() as conn: + row = conn.execute( + """SELECT html_path, generated_at, source_session_id, message_count, library_version + 
FROM html_cache + WHERE project_id = ? AND html_path = ?""", + (self._project_id, html_path), + ).fetchone() - # Second approach: Use importlib.resources for more robust package location detection - try: - from importlib import resources - import toml + if not row: + return None - # Get the package directory and navigate to parent for pyproject.toml - package_files = resources.files("claude_code_log") - # Convert to Path to access parent reliably - package_root = Path(str(package_files)).parent - pyproject_path = package_root / "pyproject.toml" + return HtmlCacheEntry( + html_path=row["html_path"], + generated_at=row["generated_at"], + source_session_id=row["source_session_id"], + message_count=row["message_count"] or 0, + library_version=row["library_version"], + ) - if pyproject_path.exists(): - with open(pyproject_path, "r", encoding="utf-8") as f: - pyproject_data = toml.load(f) - return pyproject_data.get("project", {}).get("version", "unknown") - except Exception: - pass + def update_html_cache( + self, + html_path: str, + session_id: Optional[str], + message_count: int, + ) -> None: + """Update or insert HTML cache entry.""" + if self._project_id is None: + return - # Final fallback: Try to read from pyproject.toml using file-relative path - try: - import toml + with self._get_connection() as conn: + conn.execute( + """INSERT INTO html_cache + (project_id, html_path, generated_at, source_session_id, message_count, library_version) + VALUES (?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(project_id, html_path) + DO UPDATE SET + generated_at = excluded.generated_at, + source_session_id = excluded.source_session_id, + message_count = excluded.message_count, + library_version = excluded.library_version""", + ( + self._project_id, + html_path, + datetime.now().isoformat(), + session_id, + message_count, + self.library_version, + ), + ) + conn.commit() - project_root = Path(__file__).parent.parent - pyproject_path = project_root / "pyproject.toml" + def is_html_stale( + self, html_path: str, session_id: Optional[str] = None + ) -> tuple[bool, str]: + """Check if HTML file needs regeneration. - if pyproject_path.exists(): - with open(pyproject_path, "r", encoding="utf-8") as f: - pyproject_data = toml.load(f) - return pyproject_data.get("project", {}).get("version", "unknown") - except Exception: - pass + Args: + html_path: Path to HTML file (e.g., "session-abc123.html") + session_id: Session ID for individual session files, None for combined - return "unknown" + Returns: + Tuple of (is_stale: bool, reason: str) + """ + from .renderer import is_html_outdated + + if self._project_id is None: + return True, "no_cache" + + # Get existing HTML cache entry + html_cache = self.get_html_cache(html_path) + if html_cache is None: + return True, "not_cached" + + # Check library version in cache + if html_cache.library_version != self.library_version: + return True, "version_mismatch" + + # Check if file exists and has correct version + actual_file = self.project_path / html_path + if not actual_file.exists(): + return True, "file_missing" + if is_html_outdated(actual_file): + return True, "file_version_mismatch" + + with self._get_connection() as conn: + if session_id is not None: + # For individual session HTML: check if session message count changed + row = conn.execute( + """SELECT message_count FROM sessions + WHERE project_id = ? 
AND session_id = ?""", + (self._project_id, session_id), + ).fetchone() + + if not row: + return True, "session_not_found" + + # Compare message counts + if row["message_count"] != html_cache.message_count: + return True, "session_updated" + else: + # For combined transcript: check if total message count changed + # This is more reliable than timestamp comparison, which can + # trigger false positives when cache metadata is updated + row = conn.execute( + """SELECT total_message_count FROM projects + WHERE id = ?""", + (self._project_id,), + ).fetchone() + + if row and row["total_message_count"] != html_cache.message_count: + return True, "project_updated" + + return False, "up_to_date" + + def get_stale_sessions( + self, valid_session_ids: Optional[set[str]] = None + ) -> List[tuple[str, str]]: + """Get list of sessions that need HTML regeneration. + + Args: + valid_session_ids: If provided, only check sessions in this set. + Sessions not in this set are considered "archived" (JSONL deleted) + and are skipped to avoid perpetual staleness. 
+ + Returns: + List of (session_id, reason) tuples for sessions needing regeneration + """ + if self._project_id is None: + return [] + + stale_sessions: List[tuple[str, str]] = [] + + with self._get_connection() as conn: + # Get all sessions + session_rows = conn.execute( + """SELECT session_id, last_timestamp FROM sessions + WHERE project_id = ?""", + (self._project_id,), + ).fetchall() + + for row in session_rows: + session_id = row["session_id"] + + # Skip archived sessions (JSONL deleted but cache remains) + if ( + valid_session_ids is not None + and session_id not in valid_session_ids + ): + continue + + html_path = f"session-{session_id}.html" + + is_stale, reason = self.is_html_stale(html_path, session_id) + if is_stale: + stale_sessions.append((session_id, reason)) + + return stale_sessions + + def get_archived_session_count(self, valid_session_ids: set[str]) -> int: + """Count sessions in cache whose JSONL files have been deleted. + + These are preserved for potential future archiving/restore features. + + Args: + valid_session_ids: Set of session IDs that currently exist in source data + + Returns: + Number of archived (orphan) sessions + """ + if self._project_id is None: + return 0 + + with self._get_connection() as conn: + cached_rows = conn.execute( + "SELECT session_id FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchall() + + return sum( + 1 for row in cached_rows if row["session_id"] not in valid_session_ids + ) + + def get_archived_sessions( + self, valid_session_ids: set[str] + ) -> Dict[str, SessionCacheData]: + """Get session data for archived sessions (cached but JSONL deleted). 
+ + Args: + valid_session_ids: Set of session IDs that currently exist in source data + + Returns: + Dict mapping session_id to SessionCacheData for archived sessions + """ + if self._project_id is None: + return {} + + archived_sessions: Dict[str, SessionCacheData] = {} + + with self._get_connection() as conn: + session_rows = conn.execute( + "SELECT * FROM sessions WHERE project_id = ?", + (self._project_id,), + ).fetchall() + + for row in session_rows: + session_id = row["session_id"] + if session_id not in valid_session_ids: + archived_sessions[session_id] = SessionCacheData( + session_id=session_id, + summary=row["summary"], + first_timestamp=row["first_timestamp"], + last_timestamp=row["last_timestamp"], + message_count=row["message_count"], + first_user_message=row["first_user_message"], + cwd=row["cwd"], + total_input_tokens=row["total_input_tokens"], + total_output_tokens=row["total_output_tokens"], + total_cache_creation_tokens=row["total_cache_creation_tokens"], + total_cache_read_tokens=row["total_cache_read_tokens"], + ) + + return archived_sessions + + def export_session_to_jsonl(self, session_id: str) -> List[str]: + """Export all message content JSONs for a session, for JSONL restoration. + + Args: + session_id: The session ID to export + + Returns: + List of JSON strings (one per line for JSONL file), compact format + """ + if self._project_id is None: + return [] + + with self._get_connection() as conn: + rows = conn.execute( + """SELECT content FROM messages + WHERE project_id = ? AND session_id = ? + ORDER BY timestamp NULLS LAST""", + (self._project_id, session_id), + ).fetchall() + + # Content is stored as compressed, compact JSON - just decompress + return [zlib.decompress(row["content"]).decode("utf-8") for row in rows] + + def load_session_entries(self, session_id: str) -> List[TranscriptEntry]: + """Load transcript entries for a session from cache. 
+ + Used for rendering archived sessions to HTML/Markdown when + the original JSONL file no longer exists. + + Args: + session_id: The session ID to load + + Returns: + List of TranscriptEntry objects for the session + """ + if self._project_id is None: + return [] + + with self._get_connection() as conn: + rows = conn.execute( + """SELECT content FROM messages + WHERE project_id = ? AND session_id = ? + ORDER BY timestamp NULLS LAST""", + (self._project_id, session_id), + ).fetchall() + + return [self._deserialize_entry(row) for row in rows] + + # ========== Page Cache Methods (Pagination) ========== + + def get_page_size_config(self) -> Optional[int]: + """Get the configured page size, if any pages exist. + + All pages in a project share the same page_size_config value. + """ + if self._project_id is None: + return None + + with self._get_connection() as conn: + row = conn.execute( + """SELECT page_size_config FROM html_pages + WHERE project_id = ? + LIMIT 1""", + (self._project_id,), + ).fetchone() + + return row["page_size_config"] if row else None + + def get_page_data(self, page_number: int) -> Optional[PageCacheData]: + """Get cache data for a specific page.""" + if self._project_id is None: + return None + + with self._get_connection() as conn: + # Get page info + page_row = conn.execute( + """SELECT * FROM html_pages + WHERE project_id = ? AND page_number = ?""", + (self._project_id, page_number), + ).fetchone() + + if not page_row: + return None + + # Get sessions for this page + session_rows = conn.execute( + """SELECT session_id FROM page_sessions + WHERE page_id = ? 
+ ORDER BY session_order ASC""", + (page_row["id"],), + ).fetchall() + + session_ids = [row["session_id"] for row in session_rows] + + return PageCacheData( + page_number=page_row["page_number"], + html_path=page_row["html_path"], + page_size_config=page_row["page_size_config"], + message_count=page_row["message_count"], + session_ids=session_ids, + first_session_id=page_row["first_session_id"], + last_session_id=page_row["last_session_id"], + first_timestamp=page_row["first_timestamp"], + last_timestamp=page_row["last_timestamp"], + total_input_tokens=page_row["total_input_tokens"] or 0, + total_output_tokens=page_row["total_output_tokens"] or 0, + total_cache_creation_tokens=page_row["total_cache_creation_tokens"] or 0, + total_cache_read_tokens=page_row["total_cache_read_tokens"] or 0, + generated_at=page_row["generated_at"], + library_version=page_row["library_version"], + ) + + def get_all_pages(self) -> List[PageCacheData]: + """Get all cached pages for this project.""" + if self._project_id is None: + return [] + + pages: List[PageCacheData] = [] + with self._get_connection() as conn: + page_rows = conn.execute( + """SELECT * FROM html_pages + WHERE project_id = ? + ORDER BY page_number ASC""", + (self._project_id,), + ).fetchall() + + for page_row in page_rows: + session_rows = conn.execute( + """SELECT session_id FROM page_sessions + WHERE page_id = ? 
+ ORDER BY session_order ASC""", + (page_row["id"],), + ).fetchall() + + session_ids = [row["session_id"] for row in session_rows] + + pages.append( + PageCacheData( + page_number=page_row["page_number"], + html_path=page_row["html_path"], + page_size_config=page_row["page_size_config"], + message_count=page_row["message_count"], + session_ids=session_ids, + first_session_id=page_row["first_session_id"], + last_session_id=page_row["last_session_id"], + first_timestamp=page_row["first_timestamp"], + last_timestamp=page_row["last_timestamp"], + total_input_tokens=page_row["total_input_tokens"] or 0, + total_output_tokens=page_row["total_output_tokens"] or 0, + total_cache_creation_tokens=page_row[ + "total_cache_creation_tokens" + ] + or 0, + total_cache_read_tokens=page_row["total_cache_read_tokens"] + or 0, + generated_at=page_row["generated_at"], + library_version=page_row["library_version"], + ) + ) + + return pages + + def update_page_cache( + self, + page_number: int, + html_path: str, + page_size_config: int, + session_ids: List[str], + message_count: int, + first_timestamp: Optional[str], + last_timestamp: Optional[str], + total_input_tokens: int, + total_output_tokens: int, + total_cache_creation_tokens: int, + total_cache_read_tokens: int, + ) -> None: + """Update or insert page cache entry.""" + if self._project_id is None or not session_ids: + return + + with self._get_connection() as conn: + # Insert or update page + conn.execute( + """INSERT INTO html_pages + (project_id, page_number, html_path, page_size_config, message_count, + first_session_id, last_session_id, first_timestamp, last_timestamp, + total_input_tokens, total_output_tokens, + total_cache_creation_tokens, total_cache_read_tokens, + generated_at, library_version) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(project_id, page_number) + DO UPDATE SET + html_path = excluded.html_path, + page_size_config = excluded.page_size_config, + message_count = excluded.message_count, + first_session_id = excluded.first_session_id, + last_session_id = excluded.last_session_id, + first_timestamp = excluded.first_timestamp, + last_timestamp = excluded.last_timestamp, + total_input_tokens = excluded.total_input_tokens, + total_output_tokens = excluded.total_output_tokens, + total_cache_creation_tokens = excluded.total_cache_creation_tokens, + total_cache_read_tokens = excluded.total_cache_read_tokens, + generated_at = excluded.generated_at, + library_version = excluded.library_version""", + ( + self._project_id, + page_number, + html_path, + page_size_config, + message_count, + session_ids[0], + session_ids[-1], + first_timestamp, + last_timestamp, + total_input_tokens, + total_output_tokens, + total_cache_creation_tokens, + total_cache_read_tokens, + datetime.now().isoformat(), + self.library_version, + ), + ) + + # Get the page ID + row = conn.execute( + """SELECT id FROM html_pages + WHERE project_id = ? AND page_number = ?""", + (self._project_id, page_number), + ).fetchone() + page_id = row["id"] + + # Delete existing session mappings + conn.execute("DELETE FROM page_sessions WHERE page_id = ?", (page_id,)) + + # Insert session mappings + for order, session_id in enumerate(session_ids): + conn.execute( + """INSERT INTO page_sessions (page_id, session_id, session_order) + VALUES (?, ?, ?)""", + (page_id, session_id, order), + ) + + conn.commit() + + def is_page_stale( + self, page_number: int, page_size_config: int + ) -> tuple[bool, str]: + """Check if a page needs regeneration. 
+ + Args: + page_number: The page number to check + page_size_config: The current page size configuration + + Returns: + Tuple of (is_stale: bool, reason: str) + """ + from .renderer import is_html_outdated + + if self._project_id is None: + return True, "no_cache" + + page_data = self.get_page_data(page_number) + if page_data is None: + return True, "not_cached" + + # Check if page size config changed + if page_data.page_size_config != page_size_config: + return True, "page_size_changed" + + # Check library version + if page_data.library_version != self.library_version: + return True, "version_mismatch" + + # Check if HTML file exists and has correct version + actual_file = self.project_path / page_data.html_path + if not actual_file.exists(): + return True, "file_missing" + if is_html_outdated(actual_file): + return True, "file_version_mismatch" + + # Check if any session on this page has changed + with self._get_connection() as conn: + # Build placeholders for IN clause + placeholders = ",".join("?" for _ in page_data.session_ids) + params = [self._project_id, *page_data.session_ids] + + row = conn.execute( + f"""SELECT COUNT(*) as session_count, + COALESCE(SUM(message_count), 0) as total_messages, + MAX(last_timestamp) as max_timestamp + FROM sessions + WHERE project_id = ? AND session_id IN ({placeholders})""", + params, + ).fetchone() + + # Check if any sessions are missing + if row["session_count"] != len(page_data.session_ids): + return True, "session_missing" + + # Check if message count changed + if row["total_messages"] != page_data.message_count: + return True, "message_count_changed" + + # Check if last timestamp changed (session content updated) + if row["max_timestamp"] != page_data.last_timestamp: + return True, "timestamp_changed" + + return False, "up_to_date" + + def invalidate_all_pages(self) -> List[str]: + """Delete all page cache entries for this project. 
+ + Returns: + List of HTML file paths that were invalidated (for cleanup) + """ + if self._project_id is None: + return [] + + html_paths: List[str] = [] + + with self._get_connection() as conn: + # Get all page paths before deleting + rows = conn.execute( + """SELECT html_path FROM html_pages WHERE project_id = ?""", + (self._project_id,), + ).fetchall() + html_paths = [row["html_path"] for row in rows] + + # Delete all pages (cascade deletes page_sessions) + conn.execute( + "DELETE FROM html_pages WHERE project_id = ?", (self._project_id,) + ) + conn.commit() + + return html_paths + + def get_page_count(self) -> int: + """Get the number of cached pages for this project.""" + if self._project_id is None: + return 0 + + with self._get_connection() as conn: + row = conn.execute( + """SELECT COUNT(*) as cnt FROM html_pages WHERE project_id = ?""", + (self._project_id,), + ).fetchone() + + return row["cnt"] if row else 0 + + def delete_session(self, session_id: str) -> bool: + """Delete a session and its messages from cache. + + Args: + session_id: The session ID to delete + + Returns: + True if session was deleted, False if not found + """ + if self._project_id is None: + return False + + with self._get_connection() as conn: + # Check if session exists + row = conn.execute( + "SELECT id FROM sessions WHERE project_id = ? AND session_id = ?", + (self._project_id, session_id), + ).fetchone() + + if not row: + return False + + # Delete messages for this session + conn.execute( + "DELETE FROM messages WHERE project_id = ? AND session_id = ?", + (self._project_id, session_id), + ) + + # Delete HTML cache entries for this session + conn.execute( + "DELETE FROM html_cache WHERE project_id = ? AND source_session_id = ?", + (self._project_id, session_id), + ) + + # Delete page_sessions entries referencing this session + conn.execute( + """DELETE FROM page_sessions WHERE session_id = ? 
+ AND page_id IN (SELECT id FROM html_pages WHERE project_id = ?)""", + (session_id, self._project_id), + ) + + # Delete cached_files entry for this session's JSONL file + # File name pattern is {session_id}.jsonl + conn.execute( + "DELETE FROM cached_files WHERE project_id = ? AND file_name = ?", + (self._project_id, f"{session_id}.jsonl"), + ) + + # Delete the session record + conn.execute( + "DELETE FROM sessions WHERE project_id = ? AND session_id = ?", + (self._project_id, session_id), + ) + + self._update_last_updated(conn) + conn.commit() + + return True + + def delete_project(self) -> bool: + """Delete this project and all its data from cache. + + Returns: + True if project was deleted, False if not found + """ + if self._project_id is None: + return False + + with self._get_connection() as conn: + # Cascade delete handles messages, sessions, cached_files, html_cache, html_pages + conn.execute("DELETE FROM projects WHERE id = ?", (self._project_id,)) + conn.commit() + + self._project_id = None + return True + + +def get_all_cached_projects( + projects_dir: Path, + db_path: Optional[Path] = None, +) -> List[tuple[str, bool]]: + """Get all projects from cache, indicating which are archived. + + This is a standalone function that queries the cache database directly + to find all project paths, without needing to instantiate CacheManager + for each project. + + Args: + projects_dir: Path to the projects directory (e.g., ~/.claude/projects) + db_path: Optional explicit path to the cache database. If not provided, + uses CLAUDE_CODE_LOG_CACHE_PATH env var or default location. + + Returns: + List of (project_path, is_archived) tuples. + is_archived is True if the project has no JSONL files but exists in cache. 
+ """ + # Priority: explicit db_path > env var > default location + if db_path: + actual_db_path = db_path + else: + actual_db_path = get_cache_db_path(projects_dir) + + if not actual_db_path.exists(): + return [] + + result: List[tuple[str, bool]] = [] + + try: + conn = sqlite3.connect(actual_db_path, timeout=30.0) + conn.row_factory = sqlite3.Row + try: + rows = conn.execute( + "SELECT project_path FROM projects ORDER BY project_path" + ).fetchall() + + for row in rows: + project_path = Path(row["project_path"]) + # Check if project has JSONL files (non-archived) + has_jsonl = ( + bool(list(project_path.glob("*.jsonl"))) + if project_path.exists() + else False + ) + # is_archived = project exists in cache but has no JSONL files + is_archived = not has_jsonl + result.append((row["project_path"], is_archived)) + finally: + conn.close() + except (sqlite3.Error, OSError) as e: + logger.debug("Failed to read cached projects from %s: %s", actual_db_path, e) + + return result + + +__all__ = [ + "CacheManager", + "CachedFileInfo", + "HtmlCacheEntry", + "PageCacheData", + "ProjectCache", + "SessionCacheData", + "get_all_cached_projects", + "get_cache_db_path", + "get_library_version", +] diff --git a/claude_code_log/cli.py b/claude_code_log/cli.py index cef23725..2c6e3afc 100644 --- a/claude_code_log/cli.py +++ b/claude_code_log/cli.py @@ -17,7 +17,12 @@ get_file_extension, process_projects_hierarchy, ) -from .cache import CacheManager, get_library_version +from .cache import ( + CacheManager, + get_all_cached_projects, + get_cache_db_path, + get_library_version, +) def get_default_projects_dir() -> Path: @@ -25,36 +30,75 @@ def get_default_projects_dir() -> Path: return Path.home() / ".claude" / "projects" -def _launch_tui_with_cache_check(project_path: Path) -> Optional[str]: +def _discover_projects( + projects_dir: Path, +) -> tuple[list[Path], set[Path]]: + """Discover active and archived projects in the projects directory. 
+ + Returns: + Tuple of (all_project_dirs, archived_projects_set) + """ + # Find active projects (directories with JSONL files) + project_dirs = [ + d for d in projects_dir.iterdir() if d.is_dir() and list(d.glob("*.jsonl")) + ] + + # Find archived projects (in cache but without JSONL files) + archived_projects: set[Path] = set() + cached_projects = get_all_cached_projects(projects_dir) + active_project_paths = {str(p) for p in project_dirs} + for project_path_str, is_archived in cached_projects: + if is_archived and project_path_str not in active_project_paths: + archived_path = Path(project_path_str) + archived_projects.add(archived_path) + project_dirs.append(archived_path) + + return project_dirs, archived_projects + + +def _launch_tui_with_cache_check( + project_path: Path, is_archived: bool = False +) -> Optional[str]: """Launch TUI with proper cache checking and user feedback.""" click.echo("Checking cache and loading session data...") # Check if we need to rebuild cache cache_manager = CacheManager(project_path, get_library_version()) - jsonl_files = list(project_path.glob("*.jsonl")) - modified_files = cache_manager.get_modified_files(jsonl_files) project_cache = cache_manager.get_cached_project_data() - if not (project_cache and project_cache.sessions and not modified_files): - # Need to rebuild cache - if modified_files: + if is_archived: + # Archived projects have no JSONL files, just load from cache + if project_cache and project_cache.sessions: click.echo( - f"Found {len(modified_files)} modified files, rebuilding cache..." + f"[ARCHIVED] Found {len(project_cache.sessions)} sessions in cache. Launching TUI..." ) else: - click.echo("Building session cache...") - - # Pre-build the cache before launching TUI (no HTML generation) - try: - ensure_fresh_cache(project_path, cache_manager, silent=True) - click.echo("Cache ready! 
Launching TUI...") - except Exception as e: - click.echo(f"Error building cache: {e}", err=True) + click.echo("Error: No cached sessions found for archived project", err=True) return None else: - click.echo( - f"Cache up to date. Found {len(project_cache.sessions)} sessions. Launching TUI..." - ) + jsonl_files = list(project_path.glob("*.jsonl")) + modified_files = cache_manager.get_modified_files(jsonl_files) + + if not (project_cache and project_cache.sessions and not modified_files): + # Need to rebuild cache + if modified_files: + click.echo( + f"Found {len(modified_files)} modified files, rebuilding cache..." + ) + else: + click.echo("Building session cache...") + + # Pre-build the cache before launching TUI (no HTML generation) + try: + ensure_fresh_cache(project_path, cache_manager, silent=True) + click.echo("Cache ready! Launching TUI...") + except Exception as e: + click.echo(f"Error building cache: {e}", err=True) + return None + else: + click.echo( + f"Cache up to date. Found {len(project_cache.sessions)} sessions. Launching TUI..." 
+ ) # Small delay to let user see the message before TUI clears screen import time @@ -63,7 +107,7 @@ def _launch_tui_with_cache_check(project_path: Path) -> Optional[str]: from .tui import run_session_browser - result = run_session_browser(project_path) + result = run_session_browser(project_path, is_archived=is_archived) return result @@ -193,24 +237,23 @@ def _find_relative_matches( try: # Load cache to check for working directories cache_manager = CacheManager(project_dir, get_library_version()) - project_cache = cache_manager.get_cached_project_data() + working_directories = cache_manager.get_working_directories() # Build cache if needed - if not project_cache or not project_cache.working_directories: + if not working_directories: jsonl_files = list(project_dir.glob("*.jsonl")) if jsonl_files: try: convert_jsonl_to_html(project_dir, silent=True) - project_cache = cache_manager.get_cached_project_data() + working_directories = cache_manager.get_working_directories() except Exception as e: logging.warning( f"Failed to build cache for project {project_dir.name}: {e}" ) - project_cache = None - if project_cache and project_cache.working_directories: + if working_directories: # Check for relative matches - for cwd in project_cache.working_directories: + for cwd in working_directories: cwd_path = Path(cwd).resolve() if current_cwd_path.is_relative_to(cwd_path): relative_matches.append(project_dir) @@ -263,6 +306,17 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: if all_projects: # Clear cache for all project directories click.echo("Clearing caches for all projects...") + + # Delete the SQLite cache database (respects CLAUDE_CODE_LOG_CACHE_PATH env var) + cache_db = get_cache_db_path(input_path) + if cache_db.exists(): + try: + cache_db.unlink() + click.echo(f" Deleted SQLite cache database: {cache_db}") + except Exception as e: + click.echo(f" Warning: Failed to delete cache database: {e}") + + # Also clean up old JSON cache directories 
(migration cleanup) project_dirs = [ d for d in input_path.iterdir() @@ -271,12 +325,16 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: for project_dir in project_dirs: try: - cache_manager = CacheManager(project_dir, library_version) - cache_manager.clear_cache() - click.echo(f" Cleared cache for {project_dir.name}") + # Clean up old JSON cache directory if it exists + old_cache_dir = project_dir / "cache" + if old_cache_dir.exists(): + import shutil + + shutil.rmtree(old_cache_dir) + click.echo(f" Cleared old JSON cache for {project_dir.name}") except Exception as e: click.echo( - f" Warning: Failed to clear cache for {project_dir.name}: {e}" + f" Warning: Failed to clear old cache for {project_dir.name}: {e}" ) elif input_path.is_dir(): @@ -284,6 +342,14 @@ def _clear_caches(input_path: Path, all_projects: bool) -> None: click.echo(f"Clearing cache for {input_path}...") cache_manager = CacheManager(input_path, library_version) cache_manager.clear_cache() + + # Also clean up old JSON cache directory if it exists + old_cache_dir = input_path / "cache" + if old_cache_dir.exists(): + import shutil + + shutil.rmtree(old_cache_dir) + click.echo(" Cleared old JSON cache directory") else: # Single file - no cache to clear click.echo("Cache clearing not applicable for single files.") @@ -434,6 +500,12 @@ def _clear_output_files(input_path: Path, all_projects: bool, file_ext: str) -> default=None, help="Image export mode: placeholder (mark position), embedded (base64), referenced (PNG files). Default: embedded for HTML, referenced for Markdown.", ) +@click.option( + "--page-size", + type=int, + default=2000, + help="Maximum messages per page for combined transcript (default: 2000). 
Sessions are never split across pages.", +) @click.option( "--debug", is_flag=True, @@ -455,6 +527,7 @@ def main( projects_dir: Optional[Path], output_format: str, image_export_mode: Optional[str], + page_size: int, debug: bool, ) -> None: """Convert Claude transcript JSONL files to HTML or Markdown. @@ -482,11 +555,8 @@ def main( click.echo(f"Error: Projects directory not found: {input_path}") return - project_dirs = [ - d - for d in input_path.iterdir() - if d.is_dir() and list(d.glob("*.jsonl")) - ] + # Initial project discovery + project_dirs, archived_projects = _discover_projects(input_path) if not project_dirs: click.echo(f"No projects with JSONL files found in {input_path}") @@ -495,7 +565,7 @@ def main( # Try to find projects that match current working directory matching_projects = find_projects_by_cwd(input_path) - if len(project_dirs) == 1: + if len(project_dirs) == 1 and not archived_projects: # Only one project, open it directly result = _launch_tui_with_cache_check(project_dirs[0]) if result == "back_to_projects": @@ -503,14 +573,21 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects (may have changed after restore) + project_dirs, archived_projects = _discover_projects( + input_path + ) selected_project = run_project_selector( - project_dirs, matching_projects + project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return @@ -526,14 +603,21 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects (may have changed after restore) + project_dirs, archived_projects = _discover_projects( + input_path + ) selected_project = run_project_selector( - project_dirs, matching_projects + 
project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return @@ -543,14 +627,19 @@ def main( from .tui import run_project_selector while True: + # Re-discover projects each iteration (may have changed after restore) + project_dirs, archived_projects = _discover_projects(input_path) selected_project = run_project_selector( - project_dirs, matching_projects + project_dirs, matching_projects, archived_projects ) if not selected_project: # User cancelled return - result = _launch_tui_with_cache_check(selected_project) + is_archived = selected_project in archived_projects + result = _launch_tui_with_cache_check( + selected_project, is_archived=is_archived + ) if result != "back_to_projects": # User quit normally return @@ -595,6 +684,7 @@ def main( not no_individual_sessions, output_format, image_export_mode, + page_size=page_size, ) # Count processed projects @@ -646,6 +736,7 @@ def main( not no_individual_sessions, not no_cache, image_export_mode=image_export_mode, + page_size=page_size, ) if input_path.is_file(): click.echo(f"Successfully converted {input_path} to {output_path}") diff --git a/claude_code_log/converter.py b/claude_code_log/converter.py index b6175992..d05791a6 100644 --- a/claude_code_log/converter.py +++ b/claude_code_log/converter.py @@ -3,9 +3,10 @@ import json import re +from dataclasses import dataclass, field from pathlib import Path import traceback -from typing import Optional, Any, TYPE_CHECKING +from typing import Any, Dict, List, Optional, TYPE_CHECKING, cast import dateparser @@ -17,10 +18,14 @@ get_project_display_name, should_use_as_session_starter, create_session_preview, - extract_working_directories, get_warmup_session_ids, ) -from 
.cache import CacheManager, SessionCacheData, get_library_version +from .cache import ( + CacheManager, + SessionCacheData, + get_all_cached_projects, + get_library_version, +) from .parser import parse_timestamp from .factories import create_transcript_entry from .models import ( @@ -31,7 +36,7 @@ UserTranscriptEntry, ToolResultContent, ) -from .renderer import get_renderer +from .renderer import get_renderer, is_html_outdated def get_file_extension(format: str) -> str: @@ -149,7 +154,16 @@ def load_transcript( messages: list[TranscriptEntry] = [] agent_ids: set[str] = set() # Collect agentId references while parsing - with open(jsonl_path, "r", encoding="utf-8", errors="replace") as f: + try: + f = open(jsonl_path, "r", encoding="utf-8", errors="replace") + except FileNotFoundError: + # Handle race condition: file may have been deleted between glob and open + # (e.g., Claude Code session cleanup) + if not silent: + print(f"Warning: File not found (may have been deleted): {jsonl_path}") + return [] + + with f: if not silent: print(f"Processing {jsonl_path}...") for line_no, line in enumerate(f, 1): # Start counting from 1 @@ -176,7 +190,7 @@ def load_transcript( isinstance(tool_use_result, dict) and "agentId" in tool_use_result ): - agent_id_value = tool_use_result.get("agentId") # type: ignore[reportUnknownVariableType, reportUnknownMemberType] + agent_id_value = cast(Any, tool_use_result).get("agentId") if isinstance(agent_id_value, str): agent_ids.add(agent_id_value) # Copy agentId to top level for Pydantic to preserve @@ -198,13 +212,15 @@ def load_transcript( entry_type in [ "file-history-snapshot", # Internal Claude Code file backup metadata + "progress", # Real-time progress updates (hook_progress, bash_progress) ] ): # Silently skip internal message types we don't render pass else: + display_line = line[:1000] + "..." 
if len(line) > 1000 else line print( - f"Line {line_no} of {jsonl_path} is not a recognised message type: {line}" + f"Line {line_no} of {jsonl_path} is not a recognised message type: {display_line}" ) except json.JSONDecodeError as e: print( @@ -403,6 +419,450 @@ def deduplicate_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntr return deduplicated +@dataclass +class GenerationStats: + """Track statistics for HTML generation across a project.""" + + # Cache statistics + files_loaded_from_cache: int = 0 + files_updated: int = 0 + + # HTML generation statistics + sessions_total: int = 0 + sessions_regenerated: int = 0 + combined_regenerated: bool = False + + # Timing (seconds) + cache_time: float = 0.0 + render_time: float = 0.0 + total_time: float = 0.0 + + # Errors/warnings collected during processing + warnings: List[str] = field(default_factory=lambda: []) + errors: List[str] = field(default_factory=lambda: []) + + def add_warning(self, msg: str) -> None: + """Add a warning message.""" + self.warnings.append(msg) + + def add_error(self, msg: str) -> None: + """Add an error message.""" + self.errors.append(msg) + + def summary(self, project_name: str) -> str: + """Generate a concise summary line for this project.""" + parts: List[str] = [f"Project: {project_name}"] + + # Cache info + cache_parts: List[str] = [] + if self.files_loaded_from_cache > 0: + cache_parts.append(f"{self.files_loaded_from_cache} cached") + if self.files_updated > 0: + cache_parts.append(f"{self.files_updated} updated") + if cache_parts: + parts.append(f" Cache: {', '.join(cache_parts)}") + + # HTML info + html_parts: List[str] = [] + if self.sessions_total > 0: + html_parts.append( + f"{self.sessions_regenerated}/{self.sessions_total} sessions" + ) + if self.combined_regenerated: + html_parts.append("combined") + if html_parts: + parts.append(f" HTML: {', '.join(html_parts)} regenerated") + elif self.sessions_total > 0: + parts.append(" HTML: up to date") + + # Timing + if 
self.total_time > 0: + time_str = f" Time: {self.total_time:.1f}s" + if self.cache_time > 0 or self.render_time > 0: + time_str += ( + f" (cache: {self.cache_time:.1f}s, render: {self.render_time:.1f}s)" + ) + parts.append(time_str) + + return "\n".join(parts) + + +def _get_page_html_path(page_number: int) -> str: + """Get the HTML filename for a given page number. + + Page 1 is combined_transcripts.html, page 2+ are combined_transcripts_N.html + """ + if page_number == 1: + return "combined_transcripts.html" + return f"combined_transcripts_{page_number}.html" + + +# Regex pattern to match and update the next link marker block +_NEXT_LINK_PATTERN = re.compile( + r'(.*?class="page-nav-link next) last-page(".*?)', + re.DOTALL, +) + + +def _enable_next_link_on_previous_page(output_dir: Path, page_number: int) -> bool: + """Enable the next link on a previous page by removing the last-page class. + + When a new page is created, the previous page's "Next" link (which was hidden + with the last-page CSS class) needs to be revealed. This function performs + an in-place edit to remove that class. + + Args: + output_dir: Directory containing the HTML files + page_number: The page number whose next link should be enabled + + Returns: + True if the file was modified, False otherwise + """ + if page_number < 1: + return False + + page_path = output_dir / _get_page_html_path(page_number) + if not page_path.exists(): + return False + + content = page_path.read_text(encoding="utf-8") + + # Check if there's a last-page class to remove + if "last-page" not in content: + return False + + # Replace the pattern to remove last-page class + new_content, count = _NEXT_LINK_PATTERN.subn(r"\1\2", content) + + if count > 0: + page_path.write_text(new_content, encoding="utf-8") + return True + + return False + + +def _assign_sessions_to_pages( + sessions: Dict[str, SessionCacheData], page_size: int +) -> List[List[str]]: + """Assign sessions to pages, never splitting sessions across pages. 
+ + Args: + sessions: Dict mapping session_id to SessionCacheData + page_size: Maximum messages per page (overflow allowed to keep sessions intact) + + Returns: + List of pages, each containing a list of session_ids + """ + pages: List[List[str]] = [] + current_page: List[str] = [] + current_count = 0 + + # Sort sessions chronologically by first_timestamp + sorted_sessions = sorted(sessions.values(), key=lambda s: s.first_timestamp or "") + + for session in sorted_sessions: + # Add session to current page (never split sessions) + current_page.append(session.session_id) + current_count += session.message_count + + # If page now exceeds limit, close it and start fresh + if current_count > page_size: + pages.append(current_page) + current_page = [] + current_count = 0 + + # Don't forget the last page + if current_page: + pages.append(current_page) + + return pages + + +def _build_session_data_from_messages( + messages: List[TranscriptEntry], +) -> Dict[str, SessionCacheData]: + """Build session data from messages when cache is unavailable. + + This is a fallback for pagination when get_cached_project_data() returns None. 
+ + Args: + messages: All messages (deduplicated) + + Returns: + Dict mapping session_id to SessionCacheData + """ + from .parser import extract_text_content + + # Pre-compute warmup session IDs to filter them out + warmup_session_ids = get_warmup_session_ids(messages) + + # Group messages by session + sessions: Dict[str, Dict[str, Any]] = {} + for message in messages: + if not hasattr(message, "sessionId") or isinstance( + message, SummaryTranscriptEntry + ): + continue + + session_id = getattr(message, "sessionId", "") + if not session_id or session_id in warmup_session_ids: + continue + + if session_id not in sessions: + sessions[session_id] = { + "first_timestamp": getattr(message, "timestamp", ""), + "last_timestamp": getattr(message, "timestamp", ""), + "message_count": 0, + "first_user_message": "", + "total_input_tokens": 0, + "total_output_tokens": 0, + "total_cache_creation_tokens": 0, + "total_cache_read_tokens": 0, + } + + sessions[session_id]["message_count"] += 1 + current_timestamp = getattr(message, "timestamp", "") + if current_timestamp: + sessions[session_id]["last_timestamp"] = current_timestamp + + # Get first user message for preview + if ( + isinstance(message, UserTranscriptEntry) + and not sessions[session_id]["first_user_message"] + and hasattr(message, "message") + ): + first_user_content = extract_text_content(message.message.content) + if should_use_as_session_starter(first_user_content): + sessions[session_id]["first_user_message"] = create_session_preview( + first_user_content + ) + + # Extract token usage from assistant messages + if isinstance(message, AssistantTranscriptEntry) and hasattr( + message, "message" + ): + msg_data = message.message + if hasattr(msg_data, "usage") and msg_data.usage: + usage = msg_data.usage + sessions[session_id]["total_input_tokens"] += ( + getattr(usage, "input_tokens", 0) or 0 + ) + sessions[session_id]["total_output_tokens"] += ( + getattr(usage, "output_tokens", 0) or 0 + ) + 
sessions[session_id]["total_cache_creation_tokens"] += ( + getattr(usage, "cache_creation_input_tokens", 0) or 0 + ) + sessions[session_id]["total_cache_read_tokens"] += ( + getattr(usage, "cache_read_input_tokens", 0) or 0 + ) + + # Convert to Dict[str, SessionCacheData] + result: Dict[str, SessionCacheData] = {} + for session_id, data in sessions.items(): + result[session_id] = SessionCacheData( + session_id=session_id, + first_timestamp=data["first_timestamp"], + last_timestamp=data["last_timestamp"], + message_count=data["message_count"], + first_user_message=data["first_user_message"], + total_input_tokens=data["total_input_tokens"], + total_output_tokens=data["total_output_tokens"], + total_cache_creation_tokens=data["total_cache_creation_tokens"], + total_cache_read_tokens=data["total_cache_read_tokens"], + ) + + return result + + +def _generate_paginated_html( + messages: List[TranscriptEntry], + output_dir: Path, + title: str, + page_size: int, + cache_manager: "CacheManager", + session_data: Dict[str, SessionCacheData], + working_directories: List[str], + silent: bool = False, +) -> Path: + """Generate paginated HTML files for combined transcript. 
+ + Args: + messages: All messages (deduplicated) + output_dir: Directory to write HTML files + title: Base title for the pages + page_size: Maximum messages per page + cache_manager: Cache manager for the project + session_data: Session metadata from cache + working_directories: Working directories for project display name + silent: Suppress verbose output + + Returns: + Path to the first page (combined_transcripts.html) + """ + from .html.renderer import generate_html + from .utils import format_timestamp + + # Check if page size changed - if so, invalidate all pages + cached_page_size = cache_manager.get_page_size_config() + if cached_page_size is not None and cached_page_size != page_size: + if not silent: + print( + f"Page size changed from {cached_page_size} to {page_size}, regenerating all pages" + ) + old_paths = cache_manager.invalidate_all_pages() + # Delete old page files + for html_path in old_paths: + page_file = output_dir / html_path + if page_file.exists(): + page_file.unlink() + + # Assign sessions to pages + pages: List[List[str]] = _assign_sessions_to_pages(session_data, page_size) + + if not pages: + # No sessions, generate empty page + pages = [[]] + + # Clean up orphan pages if page count decreased + old_page_count = cache_manager.get_page_count() + new_page_count = len(pages) + if old_page_count > new_page_count: + for orphan_page_num in range(new_page_count + 1, old_page_count + 1): + orphan_path = output_dir / _get_page_html_path(orphan_page_num) + if orphan_path.exists(): + orphan_path.unlink() + + # Group messages by session for fast lookup + messages_by_session: Dict[str, List[TranscriptEntry]] = {} + for msg in messages: + session_id = getattr(msg, "sessionId", None) + if session_id: + if session_id not in messages_by_session: + messages_by_session[session_id] = [] + messages_by_session[session_id].append(msg) + + first_page_path = output_dir / _get_page_html_path(1) + + # Generate each page + for page_num, page_session_ids in 
enumerate(pages, start=1): + html_path = _get_page_html_path(page_num) + page_file = output_dir / html_path + + # Check if page is stale + is_stale, reason = cache_manager.is_page_stale(page_num, page_size) + + if not is_stale and page_file.exists(): + if not silent: + print(f"Page {page_num} is current, skipping regeneration") + continue + + if not silent: + print(f"Generating page {page_num} ({reason})...") + + # Collect messages for this page + page_messages: List[TranscriptEntry] = [] + for session_id in page_session_ids: + if session_id in messages_by_session: + page_messages.extend(messages_by_session[session_id]) + + # Calculate page stats + page_message_count = len(page_messages) + first_timestamp = None + last_timestamp = None + total_input_tokens = 0 + total_output_tokens = 0 + total_cache_creation_tokens = 0 + total_cache_read_tokens = 0 + + for session_id in page_session_ids: + if session_id in session_data: + s = session_data[session_id] + if s.first_timestamp and ( + first_timestamp is None or s.first_timestamp < first_timestamp + ): + first_timestamp = s.first_timestamp + if s.last_timestamp and ( + last_timestamp is None or s.last_timestamp > last_timestamp + ): + last_timestamp = s.last_timestamp + total_input_tokens += s.total_input_tokens + total_output_tokens += s.total_output_tokens + total_cache_creation_tokens += s.total_cache_creation_tokens + total_cache_read_tokens += s.total_cache_read_tokens + + # Build page_info for navigation + has_prev = page_num > 1 + is_last_page = page_num == len(pages) + + page_info = { + "page_number": page_num, + "prev_link": _get_page_html_path(page_num - 1) if has_prev else None, + "next_link": _get_page_html_path(page_num + 1), # Always provide + "is_last_page": is_last_page, + } + + # Enable previous page's next link when creating a new page + if page_num > 1: + _enable_next_link_on_previous_page(output_dir, page_num - 1) + + # Build page_stats + date_range = "" + if first_timestamp and last_timestamp: + 
first_fmt = format_timestamp(first_timestamp) + last_fmt = format_timestamp(last_timestamp) + if first_fmt == last_fmt: + date_range = first_fmt + else: + date_range = f"{first_fmt} - {last_fmt}" + elif first_timestamp: + date_range = format_timestamp(first_timestamp) + + token_parts: List[str] = [] + if total_input_tokens: + token_parts.append(f"Input: {total_input_tokens:,}") + if total_output_tokens: + token_parts.append(f"Output: {total_output_tokens:,}") + if total_cache_creation_tokens: + token_parts.append(f"Cache Create: {total_cache_creation_tokens:,}") + if total_cache_read_tokens: + token_parts.append(f"Cache Read: {total_cache_read_tokens:,}") + token_summary = " | ".join(token_parts) if token_parts else None + + page_stats = { + "message_count": page_message_count, + "date_range": date_range, + "token_summary": token_summary, + } + + # Generate HTML for this page + page_title = f"{title} - Page {page_num}" if page_num > 1 else title + html_content = generate_html( + page_messages, + page_title, + page_info=page_info, + page_stats=page_stats, + ) + page_file.write_text(html_content, encoding="utf-8") + + # Update cache + cache_manager.update_page_cache( + page_number=page_num, + html_path=html_path, + page_size_config=page_size, + session_ids=page_session_ids, + message_count=page_message_count, + first_timestamp=first_timestamp, + last_timestamp=last_timestamp, + total_input_tokens=total_input_tokens, + total_output_tokens=total_output_tokens, + total_cache_creation_tokens=total_cache_creation_tokens, + total_cache_read_tokens=total_cache_read_tokens, + ) + + return first_page_path + + def convert_jsonl_to_html( input_path: Path, output_path: Optional[Path] = None, @@ -411,6 +871,7 @@ def convert_jsonl_to_html( generate_individual_sessions: bool = True, use_cache: bool = True, silent: bool = False, + page_size: int = 2000, ) -> Path: """Convert JSONL transcript(s) to HTML file(s). 
@@ -425,6 +886,7 @@ def convert_jsonl_to_html( generate_individual_sessions, use_cache, silent, + page_size=page_size, ) @@ -438,6 +900,7 @@ def convert_jsonl_to( use_cache: bool = True, silent: bool = False, image_export_mode: Optional[str] = None, + page_size: int = 2000, ) -> Path: """Convert JSONL transcript(s) to the specified format. @@ -451,6 +914,7 @@ def convert_jsonl_to( use_cache: Whether to use caching. silent: Whether to suppress output. image_export_mode: Image export mode ("placeholder", "embedded", "referenced"). + page_size: Maximum messages per page for combined transcript pagination. If None, uses format default (embedded for HTML, referenced for Markdown). """ if not input_path.exists(): @@ -466,6 +930,10 @@ def convert_jsonl_to( print(f"Warning: Failed to initialize cache manager: {e}") ext = get_file_extension(format) + + # Initialize working_directories for both branches (used by pagination in directory mode) + working_directories: List[str] = [] + if input_path.is_file(): # Single file mode - cache only available for directory mode if output_path is None: @@ -483,13 +951,37 @@ def convert_jsonl_to( input_path, cache_manager, from_date, to_date, silent ) + # Phase 1b: Early exit if nothing needs regeneration + # Skip expensive message loading if all HTML is up to date + if ( + cache_manager is not None + and not cache_was_updated + and from_date is None + and to_date is None + ): + # Check if combined HTML is stale + combined_stale, _ = cache_manager.is_html_stale(output_path.name, None) + if not combined_stale and not is_html_outdated(output_path): + # Check if any session HTML is stale + stale_sessions = cache_manager.get_stale_sessions() + if not stale_sessions or not generate_individual_sessions: + # Nothing needs regeneration - skip loading + if not silent: + print( + f"All HTML files are current for {input_path.name}, " + "skipping regeneration" + ) + return output_path + # Phase 2: Load messages (will use fresh cache when available) 
messages = load_directory_transcripts( input_path, cache_manager, from_date, to_date, silent ) - # Extract working directories directly from parsed messages - working_directories = extract_working_directories(messages) + # Get working directories from cache + working_directories = ( + cache_manager.get_working_directories() if cache_manager else [] + ) project_title = get_project_display_name(input_path.name, working_directories) title = f"Claude Transcripts - {project_title}" @@ -513,26 +1005,91 @@ def convert_jsonl_to( # Generate combined output file (check if regeneration needed) assert output_path is not None renderer = get_renderer(format, image_export_mode) - should_regenerate = ( - renderer.is_outdated(output_path) - or from_date is not None - or to_date is not None - or not output_path.exists() - or ( - input_path.is_dir() and cache_was_updated - ) # Regenerate if JSONL files changed - ) - if should_regenerate: - # For referenced images, pass the output directory - output_dir = output_path.parent - content = renderer.generate(messages, title, output_dir=output_dir) - assert content is not None - output_path.write_text(content, encoding="utf-8") - else: - print( - f"{format.upper()} file {output_path.name} is current, skipping regeneration" + # Decide whether to use pagination (HTML only, directory mode, no date filter) + use_pagination = False + cached_data = cache_manager.get_cached_project_data() if cache_manager else None + total_message_count = ( + cached_data.total_message_count if cached_data else len(messages) + ) + existing_page_count = cache_manager.get_page_count() if cache_manager else 0 + + if ( + format == "html" + and cache_manager is not None + and input_path.is_dir() + and from_date is None + and to_date is None + ): + # Use pagination if total messages exceed page_size or there are existing pages + use_pagination = total_message_count > page_size or existing_page_count > 1 + + if use_pagination: + # Use paginated HTML generation + assert 
cache_manager is not None # Ensured by use_pagination condition + # Use cached session data if available, otherwise build from messages + if cached_data is not None: + warmup_session_ids = get_warmup_session_ids(messages) + current_session_ids: set[str] = set() + for message in messages: + session_id = getattr(message, "sessionId", "") + if session_id and session_id not in warmup_session_ids: + current_session_ids.add(session_id) + session_data = { + session_id: session_cache + for session_id, session_cache in cached_data.sessions.items() + if session_id in current_session_ids + } + else: + session_data = _build_session_data_from_messages(messages) + output_path = _generate_paginated_html( + messages, + input_path, + title, + page_size, + cache_manager, + session_data, + working_directories, + silent=silent, ) + else: + # Use single-file generation for small projects or filtered views + # Use incremental regeneration via html_cache when available + if cache_manager is not None and input_path.is_dir(): + is_stale, _reason = cache_manager.is_html_stale(output_path.name, None) + should_regenerate = ( + is_stale + or renderer.is_outdated(output_path) + or from_date is not None + or to_date is not None + or not output_path.exists() + ) + else: + # Fallback: old logic for single file mode or no cache + should_regenerate = ( + renderer.is_outdated(output_path) + or from_date is not None + or to_date is not None + or not output_path.exists() + or (input_path.is_dir() and cache_was_updated) + ) + + if should_regenerate: + # For referenced images, pass the output directory + output_dir = output_path.parent + content = renderer.generate(messages, title, output_dir=output_dir) + assert content is not None + output_path.write_text(content, encoding="utf-8") + + # Update html_cache for combined transcript (HTML only) + if format == "html" and cache_manager is not None: + cache_manager.update_html_cache( + output_path.name, None, total_message_count + ) + elif not silent: + 
print( + f"{format.upper()} file {output_path.name} is current, skipping regeneration" + ) # Generate individual session files if requested and in directory mode if generate_individual_sessions and input_path.is_dir(): @@ -545,11 +1102,45 @@ def convert_jsonl_to( cache_manager, cache_was_updated, image_export_mode, + silent=silent, ) return output_path +def has_cache_changes( + project_dir: Path, + cache_manager: Optional[CacheManager], + from_date: Optional[str] = None, + to_date: Optional[str] = None, +) -> bool: + """Check if cache needs updating (fast mtime comparison only). + + Returns True if there are modified files or cache is stale. + Does NOT load any messages - that's deferred to ensure_fresh_cache. + """ + if cache_manager is None: + return True # No cache means we need to process + + jsonl_files = list(project_dir.glob("*.jsonl")) + if not jsonl_files: + return False + + # Get cached project data + cached_project_data = cache_manager.get_cached_project_data() + + # Check various invalidation conditions + modified_files = cache_manager.get_modified_files(jsonl_files) + + return ( + cached_project_data is None + or from_date is not None + or to_date is not None + or bool(modified_files) + or (cached_project_data.total_message_count == 0 and bool(jsonl_files)) + ) + + def ensure_fresh_cache( project_dir: Path, cache_manager: Optional[CacheManager], @@ -557,7 +1148,11 @@ def ensure_fresh_cache( to_date: Optional[str] = None, silent: bool = False, ) -> bool: - """Ensure cache is fresh and populated. Returns True if cache was updated.""" + """Ensure cache is fresh and populated. Returns True if cache was updated. + + This does the heavy lifting of loading and parsing files. + Call has_cache_changes() first for a fast check. 
+ """ if cache_manager is None: return False @@ -744,11 +1339,6 @@ def _update_cache_with_session_data( # Update cache with filtered session data cache_manager.update_session_cache(sessions_cache_data) - # Update cache with working directories (from filtered sessions) - cache_manager.update_working_directories( - extract_working_directories(list(sessions_cache_data.values())) - ) - # Update cache with project aggregates cache_manager.update_project_aggregates( total_message_count=total_message_count, @@ -874,8 +1464,13 @@ def _generate_individual_session_files( cache_manager: Optional["CacheManager"] = None, cache_was_updated: bool = False, image_export_mode: Optional[str] = None, -) -> None: - """Generate individual files for each session in the specified format.""" + silent: bool = False, +) -> int: + """Generate individual files for each session in the specified format. + + Returns: + Number of sessions regenerated + """ ext = get_file_extension(format) # Pre-compute warmup sessions to exclude them warmup_session_ids = get_warmup_session_ids(messages) @@ -890,19 +1485,23 @@ def _generate_individual_session_files( # Get session data from cache for better titles session_data: dict[str, Any] = {} - working_directories = None + working_directories: list[str] = [] if cache_manager is not None: project_cache = cache_manager.get_cached_project_data() if project_cache: session_data = {s.session_id: s for s in project_cache.sessions.values()} - # Get working directories for project title - if project_cache.working_directories: - working_directories = project_cache.working_directories + # Get working directories for project title + working_directories = cache_manager.get_working_directories() + + # Only generate HTML for sessions that are tracked in the sessions table + # (filters out warmup-only and sessions without user messages) + session_ids = session_ids & set(session_data.keys()) project_title = get_project_display_name(output_dir.name, working_directories) # Get 
renderer once outside the loop renderer = get_renderer(format, image_export_mode) + regenerated_count = 0 # Generate HTML file for each session for session_id in session_ids: @@ -937,15 +1536,29 @@ def _generate_individual_session_files( # Check if session file needs regeneration session_file_path = output_dir / f"session-{session_id}.{ext}" + session_file_name = f"session-{session_id}.{ext}" - # Only regenerate if outdated, doesn't exist, or date filtering is active - should_regenerate_session = ( - renderer.is_outdated(session_file_path) - or from_date is not None - or to_date is not None - or not session_file_path.exists() - or cache_was_updated # Regenerate if JSONL files changed - ) + # Use incremental regeneration: check per-session staleness via html_cache + if cache_manager is not None and format == "html": + is_stale, _reason = cache_manager.is_html_stale( + session_file_name, session_id + ) + should_regenerate_session = ( + is_stale + or renderer.is_outdated(session_file_path) + or from_date is not None + or to_date is not None + or not session_file_path.exists() + ) + else: + # Fallback without cache or non-HTML formats + should_regenerate_session = ( + renderer.is_outdated(session_file_path) + or from_date is not None + or to_date is not None + or not session_file_path.exists() + or cache_was_updated + ) if should_regenerate_session: # Generate session content @@ -955,11 +1568,75 @@ def _generate_individual_session_files( assert session_content is not None # Write session file session_file_path.write_text(session_content, encoding="utf-8") - else: + regenerated_count += 1 + + # Update html_cache to track this generation (HTML only) + if cache_manager is not None and format == "html": + # Use message count from cache (pre-deduplication) to match + # the count used in is_html_stale() + if session_id in session_data: + session_message_count = session_data[session_id].message_count + else: + # Fallback: count from messages list (less accurate due to dedup) 
+ session_message_count = sum( + 1 + for m in messages + if hasattr(m, "sessionId") + and getattr(m, "sessionId") == session_id + ) + cache_manager.update_html_cache( + session_file_name, session_id, session_message_count + ) + elif not silent: print( f"Session file {session_file_path.name} is current, skipping regeneration" ) + return regenerated_count + + +def _get_cleanup_period_days() -> Optional[int]: + """Read cleanupPeriodDays from Claude Code settings. + + Checks ~/.claude/settings.json for the cleanupPeriodDays setting. + + Returns: + The configured cleanup period in days, or None if not set/readable. + """ + import json + + settings_path = Path.home() / ".claude" / "settings.json" + if not settings_path.exists(): + return None + + try: + with open(settings_path, "r", encoding="utf-8") as f: + settings = json.load(f) + return settings.get("cleanupPeriodDays") + except (json.JSONDecodeError, OSError): + return None + + +def _print_archived_sessions_note(total_archived: int) -> None: + """Print a note about archived sessions and how to restore them. + + Args: + total_archived: Total number of archived sessions across all projects. 
+ """ + cleanup_days = _get_cleanup_period_days() + cleanup_info = ( + f" (cleanupPeriodDays: {cleanup_days})" + if cleanup_days is not None + else " (cleanupPeriodDays: 30 default)" + ) + + print( + f"\nNote: {total_archived} archived session(s) found{cleanup_info}.\n" + " These sessions were cached before their JSONL files were deleted.\n" + " To restore them or adjust cleanup settings, see:\n" + " https://github.com/daaain/claude-code-log/blob/main/dev-docs/restoring-archived-sessions.md" + ) + def process_projects_hierarchy( projects_path: Path, @@ -969,8 +1646,26 @@ def process_projects_hierarchy( generate_individual_sessions: bool = True, output_format: str = "html", image_export_mode: Optional[str] = None, + silent: bool = True, + page_size: int = 2000, ) -> Path: - """Process the entire ~/.claude/projects/ hierarchy and create linked output files.""" + """Process the entire ~/.claude/projects/ hierarchy and create linked output files. + + Args: + projects_path: Path to the projects directory + from_date: Optional date filter start + to_date: Optional date filter end + use_cache: Whether to use SQLite cache + generate_individual_sessions: Whether to generate per-session HTML files + output_format: Output format (html, md, markdown) + image_export_mode: Image export mode for markdown + silent: If True, suppress verbose per-file logging (show summary only) + page_size: Maximum messages per page for combined transcript pagination + """ + import time + + start_time = time.time() + if not projects_path.exists(): raise FileNotFoundError(f"Projects path not found: {projects_path}") @@ -980,7 +1675,16 @@ def process_projects_hierarchy( if child.is_dir() and list(child.glob("*.jsonl")): project_dirs.append(child) - if not project_dirs: + # Find archived projects (projects in cache but without JSONL files) + archived_project_dirs: list[Path] = [] + if use_cache: + cached_projects = get_all_cached_projects(projects_path) + active_project_paths = {str(p) for p in 
project_dirs} + for project_path_str, is_archived in cached_projects: + if is_archived and project_path_str not in active_project_paths: + archived_project_dirs.append(Path(project_path_str)) + + if not project_dirs and not archived_project_dirs: raise FileNotFoundError( f"No project directories with JSONL files found in {projects_path}" ) @@ -991,7 +1695,20 @@ def process_projects_hierarchy( # Process each project directory project_summaries: list[dict[str, Any]] = [] any_cache_updated = False # Track if any project had cache updates + + # Aggregated stats + total_projects = len(project_dirs) + projects_with_updates = 0 + total_sessions = 0 + total_archived = 0 + + # Per-project stats for summary output + project_stats: List[tuple[str, GenerationStats]] = [] + for project_dir in sorted(project_dirs): + project_start_time = time.time() + stats = GenerationStats() + try: # Initialize cache manager for this project cache_manager = None @@ -999,27 +1716,109 @@ def process_projects_hierarchy( try: cache_manager = CacheManager(project_dir, library_version) except Exception as e: - print(f"Warning: Failed to initialize cache for {project_dir}: {e}") + stats.add_warning(f"Failed to initialize cache: {e}") - # Phase 1: Ensure cache is fresh and populated - cache_was_updated = ensure_fresh_cache( - project_dir, cache_manager, from_date, to_date + # Phase 1: Fast check if anything needs updating (mtime comparison only) + # Exclude agent files - they are loaded via session references, not directly + jsonl_files = [ + f + for f in project_dir.glob("*.jsonl") + if not f.name.startswith("agent-") + ] + # Valid session IDs are from existing JSONL files (file stem = session ID) + valid_session_ids = {f.stem for f in jsonl_files} + modified_files = ( + cache_manager.get_modified_files(jsonl_files) if cache_manager else [] + ) + # Pass valid_session_ids to skip archived sessions (JSONL deleted) + stale_sessions = ( + cache_manager.get_stale_sessions(valid_session_ids) + if 
cache_manager + else [] ) - if cache_was_updated: - any_cache_updated = True - - # Phase 2: Generate output for this project (optionally individual session files) - output_path = convert_jsonl_to( - output_format, - project_dir, - None, - from_date, - to_date, - generate_individual_sessions, - use_cache, - image_export_mode=image_export_mode, + # Count archived sessions (cached but JSONL deleted) + archived_count = ( + cache_manager.get_archived_session_count(valid_session_ids) + if cache_manager + else 0 + ) + total_archived += archived_count + output_path = project_dir / "combined_transcripts.html" + # Check combined_stale using the appropriate cache: + # - Paginated projects store data in html_pages table (via save_page_cache) + # - Non-paginated projects store data in html_cache table (via update_html_cache) + if cache_manager is not None: + existing_page_count = cache_manager.get_page_count() + if existing_page_count > 0: + # Paginated project: check page 1 staleness + combined_stale = cache_manager.is_page_stale(1, page_size)[0] + else: + # Non-paginated project: check html_cache + combined_stale = cache_manager.is_html_stale( + output_path.name, None + )[0] + else: + combined_stale = True + + # Determine if we need to do any work + needs_work = ( + bool(modified_files) + or bool(stale_sessions) + or combined_stale + or not output_path.exists() ) + # Build archived suffix for output (shown on both cached and work paths) + archived_suffix = ( + f", {archived_count} archived" if archived_count > 0 else "" + ) + + if not needs_work: + # Fast path: nothing to do, just collect stats for index + stats.files_loaded_from_cache = len(jsonl_files) + stats.total_time = time.time() - project_start_time + # Show progress + print( + f" {project_dir.name}: cached{archived_suffix} ({stats.total_time:.1f}s)" + ) + else: + # Slow path: update cache and regenerate output + stats.files_updated = len(modified_files) if modified_files else 0 + stats.files_loaded_from_cache = 
len(jsonl_files) - stats.files_updated + stats.sessions_regenerated = len(stale_sessions) + + # Track if cache was updated (for index regeneration) + if modified_files: + any_cache_updated = True + projects_with_updates += 1 + + # Generate output for this project (handles cache updates internally) + output_path = convert_jsonl_to( + output_format, + project_dir, + None, + from_date, + to_date, + generate_individual_sessions, + use_cache, + silent=silent, + image_export_mode=image_export_mode, + page_size=page_size, + ) + + # Track timing + stats.total_time = time.time() - project_start_time + # Show progress + progress_parts: List[str] = [] + if stats.files_updated > 0: + progress_parts.append(f"{stats.files_updated} files updated") + if stats.sessions_regenerated > 0: + progress_parts.append(f"{stats.sessions_regenerated} sessions") + detail = ", ".join(progress_parts) if progress_parts else "regenerated" + print( + f" {project_dir.name}: {detail}{archived_suffix} ({stats.total_time:.1f}s)" + ) + # Get project info for index - use cached data if available # Exclude agent files (they are loaded via session references) jsonl_files = [ @@ -1036,6 +1835,8 @@ def process_projects_hierarchy( if cache_manager is not None: cached_project_data = cache_manager.get_cached_project_data() if cached_project_data is not None: + # Track total sessions for stats + stats.sessions_total = len(cached_project_data.sessions) # Use cached aggregation data project_summaries.append( { @@ -1051,7 +1852,8 @@ def process_projects_hierarchy( "total_cache_read_tokens": cached_project_data.total_cache_read_tokens, "latest_timestamp": cached_project_data.latest_timestamp, "earliest_timestamp": cached_project_data.earliest_timestamp, - "working_directories": cached_project_data.working_directories, + "working_directories": cache_manager.get_working_directories(), + "is_archived": False, "sessions": [ { "id": session_data.session_id, @@ -1073,6 +1875,8 @@ def process_projects_hierarchy( ], } ) + # 
Add project stats + project_stats.append((project_dir.name, stats)) continue # Fallback for when cache is not available (should be rare) @@ -1080,8 +1884,11 @@ def process_projects_hierarchy( f"Warning: No cached data available for {project_dir.name}, using fallback processing" ) messages = load_directory_transcripts( - project_dir, cache_manager, from_date, to_date + project_dir, cache_manager, from_date, to_date, silent=silent ) + # Ensure cache is populated with session data (including working directories) + if cache_manager: + _update_cache_with_session_data(cache_manager, messages) if from_date or to_date: messages = filter_messages_by_date(messages, from_date, to_date) @@ -1153,12 +1960,21 @@ def process_projects_hierarchy( "total_cache_read_tokens": total_cache_read_tokens, "latest_timestamp": latest_timestamp, "earliest_timestamp": earliest_timestamp, - "working_directories": extract_working_directories(messages), + "working_directories": cache_manager.get_working_directories() + if cache_manager + else [], + "is_archived": False, "sessions": sessions_data, } ) + # Track session count in stats for fallback path + stats.sessions_total = len(sessions_data) + project_stats.append((project_dir.name, stats)) + except Exception as e: prev_project = project_summaries[-1] if project_summaries else "(none)" + stats.add_error(str(e)) + project_stats.append((project_dir.name, stats)) print( f"Warning: Failed to process {project_dir}: {e}\n" f"Previous (in alphabetical order) project before error: {prev_project}" @@ -1166,17 +1982,106 @@ def process_projects_hierarchy( ) continue + # Process archived projects (projects in cache but without JSONL files) + archived_project_count = 0 + for archived_dir in sorted(archived_project_dirs): + try: + # Initialize cache manager for archived project + cache_manager = CacheManager(archived_dir, library_version) + cached_project_data = cache_manager.get_cached_project_data() + + if cached_project_data is None: + continue + + 
archived_project_count += 1 + print( + f" {archived_dir.name}: [ARCHIVED] ({len(cached_project_data.sessions)} sessions)" + ) + + # Add archived project to summaries + project_summaries.append( + { + "name": archived_dir.name, + "path": archived_dir, + "html_file": f"{archived_dir.name}/combined_transcripts.html", + "jsonl_count": 0, + "message_count": cached_project_data.total_message_count, + "last_modified": 0.0, + "total_input_tokens": cached_project_data.total_input_tokens, + "total_output_tokens": cached_project_data.total_output_tokens, + "total_cache_creation_tokens": cached_project_data.total_cache_creation_tokens, + "total_cache_read_tokens": cached_project_data.total_cache_read_tokens, + "latest_timestamp": cached_project_data.latest_timestamp, + "earliest_timestamp": cached_project_data.earliest_timestamp, + "working_directories": cache_manager.get_working_directories(), + "is_archived": True, + "sessions": [ + { + "id": session_data.session_id, + "summary": session_data.summary, + "timestamp_range": format_timestamp_range( + session_data.first_timestamp, + session_data.last_timestamp, + ), + "first_timestamp": session_data.first_timestamp, + "last_timestamp": session_data.last_timestamp, + "message_count": session_data.message_count, + "first_user_message": session_data.first_user_message + or "[No user message found in session.]", + } + for session_data in cached_project_data.sessions.values() + if session_data.first_user_message + and session_data.first_user_message != "Warmup" + ], + } + ) + except Exception as e: + print(f"Warning: Failed to process archived project {archived_dir}: {e}") + continue + + # Update total projects count to include archived + total_projects = len(project_dirs) + archived_project_count + # Generate index (always regenerate if outdated) ext = get_file_extension(output_format) index_path = projects_path / f"index.{ext}" renderer = get_renderer(output_format, image_export_mode) + index_regenerated = False if 
renderer.is_outdated(index_path) or from_date or to_date or any_cache_updated: index_content = renderer.generate_projects_index( project_summaries, from_date, to_date ) assert index_content is not None index_path.write_text(index_content, encoding="utf-8") - else: + index_regenerated = True + elif not silent: print(f"Index {ext.upper()} is current, skipping regeneration") + # Count total sessions from project summaries + for summary in project_summaries: + total_sessions += len(summary.get("sessions", [])) + + # Print summary + elapsed = time.time() - start_time + + # Print any errors/warnings that occurred + for project_name, stats in project_stats: + for warning in stats.warnings: + print(f" Warning ({project_name}): {warning}") + for error in stats.errors: + print(f" Error ({project_name}): {error}") + + # Global summary + summary_parts: List[str] = [] + summary_parts.append(f"Processed {total_projects} projects in {elapsed:.1f}s") + if projects_with_updates > 0: + summary_parts.append(f" {projects_with_updates} projects updated") + if index_regenerated: + summary_parts.append(" Index regenerated") + print("\n".join(summary_parts)) + + # Show archived sessions note if any exist + if total_archived > 0: + _print_archived_sessions_note(total_archived) + return index_path diff --git a/claude_code_log/factories/user_factory.py b/claude_code_log/factories/user_factory.py index f14a4e4d..02cb01fa 100644 --- a/claude_code_log/factories/user_factory.py +++ b/claude_code_log/factories/user_factory.py @@ -445,7 +445,7 @@ def create_user_message( for item in content_list: # Check for text content if hasattr(item, "text"): - item_text: str = getattr(item, "text") # type: ignore[assignment] + item_text: str = getattr(item, "text") if ide_content := create_ide_notification_content(item_text): # Add IDE notification item first @@ -462,6 +462,6 @@ def create_user_message( items.append(item) elif hasattr(item, "source") and getattr(item, "type", None) == "image": # Duck-typed 
image content - convert to our Pydantic model - items.append(ImageContent.model_validate(item.model_dump())) # type: ignore[union-attr] + items.append(ImageContent.model_validate(item.model_dump())) return UserTextMessage(items=items, meta=meta) diff --git a/claude_code_log/html/renderer.py b/claude_code_log/html/renderer.py index 747671b0..0a47375f 100644 --- a/claude_code_log/html/renderer.py +++ b/claude_code_log/html/renderer.py @@ -478,8 +478,19 @@ def generate( title: Optional[str] = None, combined_transcript_link: Optional[str] = None, output_dir: Optional[Path] = None, + page_info: Optional[dict[str, Any]] = None, + page_stats: Optional[dict[str, Any]] = None, ) -> str: - """Generate HTML from transcript messages.""" + """Generate HTML from transcript messages. + + Args: + messages: List of transcript entries to render. + title: Optional title for the output. + combined_transcript_link: Optional link to combined transcript. + output_dir: Optional output directory for referenced images. + page_info: Optional pagination info (page_number, prev_link, next_link). + page_stats: Optional page statistics (message_count, date_range, token_summary). + """ import time t_start = time.time() @@ -516,6 +527,8 @@ def generate( css_class_from_message=css_class_from_message, get_message_emoji=get_message_emoji, is_session_header=is_session_header, + page_info=page_info, + page_stats=page_stats, ) ) @@ -592,12 +605,27 @@ def generate_html( messages: list[TranscriptEntry], title: Optional[str] = None, combined_transcript_link: Optional[str] = None, + page_info: Optional[dict[str, Any]] = None, + page_stats: Optional[dict[str, Any]] = None, ) -> str: """Generate HTML from transcript messages using Jinja2 templates. This is a convenience function that delegates to HtmlRenderer.generate. + + Args: + messages: List of transcript entries to render. + title: Optional title for the output. + combined_transcript_link: Optional link to combined transcript. 
+ page_info: Optional pagination info (page_number, prev_link, next_link). + page_stats: Optional page statistics (message_count, date_range, token_summary). """ - return HtmlRenderer().generate(messages, title, combined_transcript_link) + return HtmlRenderer().generate( + messages, + title, + combined_transcript_link, + page_info=page_info, + page_stats=page_stats, + ) def generate_session_html( diff --git a/claude_code_log/html/renderer_code.py b/claude_code_log/html/renderer_code.py index 7a633d45..8878d82b 100644 --- a/claude_code_log/html/renderer_code.py +++ b/claude_code_log/html/renderer_code.py @@ -12,10 +12,12 @@ import re from typing import Callable, Optional -from pygments import highlight # type: ignore[reportUnknownVariableType] -from pygments.lexers import TextLexer, get_lexer_by_name, get_all_lexers # type: ignore[reportUnknownVariableType] -from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType] -from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType] +from pygments import highlight +from pygments.lexer import Lexer +from pygments.lexers import TextLexer, get_lexer_by_name, get_all_lexers +from pygments.formatter import Formatter +from pygments.formatters import HtmlFormatter +from pygments.util import ClassNotFound from ..renderer_timings import timing_stat @@ -49,7 +51,7 @@ def _init_lexer_caches() -> tuple[dict[str, str], dict[str, str]]: extension_cache: dict[str, str] = {} # Use public API: get_all_lexers() returns (name, aliases, patterns, mimetypes) tuples - for name, aliases, patterns, mimetypes in get_all_lexers(): # type: ignore[reportUnknownVariableType] + for _name, aliases, patterns, _mimetypes in get_all_lexers(): if aliases and patterns: # Use first alias as the lexer name lexer_alias = aliases[0] @@ -93,6 +95,9 @@ def highlight_code_with_pygments( # Get basename for matching (patterns are like "*.py") basename = os.path.basename(file_path).lower() + # Default to plain text 
lexer + lexer: Lexer = TextLexer() + try: # OPTIMIZATION: Try fast extension lookup first (O(1) dict lookup) lexer_alias = None @@ -107,18 +112,16 @@ def highlight_code_with_pygments( lexer_alias = lex_alias break - # Get lexer or use TextLexer as fallback + # Get lexer based on file extension # Note: stripall=False preserves leading whitespace (important for code indentation) if lexer_alias: - lexer = get_lexer_by_name(lexer_alias, stripall=False) # type: ignore[reportUnknownVariableType] - else: - lexer = TextLexer() # type: ignore[reportUnknownVariableType] + lexer = get_lexer_by_name(lexer_alias, stripall=False) except ClassNotFound: - # Fall back to plain text lexer - lexer = TextLexer() # type: ignore[reportUnknownVariableType] + # Fall back to plain text lexer (already set as default) + pass # Create formatter with line numbers in table format - formatter = HtmlFormatter( # type: ignore[reportUnknownVariableType] + formatter: Formatter = HtmlFormatter( linenos="table" if show_linenos else False, cssclass="highlight", wrapcode=True, @@ -127,7 +130,7 @@ def highlight_code_with_pygments( # Highlight the code with timing if enabled with timing_stat("_pygments_timings"): - return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType] + return str(highlight(code, lexer, formatter)) def truncate_highlighted_preview(highlighted_html: str, max_lines: int) -> str: diff --git a/claude_code_log/html/templates/components/page_nav_styles.css b/claude_code_log/html/templates/components/page_nav_styles.css new file mode 100644 index 00000000..dfa81398 --- /dev/null +++ b/claude_code_log/html/templates/components/page_nav_styles.css @@ -0,0 +1,79 @@ +/* Page navigation styles for paginated combined transcripts */ + +.page-navigation { + text-align: center; + margin-bottom: 20px; + padding: 15px; + background-color: #ffffff66; + border-radius: 8px; + box-shadow: -7px -7px 10px #eeeeee44, 7px 7px 10px #00000011; + border-left: #ffffff66 1px solid; + 
border-top: #ffffff66 1px solid; + border-bottom: #00000017 1px solid; + border-right: #00000017 1px solid; +} + +.page-header { + display: flex; + flex-direction: column; + align-items: center; + gap: 10px; +} + +.page-title { + font-size: 1.3em; + font-weight: 600; + color: var(--system-warning-color); +} + +.page-stats { + display: flex; + gap: 20px; + flex-wrap: wrap; + justify-content: center; + color: var(--text-muted); + font-size: 0.9em; +} + +.page-stats .stat { + display: flex; + align-items: center; + gap: 5px; +} + +.page-nav-links { + display: flex; + justify-content: center; + gap: 30px; + margin-top: 12px; + padding-top: 12px; + border-top: 1px solid var(--border-light); +} + +.page-nav-link { + text-decoration: none; + color: var(--system-warning-color); + font-weight: 500; + padding: 6px 16px; + border-radius: 4px; + background-color: var(--bg-hover); + transition: all 0.2s ease; +} + +.page-nav-link:hover { + background-color: var(--session-bg-dimmed); + transform: translateY(-1px); +} + +.page-nav-link.prev::before { + content: ''; +} + +.page-nav-link.next::after { + content: ''; +} + +/* Hide next link on last page (will be revealed via in-place editing when new page is created) */ +.page-nav-link.next.last-page { + display: none; +} diff --git a/claude_code_log/html/templates/components/project_card_styles.css b/claude_code_log/html/templates/components/project_card_styles.css index ebae9931..c6d40f2f 100644 --- a/claude_code_log/html/templates/components/project_card_styles.css +++ b/claude_code_log/html/templates/components/project_card_styles.css @@ -111,4 +111,28 @@ .project-sessions details[open] summary { margin-bottom: 10px; +} + +/* Archived project styling */ +.project-card.archived { + opacity: 0.6; + background-color: #f5f5f522; +} + +.project-card.archived:hover { + opacity: 0.8; +} + +.archived-badge { + display: inline-block; + background-color: #888; + color: white; + font-size: 0.65em; + font-weight: 600; + padding: 2px 8px; + 
border-radius: 4px; + margin-left: 10px; + vertical-align: middle; + text-transform: uppercase; + letter-spacing: 0.5px; } \ No newline at end of file diff --git a/claude_code_log/html/templates/index.html b/claude_code_log/html/templates/index.html index a539386a..4b2bf430 100644 --- a/claude_code_log/html/templates/index.html +++ b/claude_code_log/html/templates/index.html @@ -59,10 +59,14 @@
{escaped_value}"
)
except (TypeError, ValueError):
- escaped_value = escape_html(str(value)) # type: ignore[arg-type]
+ # Fallback: convert to string when JSON serialization fails
+ escaped_value = escape_html(str(cast(object, value)))
value_html = escaped_value
else:
# Simple value, render as-is (or collapsible if long)
diff --git a/claude_code_log/html/utils.py b/claude_code_log/html/utils.py
index 8822ab3e..613bab43 100644
--- a/claude_code_log/html/utils.py
+++ b/claude_code_log/html/utils.py
@@ -200,10 +200,12 @@ def escape_html(text: str) -> str:
def _create_pygments_plugin() -> Any:
"""Create a mistune plugin that uses Pygments for code block syntax highlighting."""
- from pygments import highlight # type: ignore[reportUnknownVariableType]
- from pygments.lexers import get_lexer_by_name, TextLexer # type: ignore[reportUnknownVariableType]
- from pygments.formatters import HtmlFormatter # type: ignore[reportUnknownVariableType]
- from pygments.util import ClassNotFound # type: ignore[reportUnknownVariableType]
+ from pygments import highlight
+ from pygments.lexer import Lexer
+ from pygments.lexers import get_lexer_by_name, TextLexer
+ from pygments.formatter import Formatter
+ from pygments.formatters import HtmlFormatter
+ from pygments.util import ClassNotFound
def plugin_pygments(md: Any) -> None:
"""Plugin to add Pygments syntax highlighting to code blocks."""
@@ -214,19 +216,21 @@ def block_code(code: str, info: Optional[str] = None) -> str:
if info:
# Language hint provided, use Pygments
lang = info.split()[0] if info else ""
+ # Default to plain text lexer
+ lexer: Lexer = TextLexer()
try:
- lexer = get_lexer_by_name(lang, stripall=False) # type: ignore[reportUnknownVariableType]
+ lexer = get_lexer_by_name(lang, stripall=False)
except ClassNotFound:
- lexer = TextLexer() # type: ignore[reportUnknownVariableType]
+ pass # Already have default
- formatter = HtmlFormatter( # type: ignore[reportUnknownVariableType]
+ formatter: Formatter = HtmlFormatter(
linenos=False, # No line numbers in markdown code blocks
cssclass="highlight",
wrapcode=True,
)
# Track Pygments timing if enabled
with timing_stat("_pygments_timings"):
- return str(highlight(code, lexer, formatter)) # type: ignore[reportUnknownArgumentType]
+ return str(highlight(code, lexer, formatter))
else:
# No language hint, use default rendering
return original_render(code, info)
@@ -438,5 +442,7 @@ def get_template_environment() -> Environment:
autoescape=select_autoescape(["html", "xml"]),
)
# Add custom filters/functions
- env.globals["starts_with_emoji"] = starts_with_emoji # type: ignore[index]
+ # Cast to Any to bypass Jinja2's overly strict globals type
+ globals_dict: Any = env.globals
+ globals_dict["starts_with_emoji"] = starts_with_emoji
return env
diff --git a/claude_code_log/migrations/001_initial_schema.sql b/claude_code_log/migrations/001_initial_schema.sql
new file mode 100644
index 00000000..b90a6d6c
--- /dev/null
+++ b/claude_code_log/migrations/001_initial_schema.sql
@@ -0,0 +1,114 @@
+-- Initial schema for SQLite cache
+-- Migration: 001
+-- Description: Creates all tables and indexes for the cache system
+
+-- Project metadata
+CREATE TABLE IF NOT EXISTS projects (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ project_path TEXT UNIQUE NOT NULL,
+ version TEXT NOT NULL,
+ cache_created TEXT NOT NULL,
+ last_updated TEXT NOT NULL,
+ total_message_count INTEGER DEFAULT 0,
+ total_input_tokens INTEGER DEFAULT 0,
+ total_output_tokens INTEGER DEFAULT 0,
+ total_cache_creation_tokens INTEGER DEFAULT 0,
+ total_cache_read_tokens INTEGER DEFAULT 0,
+ earliest_timestamp TEXT DEFAULT '',
+ latest_timestamp TEXT DEFAULT ''
+);
+
+CREATE INDEX IF NOT EXISTS idx_projects_path ON projects(project_path);
+
+-- File tracking for invalidation
+CREATE TABLE IF NOT EXISTS cached_files (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ project_id INTEGER NOT NULL,
+ file_name TEXT NOT NULL,
+ file_path TEXT NOT NULL,
+ source_mtime REAL NOT NULL,
+ cached_mtime REAL NOT NULL,
+ message_count INTEGER DEFAULT 0,
+ FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
+ UNIQUE(project_id, file_name)
+);
+
+CREATE INDEX IF NOT EXISTS idx_cached_files_project ON cached_files(project_id);
+CREATE INDEX IF NOT EXISTS idx_cached_files_name ON cached_files(file_name);
+
+-- Session aggregates
+CREATE TABLE IF NOT EXISTS sessions (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ project_id INTEGER NOT NULL,
+ session_id TEXT NOT NULL,
+ summary TEXT,
+ first_timestamp TEXT NOT NULL DEFAULT '',
+ last_timestamp TEXT NOT NULL DEFAULT '',
+ message_count INTEGER DEFAULT 0,
+ first_user_message TEXT DEFAULT '',
+ cwd TEXT,
+ total_input_tokens INTEGER DEFAULT 0,
+ total_output_tokens INTEGER DEFAULT 0,
+ total_cache_creation_tokens INTEGER DEFAULT 0,
+ total_cache_read_tokens INTEGER DEFAULT 0,
+ FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
+ UNIQUE(project_id, session_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project_id);
+CREATE INDEX IF NOT EXISTS idx_sessions_session_id ON sessions(session_id);
+CREATE INDEX IF NOT EXISTS idx_sessions_first_timestamp ON sessions(first_timestamp);
+CREATE INDEX IF NOT EXISTS idx_sessions_cwd ON sessions(cwd);
+
+-- Fully normalised messages
+CREATE TABLE IF NOT EXISTS messages (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ project_id INTEGER NOT NULL,
+ file_id INTEGER NOT NULL,
+
+ -- Core fields
+ type TEXT NOT NULL,
+ timestamp TEXT,
+ session_id TEXT,
+
+ -- BaseTranscriptEntry fields (prefixed)
+ _uuid TEXT,
+ _parent_uuid TEXT,
+ _is_sidechain INTEGER DEFAULT 0,
+ _user_type TEXT,
+ _cwd TEXT,
+ _version TEXT,
+ _is_meta INTEGER,
+ _agent_id TEXT,
+
+ -- AssistantTranscriptEntry
+ _request_id TEXT,
+
+ -- Flattened usage tokens
+ input_tokens INTEGER,
+ output_tokens INTEGER,
+ cache_creation_tokens INTEGER,
+ cache_read_tokens INTEGER,
+
+ -- SummaryTranscriptEntry
+ _leaf_uuid TEXT,
+
+ -- SystemTranscriptEntry
+ _level TEXT,
+
+ -- QueueOperationTranscriptEntry
+ _operation TEXT,
+
+ -- Message content as compressed JSON (zlib)
+ content BLOB NOT NULL,
+
+ FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
+ FOREIGN KEY (file_id) REFERENCES cached_files(id) ON DELETE CASCADE
+);
+
+-- Indexes for performance
+CREATE INDEX IF NOT EXISTS idx_messages_timestamp ON messages(timestamp);
+CREATE INDEX IF NOT EXISTS idx_messages_project_timestamp ON messages(project_id, timestamp);
+CREATE INDEX IF NOT EXISTS idx_messages_file ON messages(file_id);
+CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id);
+CREATE INDEX IF NOT EXISTS idx_messages_uuid ON messages(_uuid);
diff --git a/claude_code_log/migrations/002_html_cache.sql b/claude_code_log/migrations/002_html_cache.sql
new file mode 100644
index 00000000..00db64ec
--- /dev/null
+++ b/claude_code_log/migrations/002_html_cache.sql
@@ -0,0 +1,18 @@
+-- HTML cache for incremental regeneration
+-- Migration: 002
+-- Description: Tracks when HTML files were generated to enable incremental regeneration
+
+CREATE TABLE IF NOT EXISTS html_cache (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ project_id INTEGER NOT NULL,
+ html_path TEXT NOT NULL, -- e.g., "session-abc123.html" or "combined_transcripts.html"
+ generated_at TEXT NOT NULL, -- ISO timestamp when HTML was generated
+ source_session_id TEXT, -- session_id for individual files, NULL for combined
+ message_count INTEGER, -- for sanity checking
+ library_version TEXT NOT NULL, -- which version generated it
+ FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
+ UNIQUE(project_id, html_path)
+);
+
+CREATE INDEX IF NOT EXISTS idx_html_cache_project ON html_cache(project_id);
+CREATE INDEX IF NOT EXISTS idx_html_cache_session ON html_cache(source_session_id);
diff --git a/claude_code_log/migrations/003_html_pagination.sql b/claude_code_log/migrations/003_html_pagination.sql
new file mode 100644
index 00000000..61bd6f9c
--- /dev/null
+++ b/claude_code_log/migrations/003_html_pagination.sql
@@ -0,0 +1,39 @@
+-- HTML pagination for combined transcripts
+-- Migration: 003
+-- Description: Tracks page assignments for paginated combined transcript HTML files
+
+-- Pages table: tracks each generated page file
+CREATE TABLE IF NOT EXISTS html_pages (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ project_id INTEGER NOT NULL,
+ page_number INTEGER NOT NULL,
+ html_path TEXT NOT NULL, -- e.g., "combined_transcripts.html" or "combined_transcripts_2.html"
+ page_size_config INTEGER NOT NULL, -- the --page-size value used
+ message_count INTEGER NOT NULL, -- total messages on this page
+ first_session_id TEXT NOT NULL,
+ last_session_id TEXT NOT NULL,
+ first_timestamp TEXT,
+ last_timestamp TEXT,
+ total_input_tokens INTEGER DEFAULT 0,
+ total_output_tokens INTEGER DEFAULT 0,
+ total_cache_creation_tokens INTEGER DEFAULT 0,
+ total_cache_read_tokens INTEGER DEFAULT 0,
+ generated_at TEXT NOT NULL, -- ISO timestamp when page was generated
+ library_version TEXT NOT NULL,
+ FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
+ UNIQUE(project_id, page_number)
+);
+
+-- Page-session mapping: tracks which sessions are on which page
+CREATE TABLE IF NOT EXISTS page_sessions (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ page_id INTEGER NOT NULL,
+ session_id TEXT NOT NULL,
+ session_order INTEGER NOT NULL, -- order of session within the page
+ FOREIGN KEY (page_id) REFERENCES html_pages(id) ON DELETE CASCADE,
+ UNIQUE(page_id, session_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_html_pages_project ON html_pages(project_id);
+CREATE INDEX IF NOT EXISTS idx_page_sessions_page ON page_sessions(page_id);
+CREATE INDEX IF NOT EXISTS idx_page_sessions_session ON page_sessions(session_id);
diff --git a/claude_code_log/migrations/__init__.py b/claude_code_log/migrations/__init__.py
new file mode 100644
index 00000000..db9bb5bb
--- /dev/null
+++ b/claude_code_log/migrations/__init__.py
@@ -0,0 +1,5 @@
+"""Database migrations for Claude Code Log cache."""
+
+from .runner import run_migrations
+
+__all__ = ["run_migrations"]
diff --git a/claude_code_log/migrations/runner.py b/claude_code_log/migrations/runner.py
new file mode 100644
index 00000000..40e2b29d
--- /dev/null
+++ b/claude_code_log/migrations/runner.py
@@ -0,0 +1,163 @@
+"""Migration runner for SQLite cache database."""
+
+import hashlib
+import re
+import sqlite3
+from datetime import datetime
+from pathlib import Path
+from typing import List, Tuple
+
+
+def _get_migrations_dir() -> Path:
+ """Get the migrations directory path."""
+ return Path(__file__).parent
+
+
+def _compute_checksum(content: str) -> str:
+ """Compute SHA256 checksum of migration content."""
+ return hashlib.sha256(content.encode("utf-8")).hexdigest()
+
+
+def _parse_migration_number(filename: str) -> int:
+ """Extract migration number from filename (e.g., '001_initial.sql' -> 1)."""
+ match = re.match(r"^(\d+)_", filename)
+ if match:
+ return int(match.group(1))
+ raise ValueError(f"Invalid migration filename: {filename}")
+
+
+def _ensure_schema_version_table(conn: sqlite3.Connection) -> None:
+ """Create _schema_version table if it doesn't exist or upgrade from old format."""
+ # Check if table exists
+ row = conn.execute(
+ "SELECT name FROM sqlite_master WHERE type='table' AND name='_schema_version'"
+ ).fetchone()
+
+ if row:
+ # Check if it has the new schema (with checksum column)
+ columns = conn.execute("PRAGMA table_info(_schema_version)").fetchall()
+ column_names = {col[1] for col in columns}
+
+ if "checksum" not in column_names:
+ # Old format table - drop it and recreate
+ # This triggers a fresh start as per migration plan
+ conn.execute("DROP TABLE _schema_version")
+ conn.commit()
+
+ # Create table with new schema
+ conn.execute("""
+ CREATE TABLE IF NOT EXISTS _schema_version (
+ version INTEGER PRIMARY KEY,
+ filename TEXT NOT NULL,
+ applied_at TEXT NOT NULL,
+ checksum TEXT NOT NULL
+ )
+ """)
+ conn.commit()
+
+
+def get_applied_migrations(conn: sqlite3.Connection) -> List[Tuple[int, str]]:
+ """Get list of applied migrations as (version, checksum) tuples."""
+ _ensure_schema_version_table(conn)
+ rows = conn.execute(
+ "SELECT version, checksum FROM _schema_version ORDER BY version"
+ ).fetchall()
+ return [(row[0], row[1]) for row in rows]
+
+
+def get_available_migrations() -> List[Tuple[int, Path]]:
+ """Get list of available migration files as (version, path) tuples."""
+ migrations_dir = _get_migrations_dir()
+ sql_files = sorted(migrations_dir.glob("*.sql"))
+
+ migrations: List[Tuple[int, Path]] = []
+ for sql_file in sql_files:
+ try:
+ version = _parse_migration_number(sql_file.name)
+ migrations.append((version, sql_file))
+ except ValueError:
+ # Skip files that don't match the naming convention
+ continue
+
+ return migrations
+
+
+def get_pending_migrations(conn: sqlite3.Connection) -> List[Tuple[int, Path]]:
+ """Get list of migrations that haven't been applied yet."""
+ applied = {v for v, _ in get_applied_migrations(conn)}
+ available = get_available_migrations()
+ return [(v, p) for v, p in available if v not in applied]
+
+
+def apply_migration(
+ conn: sqlite3.Connection, version: int, migration_path: Path
+) -> None:
+ """Apply a single migration and record it in _schema_version."""
+ content = migration_path.read_text(encoding="utf-8")
+ checksum = _compute_checksum(content)
+
+ # Execute the migration SQL
+ conn.executescript(content)
+
+ # Record the migration
+ conn.execute(
+ """
+ INSERT INTO _schema_version (version, filename, applied_at, checksum)
+ VALUES (?, ?, ?, ?)
+ """,
+ (version, migration_path.name, datetime.now().isoformat(), checksum),
+ )
+ conn.commit()
+
+
+def verify_migrations(conn: sqlite3.Connection) -> List[str]:
+ """Verify applied migrations match their checksums.
+
+ Returns list of warnings for any mismatches.
+ """
+ warnings: List[str] = []
+ applied = get_applied_migrations(conn)
+ available = {v: p for v, p in get_available_migrations()}
+
+ for version, stored_checksum in applied:
+ if version in available:
+ current_content = available[version].read_text(encoding="utf-8")
+ current_checksum = _compute_checksum(current_content)
+ if current_checksum != stored_checksum:
+ warnings.append(
+ f"Migration {version} ({available[version].name}) has been modified "
+ f"since it was applied. This may indicate database inconsistency."
+ )
+
+ return warnings
+
+
+def run_migrations(db_path: Path) -> int:
+ """Apply all pending migrations to the database.
+
+ Args:
+ db_path: Path to the SQLite database file
+
+ Returns:
+ Number of migrations applied
+ """
+ conn = sqlite3.connect(db_path, timeout=30.0)
+ conn.execute("PRAGMA foreign_keys = ON")
+
+ try:
+ _ensure_schema_version_table(conn)
+ pending = get_pending_migrations(conn)
+
+ for version, migration_path in sorted(pending):
+ apply_migration(conn, version, migration_path)
+
+ return len(pending)
+ finally:
+ conn.close()
+
+
+def get_current_version(conn: sqlite3.Connection) -> int:
+ """Get the current schema version (highest applied migration number)."""
+ _ensure_schema_version_table(conn)
+ row = conn.execute("SELECT MAX(version) FROM _schema_version").fetchone()
+ return row[0] if row[0] is not None else 0
diff --git a/claude_code_log/renderer.py b/claude_code_log/renderer.py
index 78e9c5f7..e54594a2 100644
--- a/claude_code_log/renderer.py
+++ b/claude_code_log/renderer.py
@@ -1531,7 +1531,7 @@ def _filter_messages(messages: list[TranscriptEntry]) -> list[TranscriptEntry]:
content = message.content
message_content = content if isinstance(content, list) else []
else:
- message_content = message.message.content # type: ignore[union-attr]
+ message_content = message.message.content
text_content = extract_text_content(message_content)
@@ -1735,16 +1735,16 @@ def _render_messages(
)
effective_type = "user"
else:
- message_content = message.message.content # type: ignore
+ message_content = message.message.content
meta = create_meta(message)
effective_type = message_type
# Chunk content: regular items (text/image) accumulate, special items (tool/thinking) separate
if isinstance(message_content, list):
- chunks = chunk_message_content(message_content) # type: ignore[arg-type]
+ chunks = chunk_message_content(message_content)
else:
# String content - wrap in list with single TextContent
- content_str: str = message_content.strip() if message_content else "" # type: ignore[union-attr]
+ content_str: str = message_content.strip() if message_content else ""
if content_str:
chunks: list[ContentChunk] = [
[TextContent(type="text", text=content_str)] # pyright: ignore[reportUnknownArgumentType]
@@ -2303,3 +2303,18 @@ def get_renderer(format: str, image_export_mode: Optional[str] = None) -> Render
mode = image_export_mode or "referenced"
return MarkdownRenderer(image_export_mode=mode)
raise ValueError(f"Unsupported format: {format}")
+
+
+def is_html_outdated(html_file_path: Path) -> bool:
+ """Check if an HTML file is outdated based on its version comment.
+
+ This is a convenience function that uses the HtmlRenderer's is_outdated method.
+
+ Returns:
+ True if the file should be regenerated (missing version, different version, or file doesn't exist).
+ False if the file is current.
+ """
+ from .html.renderer import HtmlRenderer
+
+ renderer = HtmlRenderer()
+ return renderer.is_outdated(html_file_path)
diff --git a/claude_code_log/tui.py b/claude_code_log/tui.py
index 760dd3d3..1eb1e233 100644
--- a/claude_code_log/tui.py
+++ b/claude_code_log/tui.py
@@ -4,8 +4,8 @@
import os
import webbrowser
from datetime import datetime
-from pathlib import Path
-from typing import Any, ClassVar, Optional, cast
+from pathlib import Path, PurePath
+from typing import Any, ClassVar, List, Optional, cast
from textual.app import App, ComposeResult
from textual.binding import Binding, BindingType
@@ -41,7 +41,7 @@ class ProjectSelector(App[Path]):
border: solid $primary;
margin-bottom: 1;
}
-
+
DataTable {
height: auto;
}
@@ -50,7 +50,11 @@ class ProjectSelector(App[Path]):
TITLE = "Claude Code Log - Project Selector"
BINDINGS: ClassVar[list[BindingType]] = [
Binding("q", "quit", "Quit"),
+ Binding("escape", "quit", "Quit", show=False),
Binding("s", "select_project", "Select Project"),
+ Binding("a", "archive_project", "Archive Project"),
+ Binding("d", "delete_project", "Delete Project"),
+ Binding("r", "restore_project", "Restore Project"),
]
selected_project_path: reactive[Optional[Path]] = reactive(
@@ -58,13 +62,20 @@ class ProjectSelector(App[Path]):
)
projects: list[Path]
matching_projects: list[Path]
-
- def __init__(self, projects: list[Path], matching_projects: list[Path]):
+ archived_projects: set[Path]
+
+ def __init__(
+ self,
+ projects: list[Path],
+ matching_projects: list[Path],
+ archived_projects: Optional[set[Path]] = None,
+ ):
"""Initialize the project selector."""
super().__init__()
self.theme = "gruvbox"
self.projects = projects
self.matching_projects = matching_projects
+ self.archived_projects = archived_projects or set()
def compose(self) -> ComposeResult:
"""Create the UI layout."""
@@ -100,23 +111,26 @@ def populate_table(self) -> None:
table.clear(columns=True)
# Add columns
- table.add_column("Project", width=self.size.width - 13)
+ table.add_column("Project", width=max(20, self.size.width - 13))
table.add_column("Sessions", width=10)
# Add rows
for project_path in self.projects:
+ is_archived = project_path in self.archived_projects
try:
cache_manager = CacheManager(project_path, get_library_version())
project_cache = cache_manager.get_cached_project_data()
if not project_cache or not project_cache.sessions:
- try:
- ensure_fresh_cache(project_path, cache_manager, silent=True)
- # Reload cache after ensuring it's fresh
- project_cache = cache_manager.get_cached_project_data()
- except Exception:
- # If cache building fails, continue with empty cache
- project_cache = None
+ if not is_archived:
+ # Only try to build cache for non-archived projects
+ try:
+ ensure_fresh_cache(project_path, cache_manager, silent=True)
+ # Reload cache after ensuring it's fresh
+ project_cache = cache_manager.get_cached_project_data()
+ except Exception:
+ # If cache building fails, continue with empty cache
+ project_cache = None
# Get project info
session_count = (
@@ -132,6 +146,10 @@ def populate_table(self) -> None:
if project_path in self.matching_projects:
project_display = f"→ {project_display[2:]}"
+ # Add archived indicator
+ if is_archived:
+ project_display = f"{project_display} [ARCHIVED]"
+
table.add_row(
project_display,
str(session_count),
@@ -141,6 +159,8 @@ def populate_table(self) -> None:
project_display = f" {project_path.name}"
if project_path in self.matching_projects:
project_display = f"→ {project_display[2:]}"
+ if is_archived:
+ project_display = f"{project_display} [ARCHIVED]"
table.add_row(
project_display,
@@ -151,6 +171,10 @@ def on_data_table_row_highlighted(self, _event: DataTable.RowHighlighted) -> Non
"""Handle row highlighting (cursor movement) in the projects table."""
self._update_selected_project_from_cursor()
+ def on_data_table_row_selected(self, _event: DataTable.RowSelected) -> None:
+ """Handle row selection (Enter key) in the projects table."""
+ self.action_select_project()
+
def _update_selected_project_from_cursor(self) -> None:
"""Update the selected project based on the current cursor position."""
try:
@@ -164,6 +188,10 @@ def _update_selected_project_from_cursor(self) -> None:
if project_display.startswith("→"):
project_display = project_display[1:].strip()
+ # Remove the archived indicator if present
+ if project_display.endswith(" [ARCHIVED]"):
+ project_display = project_display[:-11].strip()
+
# Find the matching project path
for project_path in self.projects:
if project_path.name == project_display:
@@ -186,10 +214,303 @@ async def action_quit(self) -> None:
"""Quit the application with proper cleanup."""
self.exit(None)
+ def _get_project_session_count(self, project_path: Path) -> int:
+ """Get the number of sessions in a project from cache."""
+ try:
+ cache_manager = CacheManager(project_path, get_library_version())
+ project_cache = cache_manager.get_cached_project_data()
+ if project_cache and project_cache.sessions:
+ return len(project_cache.sessions)
+ except Exception:
+ pass
+ return 0
+
+ def _is_project_archived(self, project_path: Path) -> bool:
+ """Check if a project is archived (no JSONL files exist)."""
+ return project_path in self.archived_projects
+
+ def check_action(
+ self,
+ action: str,
+ parameters: tuple[object, ...], # noqa: ARG002
+ ) -> bool | None:
+ """Control which actions are available based on context."""
+ project_path = self.selected_project_path
+ is_archived = project_path in self.archived_projects if project_path else False
+
+ if action == "archive_project":
+ # Can only archive non-archived projects
+ return project_path is not None and not is_archived
+ elif action == "restore_project":
+ # Can only restore archived projects
+ return project_path is not None and is_archived
+ elif action == "delete_project":
+ # Can delete any project
+ return project_path is not None
+
+ # Allow all other actions (quit, select_project, etc.)
+ return True
+
+ def action_archive_project(self) -> None:
+ """Archive all sessions in the selected project."""
+ if not self.selected_project_path:
+ self.notify("No project selected", severity="warning")
+ return
+
+ if self._is_project_archived(self.selected_project_path):
+ self.notify("Project is already archived", severity="warning")
+ return
+
+ session_count = self._get_project_session_count(self.selected_project_path)
+ self.push_screen(
+ ArchiveProjectConfirmScreen(self.selected_project_path.name, session_count),
+ self._handle_archive_project_confirm,
+ )
+
+ def _handle_archive_project_confirm(self, confirmed: bool | None) -> None:
+ """Handle the result of the archive project confirmation dialog."""
+ if not confirmed or not self.selected_project_path:
+ return
+
+ project_path = self.selected_project_path
+
+ # Collect all JSONL files first
+ jsonl_files = list(project_path.glob("*.jsonl"))
+ if not jsonl_files:
+ self.notify("No sessions to archive", severity="warning")
+ return
+
+ # Track successes and failures
+ succeeded: list[str] = []
+ failed: list[tuple[str, str]] = [] # (filename, error message)
+
+ # Delete all JSONL files in the project
+ for jsonl_file in jsonl_files:
+ try:
+ jsonl_file.unlink()
+ succeeded.append(jsonl_file.name)
+ except Exception as e:
+ failed.append((jsonl_file.name, str(e)))
+
+ # Report results clearly
+ total = len(jsonl_files)
+ if failed:
+ # Show detailed failure information
+ failed_names = ", ".join(f[0] for f in failed[:3])
+ if len(failed) > 3:
+ failed_names += f" and {len(failed) - 3} more"
+ self.notify(
+ f"Archive incomplete: {len(succeeded)}/{total} sessions deleted. "
+ f"Failed: {failed_names}",
+ severity="error",
+ )
+ else:
+ self.notify(f"Archived {len(succeeded)} sessions")
+
+ # Only mark as fully archived if ALL files were deleted
+ if not failed and succeeded:
+ self.archived_projects.add(project_path)
+
+ # Always refresh to show current state
+ if succeeded:
+ self.populate_table()
+
+ def action_delete_project(self) -> None:
+ """Delete the selected project from cache (and optionally JSONL files)."""
+ if not self.selected_project_path:
+ self.notify("No project selected", severity="warning")
+ return
+
+ is_archived = self._is_project_archived(self.selected_project_path)
+ session_count = self._get_project_session_count(self.selected_project_path)
+ self.push_screen(
+ DeleteProjectConfirmScreen(
+ self.selected_project_path.name, session_count, is_archived
+ ),
+ self._handle_delete_project_confirm,
+ )
+
+ def _handle_delete_project_confirm(self, result: Optional[str]) -> None:
+ """Handle the result of the delete project confirmation dialog."""
+ if not result or not self.selected_project_path:
+ return
+
+ project_path = self.selected_project_path
+
+ # Delete cache
+ cache_manager = CacheManager(project_path, get_library_version())
+ cache_manager.clear_cache()
+
+ # If deleting both, also delete JSONL files
+ file_delete_failed = False
+ if result == "both":
+ jsonl_files = list(project_path.glob("*.jsonl"))
+ if jsonl_files:
+ succeeded: list[str] = []
+ failed: list[tuple[str, str]] = []
+
+ for jsonl_file in jsonl_files:
+ try:
+ jsonl_file.unlink()
+ succeeded.append(jsonl_file.name)
+ except Exception as e:
+ failed.append((jsonl_file.name, str(e)))
+
+ if failed:
+ file_delete_failed = True
+ failed_names = ", ".join(f[0] for f in failed[:3])
+ if len(failed) > 3:
+ failed_names += f" and {len(failed) - 3} more"
+ self.notify(
+ f"Cache deleted but {len(failed)}/{len(jsonl_files)} "
+ f"session files failed to delete: {failed_names}",
+ severity="error",
+ )
+
+ # Remove from projects list
+ if project_path in self.projects:
+ self.projects.remove(project_path)
+ if project_path in self.matching_projects:
+ self.matching_projects.remove(project_path)
+ if project_path in self.archived_projects:
+ self.archived_projects.discard(project_path)
+
+ if not file_delete_failed:
+ self.notify(f"Deleted project: {project_path.name}")
+ self.selected_project_path = None
+ self.populate_table()
+
+ def action_restore_project(self) -> None:
+ """Restore all archived sessions in the selected project."""
+ if not self.selected_project_path:
+ self.notify("No project selected", severity="warning")
+ return
+
+ if not self._is_project_archived(self.selected_project_path):
+ self.notify("Project is not archived", severity="warning")
+ return
+
+ session_count = self._get_project_session_count(self.selected_project_path)
+ self.push_screen(
+ RestoreProjectConfirmScreen(self.selected_project_path.name, session_count),
+ self._handle_restore_project_confirm,
+ )
+
+ def _handle_restore_project_confirm(self, confirmed: bool | None) -> None:
+ """Handle the result of the restore project confirmation dialog."""
+ if not confirmed or not self.selected_project_path:
+ return
+
+ project_path = self.selected_project_path
+ cache_manager = CacheManager(project_path, get_library_version())
+ project_cache = cache_manager.get_cached_project_data()
+
+ if not project_cache or not project_cache.sessions:
+ self.notify("No sessions to restore", severity="warning")
+ return
+
+ # Ensure project directory exists
+ project_path.mkdir(parents=True, exist_ok=True)
+
+ # Identify sessions that need restoration (don't already exist as files)
+ sessions_to_restore = [
+ session_id
+ for session_id in project_cache.sessions
+ if not (project_path / f"{session_id}.jsonl").exists()
+ ]
+
+ if not sessions_to_restore:
+ self.notify("All sessions already exist as files", severity="warning")
+ return
+
+ # Track successes and failures
+ succeeded: list[str] = []
+ failed: list[tuple[str, str]] = [] # (session_id, error message)
+
+ for session_id in sessions_to_restore:
+ jsonl_path = project_path / f"{session_id}.jsonl"
+ try:
+ messages = cache_manager.export_session_to_jsonl(session_id)
+ if messages:
+ with open(jsonl_path, "w", encoding="utf-8") as f:
+ for msg in messages:
+ f.write(msg + "\n")
+ succeeded.append(session_id)
+ else:
+ failed.append((session_id, "No messages found in cache"))
+ except Exception as e:
+ failed.append((session_id, str(e)))
+
+ # Report results clearly
+ total = len(sessions_to_restore)
+ if failed:
+ # Show detailed failure information
+ failed_ids = ", ".join(f[0][:8] for f in failed[:3]) # Truncate UUIDs
+ if len(failed) > 3:
+ failed_ids += f" and {len(failed) - 3} more"
+ self.notify(
+ f"Restore incomplete: {len(succeeded)}/{total} sessions restored. "
+ f"Failed: {failed_ids}",
+ severity="error",
+ )
+ else:
+ self.notify(f"Restored {len(succeeded)} sessions")
+
+ # Only mark as fully restored if ALL sessions were restored
+ if not failed and succeeded:
+ self.archived_projects.discard(project_path)
+
+ # Always refresh to show current state
+ if succeeded:
+ self.populate_table()
+
+
+class SafeMarkdownViewer(MarkdownViewer):
+ """MarkdownViewer that handles link clicks safely.
+
+ Intercepts link clicks to prevent crashes from file/external links
+ while still allowing anchor navigation for ToC.
+ """
+
+ # Allow maximizing the viewer (screen will redirect children to this)
+ ALLOW_MAXIMIZE = True
+
+ def on_mount(self) -> None:
+ """Configure document for proper keyboard navigation."""
+ # Enable focus on the document so keys work after focus changes
+ self.document.can_focus = True
+
+ async def go(self, location: str | PurePath) -> None:
+ """Navigate to a new location - intercept non-anchor links.
+
+ Override parent's go() method to handle links appropriately:
+ - Anchor links (#section): allow default scrolling
+ - HTTP/HTTPS URLs: open in browser
+ - Relative file links: show warning (not supported)
+ """
+ location_str = str(location)
+
+ if location_str.startswith("#"):
+ # Anchor link - allow default scroll behaviour
+ await super().go(location)
+ elif location_str.startswith(("http://", "https://")):
+ # External URL - open in browser
+ webbrowser.open(location_str)
+ self.notify(f"Opening in browser: {location_str[:50]}...")
+ else:
+ # Relative file link - not supported in embedded viewer
+ self.notify(
+ "File links not supported in embedded viewer",
+ severity="warning",
+ )
+
class MarkdownViewerScreen(ModalScreen[None]):
"""Modal screen for viewing Markdown content with table of contents."""
+ # Character-based pagination - ~50KB per page for good scroll performance
+ PAGE_SIZE_CHARS = 50_000
+
CSS = """
MarkdownViewerScreen {
align: center middle;
@@ -220,6 +541,14 @@ class MarkdownViewerScreen(ModalScreen[None]):
max-width: 60;
}
+ #pagination-controls {
+ dock: top;
+ height: 1;
+ background: $warning;
+ color: $text;
+ text-align: center;
+ }
+
#md-footer {
dock: bottom;
height: 1;
@@ -232,20 +561,91 @@ class MarkdownViewerScreen(ModalScreen[None]):
BINDINGS: ClassVar[list[BindingType]] = [
Binding("escape", "dismiss", "Close", show=True),
Binding("q", "dismiss", "Close", show=False),
+ Binding("t", "toggle_toc", "Toggle ToC"),
+ Binding("tab", "switch_focus", "Switch focus", show=False, priority=True),
+ Binding("shift+tab", "switch_focus", "Switch focus", show=False, priority=True),
+ Binding("n", "next_page", "Next page"),
+ Binding("right", "next_page", "Next page", show=False),
+ Binding("p", "prev_page", "Prev page"),
+ Binding("left", "prev_page", "Prev page", show=False),
]
def __init__(self, content: str, title: str = "Markdown Viewer") -> None:
super().__init__()
self.md_content = content
self.md_title = title
+ self._pages = self._split_into_pages(content)
+ self._current_page = 0
+ self._is_paginated = len(self._pages) > 1
+
+ def _split_into_pages(self, content: str) -> list[str]:
+ """Split markdown content into pages by character count.
+
+ Splits at section boundaries (## ) when possible to avoid
+ cutting mid-section, but will split within sections if
+ a single section exceeds PAGE_SIZE_CHARS.
+ """
+ import re
+
+ if len(content) <= self.PAGE_SIZE_CHARS:
+ return [content]
+
+ pages: list[str] = []
+ current_page = ""
+
+ # Split by level 2 headings, keeping the delimiter
+ sections = re.split(r"(\n(?=## ))", content)
+
+ for section in sections:
+ if not section:
+ continue
+
+ # If adding this section exceeds page size
+ if len(current_page) + len(section) > self.PAGE_SIZE_CHARS:
+ # If current page has content, save it
+ if current_page.strip():
+ pages.append(current_page)
+ current_page = ""
+
+ # If section itself exceeds page size, split it by lines
+ if len(section) > self.PAGE_SIZE_CHARS:
+ lines = section.split("\n")
+ for line in lines:
+ if len(current_page) + len(line) + 1 > self.PAGE_SIZE_CHARS:
+ if current_page.strip():
+ pages.append(current_page)
+ current_page = line + "\n"
+ else:
+ current_page += line + "\n"
+ else:
+ current_page = section
+ else:
+ current_page += section
+
+ # Don't forget the last page
+ if current_page.strip():
+ pages.append(current_page)
+
+ return pages if pages else [content]
def compose(self) -> ComposeResult:
with Container(id="md-container"):
yield Static(self.md_title, id="md-header")
- yield MarkdownViewer(
- self.md_content, id="md-viewer", show_table_of_contents=True
+ if self._is_paginated:
+ yield Static(
+ f"Page {self._current_page + 1}/{len(self._pages)} | "
+ "← or p: prev | → or n: next",
+ id="pagination-controls",
+ )
+ yield SafeMarkdownViewer(
+ self._pages[self._current_page],
+ id="md-viewer",
+ show_table_of_contents=True,
)
- yield Static("Press ESC or q to close | t: toggle ToC", id="md-footer")
+ footer_text = "Press ESC or q to close | t: toggle ToC"
+ if self._is_paginated:
+ footer_text += " | n/p: navigate pages"
+ yield Static(footer_text, id="md-footer")
def on_mount(self) -> None:
"""Customize ToC tree after mount."""
@@ -308,6 +708,513 @@ def _clean_toc_labels(self, node: Any) -> None:
async def action_dismiss(self, result: None = None) -> None:
self.dismiss(result)
    def action_maximize(self) -> None:
        """Maximize the MarkdownViewer (not individual children)."""
        try:
            viewer = self.query_one("#md-viewer", SafeMarkdownViewer)
            self.maximize(viewer)
        except Exception:
            # Best-effort: silently ignore when the viewer is not mounted
            # (e.g. action fired before compose finished).
            pass
+
+ def on_key(self, event: Any) -> None:
+ """Intercept Tab keys to handle focus switching without scroll."""
+ if event.key in ("tab", "shift+tab"):
+ event.prevent_default()
+ event.stop()
+ self.action_switch_focus()
+
    def _focus_viewer_content(self, viewer: SafeMarkdownViewer) -> None:
        """Focus the viewer's document content without scrolling."""
        # MarkdownViewer is a container; focus its document widget
        try:
            viewer.document.focus(scroll_visible=False)
        except Exception:
            # Fallback: focus the container itself if the inner document
            # is not available (e.g. not yet mounted).
            viewer.focus(scroll_visible=False)
+
    def action_toggle_toc(self) -> None:
        """Toggle table of contents visibility, preserving scroll position."""
        viewer = self.query_one("#md-viewer", SafeMarkdownViewer)
        # Capture the scroll offset first: toggling the ToC pane triggers a
        # re-layout that would otherwise reset the reading position.
        scroll_y = viewer.scroll_y
        will_show_toc = not viewer.show_table_of_contents
        viewer.show_table_of_contents = will_show_toc

        def restore_and_focus() -> None:
            # Runs after the layout settles (scheduled via call_later below).
            viewer.scroll_to(y=scroll_y, animate=False)
            if will_show_toc:
                # Focus the Tree inside TOC when showing
                try:
                    toc = viewer.table_of_contents
                    tree = cast("Tree[Any]", toc.query_one(Tree))
                    tree.focus(scroll_visible=False)
                except Exception:
                    pass
            else:
                # Focus the document content when hiding TOC
                self._focus_viewer_content(viewer)

        self.call_later(restore_and_focus)
+
+ def action_switch_focus(self) -> None:
+ """Switch focus between TOC and content without scrolling."""
+ viewer = self.query_one("#md-viewer", SafeMarkdownViewer)
+ if not viewer.show_table_of_contents:
+ # TOC hidden, just focus the document
+ self._focus_viewer_content(viewer)
+ return
+
+ try:
+ toc = viewer.table_of_contents
+ # Get the Tree widget inside the TOC
+ tree = cast("Tree[Any]", toc.query_one(Tree))
+ if tree.has_focus:
+ # Currently in TOC tree, switch to document
+ self._focus_viewer_content(viewer)
+ else:
+ # Currently in document, switch to TOC tree
+ tree.focus(scroll_visible=False)
+ except Exception as e:
+ self.notify(f"Focus switch error: {e}", severity="warning")
+ self._focus_viewer_content(viewer)
+
+ def action_next_page(self) -> None:
+ """Navigate to next page (if paginated)."""
+ if not self._is_paginated:
+ return
+ if self._current_page < len(self._pages) - 1:
+ self._current_page += 1
+ if self.is_mounted:
+ self._update_viewer_content()
+
+ def action_prev_page(self) -> None:
+ """Navigate to previous page (if paginated)."""
+ if not self._is_paginated:
+ return
+ if self._current_page > 0:
+ self._current_page -= 1
+ if self.is_mounted:
+ self._update_viewer_content()
+
+ def _update_viewer_content(self) -> None:
+ """Update the markdown viewer with current page content."""
+ try:
+ # Update pagination controls
+ controls = self.query_one("#pagination-controls", Static)
+ controls.update(
+ f"Page {self._current_page + 1}/{len(self._pages)} | ← or p: prev | → or n: next"
+ )
+
+ # Update the markdown content directly
+ viewer = self.query_one("#md-viewer", SafeMarkdownViewer)
+ viewer.document.update(self._pages[self._current_page])
+
+ # Scroll to top of content
+ viewer.scroll_home(animate=False)
+
+ # Re-customize ToC after content loads
+ self.call_later(self._customize_toc_tree)
+ except Exception as e:
+ self.notify(f"Error updating page: {e}", severity="error")
+
+
class ArchiveConfirmScreen(ModalScreen[bool]):
    """Modal screen for confirming session archiving (delete JSONL, keep cache).

    Dismisses with ``True`` when the user confirms (y / Enter) and
    ``False`` when they cancel (n / Escape).
    """

    # Centred dialog styling; the $warning accent signals a destructive
    # but recoverable action (the cached copy of the session survives).
    CSS = """
    ArchiveConfirmScreen {
        align: center middle;
    }

    #archive-container {
        width: 65;
        height: auto;
        border: solid $warning;
        background: $surface;
        padding: 1 2;
    }

    #archive-title {
        text-align: center;
        text-style: bold;
        color: $warning;
        margin-bottom: 1;
    }

    #archive-message {
        margin-bottom: 1;
    }

    #archive-info {
        color: $text-muted;
        margin-bottom: 1;
    }

    #archive-buttons {
        text-align: center;
        height: auto;
    }
    """

    # y/Enter confirm, n/Escape cancel; Enter and Escape are hidden
    # from the footer (show=False).
    BINDINGS: ClassVar[list[BindingType]] = [
        Binding("y", "confirm", "Yes"),
        Binding("enter", "confirm", "Confirm", show=False),
        Binding("n", "cancel", "No"),
        Binding("escape", "cancel", "Cancel", show=False),
    ]

    def __init__(self, session_id: str) -> None:
        """Remember which session this confirmation applies to."""
        super().__init__()
        self.session_id = session_id  # full id; only an 8-char prefix is shown

    def compose(self) -> ComposeResult:
        """Build the dialog: title, truncated session id, info text, key hints."""
        with Container(id="archive-container"):
            yield Static("Archive Session", id="archive-title")
            yield Static(
                f"Session: {self.session_id[:8]}...",
                id="archive-message",
            )
            yield Static(
                "This will delete the JSONL file.\n"
                "The session will be archived and can be restored from cache.",
                id="archive-info",
            )
            # "\\[" renders a literal "[" in Rich markup.
            yield Static("\\[Enter/y] Yes \\[Esc/n] No", id="archive-buttons")

    def action_confirm(self) -> None:
        """Close the modal reporting confirmation."""
        self.dismiss(True)

    def action_cancel(self) -> None:
        """Close the modal reporting cancellation."""
        self.dismiss(False)
+
+
class DeleteConfirmScreen(ModalScreen[Optional[str]]):
    """Modal screen for confirming session deletion with smart options.

    Dismisses with ``"cache_only"``, ``"both"``, or ``None`` (cancelled).
    For archived sessions (no JSONL file) only cache deletion is offered,
    since there is no file to delete and the operation is permanent.
    """

    # Centred dialog styling; the $error accent marks a destructive action.
    CSS = """
    DeleteConfirmScreen {
        align: center middle;
    }

    #delete-container {
        width: 65;
        height: auto;
        border: solid $error;
        background: $surface;
        padding: 1 2;
    }

    #delete-title {
        text-align: center;
        text-style: bold;
        color: $error;
        margin-bottom: 1;
    }

    #delete-message {
        margin-bottom: 1;
    }

    #delete-warning {
        color: $warning;
        margin-bottom: 1;
    }

    #delete-buttons {
        text-align: center;
        height: auto;
    }
    """

    # c/y/Enter -> cache-only delete, b -> delete both, n/Escape -> cancel.
    BINDINGS: ClassVar[list[BindingType]] = [
        Binding("c", "delete_cache", "Cache only"),
        Binding("b", "delete_both", "Both", show=False),
        Binding("y", "delete_cache", "Yes", show=False),
        Binding("enter", "delete_cache", "Confirm", show=False),
        Binding("n", "cancel", "No"),
        Binding("escape", "cancel", "Cancel", show=False),
    ]

    def __init__(self, session_id: str, is_archived: bool = False) -> None:
        """Remember the target session and whether it is archived (cache-only)."""
        super().__init__()
        self.session_id = session_id  # full id; only an 8-char prefix is shown
        self.is_archived = is_archived  # archived = no JSONL file exists

    def compose(self) -> ComposeResult:
        """Build the dialog; wording and options depend on archived state."""
        with Container(id="delete-container"):
            yield Static("Delete Session", id="delete-title")
            yield Static(
                f"Session: {self.session_id[:8]}...",
                id="delete-message",
            )
            if self.is_archived:
                # Archived: cache is the only copy, so deletion is permanent.
                yield Static(
                    "This is an archived session with no JSONL file.\n"
                    "Deletion is PERMANENT and cannot be undone!",
                    id="delete-warning",
                )
                yield Static(
                    "\\[Enter/y/c] Delete from cache \\[Esc/n] Cancel",
                    id="delete-buttons",
                )
            else:
                # Current session: offer cache-only or cache+JSONL deletion.
                yield Static(
                    "Choose what to delete:\n"
                    "• Cache only: JSONL file remains, session can be re-parsed\n"
                    "• Both: Delete JSONL file AND cache (permanent!)",
                    id="delete-warning",
                )
                yield Static(
                    "\\[c] Cache only \\[b] Both (permanent) \\[Esc/n] Cancel",
                    id="delete-buttons",
                )

    def action_delete_cache(self) -> None:
        """Close the modal requesting cache-only deletion."""
        self.dismiss("cache_only")

    def action_delete_both(self) -> None:
        """Close the modal requesting full deletion (disabled for archived)."""
        if not self.is_archived:
            self.dismiss("both")

    def action_cancel(self) -> None:
        """Close the modal without deleting anything."""
        self.dismiss(None)
+
+
class ArchiveProjectConfirmScreen(ModalScreen[bool]):
    """Modal screen for confirming project archival.

    Dismisses with ``True`` when the user confirms (y / Enter) and
    ``False`` when they cancel (n / Escape).
    """

    # Centred dialog styling; $warning accent = destructive but recoverable.
    CSS = """
    ArchiveProjectConfirmScreen {
        align: center middle;
    }

    #archive-project-container {
        width: 65;
        height: auto;
        border: solid $warning;
        background: $surface;
        padding: 1 2;
    }

    #archive-project-title {
        text-align: center;
        text-style: bold;
        color: $warning;
        margin-bottom: 1;
    }

    #archive-project-message {
        margin-bottom: 1;
    }

    #archive-project-info {
        color: $text-muted;
        margin-bottom: 1;
    }

    #archive-project-buttons {
        text-align: center;
        height: auto;
    }
    """

    # y/Enter confirm, n/Escape cancel.
    BINDINGS: ClassVar[list[BindingType]] = [
        Binding("y", "confirm", "Yes"),
        Binding("enter", "confirm", "Confirm", show=False),
        Binding("n", "cancel", "No"),
        Binding("escape", "cancel", "Cancel", show=False),
    ]

    def __init__(self, project_name: str, session_count: int) -> None:
        """Remember the project display name and how many sessions it holds."""
        super().__init__()
        self.project_name = project_name
        self.session_count = session_count

    def compose(self) -> ComposeResult:
        """Build the dialog: title, project summary, info text, key hints."""
        with Container(id="archive-project-container"):
            yield Static("Archive Project", id="archive-project-title")
            yield Static(
                f"Project: {self.project_name}\nSessions: {self.session_count}",
                id="archive-project-message",
            )
            yield Static(
                "This will delete ALL JSONL files in the project.\n"
                "Sessions will be archived and can be restored from cache.",
                id="archive-project-info",
            )
            # "\\[" renders a literal "[" in Rich markup.
            yield Static("\\[Enter/y] Yes \\[Esc/n] No", id="archive-project-buttons")

    def action_confirm(self) -> None:
        """Close the modal reporting confirmation."""
        self.dismiss(True)

    def action_cancel(self) -> None:
        """Close the modal reporting cancellation."""
        self.dismiss(False)
+
+
class DeleteProjectConfirmScreen(ModalScreen[Optional[str]]):
    """Modal screen for confirming project deletion with smart options.

    Dismisses with ``"cache_only"``, ``"both"``, or ``None`` (cancelled).
    For archived projects (no JSONL files) only cache deletion is offered,
    and it is permanent.
    """

    # Centred dialog styling; the $error accent marks a destructive action.
    CSS = """
    DeleteProjectConfirmScreen {
        align: center middle;
    }

    #delete-project-container {
        width: 65;
        height: auto;
        border: solid $error;
        background: $surface;
        padding: 1 2;
    }

    #delete-project-title {
        text-align: center;
        text-style: bold;
        color: $error;
        margin-bottom: 1;
    }

    #delete-project-message {
        margin-bottom: 1;
    }

    #delete-project-warning {
        color: $warning;
        margin-bottom: 1;
    }

    #delete-project-buttons {
        text-align: center;
        height: auto;
    }
    """

    # c/y/Enter -> cache-only delete, b -> delete both, n/Escape -> cancel.
    BINDINGS: ClassVar[list[BindingType]] = [
        Binding("c", "delete_cache", "Cache only"),
        Binding("b", "delete_both", "Both", show=False),
        Binding("y", "delete_cache", "Yes", show=False),
        Binding("enter", "delete_cache", "Confirm", show=False),
        Binding("n", "cancel", "No"),
        Binding("escape", "cancel", "Cancel", show=False),
    ]

    def __init__(
        self, project_name: str, session_count: int, is_archived: bool = False
    ) -> None:
        """Remember the project, its session count, and archived state."""
        super().__init__()
        self.project_name = project_name
        self.session_count = session_count
        self.is_archived = is_archived  # archived = no JSONL files remain

    def compose(self) -> ComposeResult:
        """Build the dialog; wording and options depend on archived state."""
        with Container(id="delete-project-container"):
            yield Static("Delete Project", id="delete-project-title")
            yield Static(
                f"Project: {self.project_name}\nSessions: {self.session_count}",
                id="delete-project-message",
            )
            if self.is_archived:
                # Archived: cache is the only copy, so deletion is permanent.
                yield Static(
                    "This is an archived project with no JSONL files.\n"
                    "Deletion is PERMANENT and cannot be undone!",
                    id="delete-project-warning",
                )
                yield Static(
                    "\\[Enter/y/c] Delete from cache \\[Esc/n] Cancel",
                    id="delete-project-buttons",
                )
            else:
                # Current project: offer cache-only or cache+JSONL deletion.
                yield Static(
                    "Choose what to delete:\n"
                    "• Cache only: JSONL files remain, sessions can be re-parsed\n"
                    "• Both: Delete ALL JSONL files AND cache (permanent!)",
                    id="delete-project-warning",
                )
                yield Static(
                    "\\[c] Cache only \\[b] Both (permanent) \\[Esc/n] Cancel",
                    id="delete-project-buttons",
                )

    def action_delete_cache(self) -> None:
        """Close the modal requesting cache-only deletion."""
        self.dismiss("cache_only")

    def action_delete_both(self) -> None:
        """Close the modal requesting full deletion (disabled for archived)."""
        if not self.is_archived:
            self.dismiss("both")

    def action_cancel(self) -> None:
        """Close the modal without deleting anything."""
        self.dismiss(None)
+
+
class RestoreProjectConfirmScreen(ModalScreen[bool]):
    """Modal screen for confirming project restoration.

    Dismisses with ``True`` when the user confirms (y / Enter) and
    ``False`` when they cancel (n / Escape).
    """

    # Centred dialog styling; $success accent = non-destructive action.
    CSS = """
    RestoreProjectConfirmScreen {
        align: center middle;
    }

    #restore-project-container {
        width: 65;
        height: auto;
        border: solid $success;
        background: $surface;
        padding: 1 2;
    }

    #restore-project-title {
        text-align: center;
        text-style: bold;
        color: $success;
        margin-bottom: 1;
    }

    #restore-project-message {
        margin-bottom: 1;
    }

    #restore-project-info {
        color: $text-muted;
        margin-bottom: 1;
    }

    #restore-project-buttons {
        text-align: center;
        height: auto;
    }
    """

    # y/Enter confirm, n/Escape cancel.
    BINDINGS: ClassVar[list[BindingType]] = [
        Binding("y", "confirm", "Yes"),
        Binding("enter", "confirm", "Confirm", show=False),
        Binding("n", "cancel", "No"),
        Binding("escape", "cancel", "Cancel", show=False),
    ]

    def __init__(self, project_name: str, session_count: int) -> None:
        """Remember the project display name and its archived session count."""
        super().__init__()
        self.project_name = project_name
        self.session_count = session_count

    def compose(self) -> ComposeResult:
        """Build the dialog: title, project summary, info text, key hints."""
        with Container(id="restore-project-container"):
            yield Static("Restore Project", id="restore-project-title")
            yield Static(
                f"Project: {self.project_name}\n"
                f"Archived sessions: {self.session_count}",
                id="restore-project-message",
            )
            yield Static(
                "This will restore ALL archived sessions by writing JSONL files.\n"
                "The project directory will be created if it doesn't exist.",
                id="restore-project-info",
            )
            # "\\[" renders a literal "[" in Rich markup.
            yield Static("\\[Enter/y] Yes \\[Esc/n] No", id="restore-project-buttons")

    def action_confirm(self) -> None:
        """Close the modal reporting confirmation."""
        self.dismiss(True)

    def action_cancel(self) -> None:
        """Close the modal reporting cancellation."""
        self.dismiss(False)
+
class SessionBrowser(App[Optional[str]]):
"""Interactive TUI for browsing and managing Claude Code Log sessions."""
@@ -349,6 +1256,9 @@ class SessionBrowser(App[Optional[str]]):
TITLE = "Claude Code Log - Session Browser"
BINDINGS: ClassVar[list[BindingType]] = [
Binding("q", "quit", "Quit"),
+ Binding("escape", "back_to_projects", "Back", show=False),
+ Binding("enter", "export_selected", "Open HTML", show=False),
+ Binding("a", "archive_session", "Archive Session"),
Binding("h", "export_selected", "Open HTML page"),
Binding("m", "export_markdown", "Open Markdown"),
Binding("v", "view_markdown", "View Markdown"),
@@ -357,6 +1267,8 @@ class SessionBrowser(App[Optional[str]]):
Binding("M", "force_export_markdown", "Force Markdown", show=False),
Binding("V", "force_view_markdown", "Force View", show=False),
Binding("c", "resume_selected", "Resume in Claude Code"),
+ Binding("r", "restore_jsonl", "Restore JSONL"),
+ Binding("d", "delete_session", "Delete Session"),
Binding("e", "toggle_expanded", "Toggle Expanded View"),
Binding("p", "back_to_projects", "Open Project Selector"),
Binding("?", "toggle_help", "Help"),
@@ -367,14 +1279,17 @@ class SessionBrowser(App[Optional[str]]):
project_path: Path
cache_manager: CacheManager
sessions: dict[str, SessionCacheData]
+ archived_sessions: dict[str, SessionCacheData]
- def __init__(self, project_path: Path):
+ def __init__(self, project_path: Path, is_archived: bool = False):
"""Initialize the session browser with a project path."""
super().__init__()
self.theme = "gruvbox"
- self.project_path = project_path
- self.cache_manager = CacheManager(project_path, get_library_version())
+ self.project_path = project_path.resolve()
+ self.is_archived_project = is_archived
+ self.cache_manager = CacheManager(self.project_path, get_library_version())
self.sessions = {}
+ self.archived_sessions = {}
def compose(self) -> ComposeResult:
"""Create the UI layout."""
@@ -407,8 +1322,33 @@ def on_resize(self) -> None:
def load_sessions(self) -> None:
"""Load session information from cache or build cache if needed."""
+ # For archived projects, just load from cache (no JSONL files to check)
+ if self.is_archived_project:
+ project_cache = self.cache_manager.get_cached_project_data()
+ if project_cache and project_cache.sessions:
+ # All sessions are "archived" for fully archived projects
+ self.sessions = {}
+ self.archived_sessions = project_cache.sessions
+ else:
+ self.sessions = {}
+ self.archived_sessions = {}
+ # Update UI
+ try:
+ self.populate_table()
+ self.update_stats()
+ except Exception as e:
+ # UI components may not be mounted yet during initialization
+ self.log.debug(f"Skipped UI update for archived project: {e}")
+ return
+
# Check if we need to rebuild cache by checking for modified files
- jsonl_files = list(self.project_path.glob("*.jsonl"))
+ # Exclude agent files - they are loaded via session references
+ jsonl_files = [
+ f
+ for f in self.project_path.glob("*.jsonl")
+ if not f.name.startswith("agent-")
+ ]
+ valid_session_ids = {f.stem for f in jsonl_files}
modified_files = self.cache_manager.get_modified_files(jsonl_files)
# Get cached project data
@@ -430,17 +1370,36 @@ def load_sessions(self) -> None:
else:
self.sessions = {}
- except Exception:
- # Don't show notification during startup - just return
+ except Exception as e:
+ # Don't show notification during startup - log and return
+ self.log.debug(f"Cache building failed during startup: {e}")
return
+ # Only compute archived sessions if there are JSONL files to compare against
+ # (in test environments, there may be cached sessions but no JSONL files)
+ if valid_session_ids:
+ # Load archived sessions (cached but JSONL deleted)
+ self.archived_sessions = self.cache_manager.get_archived_sessions(
+ valid_session_ids
+ )
+
+ # Filter current sessions to only those with existing JSONL files
+ self.sessions = {
+ sid: data
+ for sid, data in self.sessions.items()
+ if sid in valid_session_ids
+ }
+ else:
+ # No JSONL files to compare - treat all sessions as current
+ self.archived_sessions = {}
+
# Only update UI if we're in app context
try:
self.populate_table()
self.update_stats()
- except Exception:
- # Not in app context, skip UI updates
- pass
+ except Exception as e:
+ # UI components may not be mounted yet during initialization
+ self.log.debug(f"Skipped UI update after session load: {e}")
def populate_table(self) -> None:
"""Populate the sessions table with session data."""
@@ -473,13 +1432,20 @@ def populate_table(self) -> None:
table.add_column("Messages", width=messages_width)
table.add_column("Tokens", width=tokens_width)
- # Sort sessions by start time (newest first)
+ # Combine current and archived sessions with archived flag
+ all_sessions: list[tuple[str, SessionCacheData, bool]] = []
+ for session_id, session_data in self.sessions.items():
+ all_sessions.append((session_id, session_data, False))
+ for session_id, session_data in self.archived_sessions.items():
+ all_sessions.append((session_id, session_data, True))
+
+ # Sort all sessions by start time (newest first)
sorted_sessions = sorted(
- self.sessions.items(), key=lambda x: x[1].first_timestamp, reverse=True
+ all_sessions, key=lambda x: x[1].first_timestamp, reverse=True
)
# Add rows
- for session_id, session_data in sorted_sessions:
+ for session_id, session_data, is_archived in sorted_sessions:
# Format timestamps - use short format for narrow terminals
use_short_format = terminal_width < 120
start_time = self.format_timestamp(
@@ -501,7 +1467,9 @@ def populate_table(self) -> None:
or session_data.first_user_message
or "No preview available"
)
- # Let Textual handle truncation based on column width
+ # Add [ARCHIVED] indicator for archived sessions
+ if is_archived:
+ preview = f"[ARCHIVED] {preview}"
table.add_row(
session_id[:8],
@@ -514,18 +1482,18 @@ def populate_table(self) -> None:
def update_stats(self) -> None:
"""Update the project statistics display."""
- total_sessions = len(self.sessions)
- total_messages = sum(s.message_count for s in self.sessions.values())
+ # Combine all sessions for stats
+ all_sessions = {**self.sessions, **self.archived_sessions}
+ total_sessions = len(all_sessions)
+ total_messages = sum(s.message_count for s in all_sessions.values())
total_tokens = sum(
- s.total_input_tokens + s.total_output_tokens for s in self.sessions.values()
+ s.total_input_tokens + s.total_output_tokens for s in all_sessions.values()
)
# Get project name using shared logic
- working_directories = None
+ working_directories: List[str] = []
try:
- project_cache = self.cache_manager.get_cached_project_data()
- if project_cache and project_cache.working_directories:
- working_directories = project_cache.working_directories
+ working_directories = self.cache_manager.get_working_directories()
except Exception:
# Fall back to directory name if cache fails
pass
@@ -535,16 +1503,14 @@ def update_stats(self) -> None:
)
# Find date range
- if self.sessions:
+ if all_sessions:
timestamps = [
- s.first_timestamp for s in self.sessions.values() if s.first_timestamp
+ s.first_timestamp for s in all_sessions.values() if s.first_timestamp
]
earliest = min(timestamps) if timestamps else ""
latest = (
- max(
- s.last_timestamp for s in self.sessions.values() if s.last_timestamp
- )
- if self.sessions
+ max(s.last_timestamp for s in all_sessions.values() if s.last_timestamp)
+ if all_sessions
else ""
)
@@ -562,8 +1528,17 @@ def update_stats(self) -> None:
# Create spaced layout: Project (left), Sessions info (center), Date range (right)
terminal_width = self.size.width
+ # Show archived count if any
+ archived_count = len(self.archived_sessions)
+ if archived_count > 0:
+ mode_indicator = f"({archived_count} archived)"
+ else:
+ mode_indicator = ""
+
# Project section (left aligned)
- project_section = f"[bold]Project:[/bold] {project_name}"
+ project_section = (
+ f"[bold]Project:[/bold] {project_name} {mode_indicator}".strip()
+ )
# Sessions info section (center)
sessions_section = f"[bold]Sessions:[/bold] {total_sessions:,} | [bold]Messages:[/bold] {total_messages:,} | [bold]Tokens:[/bold] {total_tokens:,}"
@@ -613,7 +1588,7 @@ def format_timestamp(
elif short_format:
return dt.strftime("%m-%d %H:%M")
else:
- return dt.strftime("%m-%d %H:%M")
+ return dt.strftime("%Y-%m-%d %H:%M")
except (ValueError, AttributeError):
return "Unknown"
@@ -633,11 +1608,15 @@ def _update_selected_session_from_cursor(self) -> None:
if row_data:
# Extract session ID from the first column (now just first 8 chars)
session_id_display = str(row_data[0])
- # Find the full session ID
+ # Find the full session ID in both dicts (current first, then archived)
for full_session_id in self.sessions.keys():
if full_session_id.startswith(session_id_display):
self.selected_session_id = full_session_id
- break
+ return
+ for full_session_id in self.archived_sessions.keys():
+ if full_session_id.startswith(session_id_display):
+ self.selected_session_id = full_session_id
+ return
except Exception:
# If widget not mounted yet or we can't get the row data, don't update selection
pass
@@ -766,14 +1745,17 @@ def _escape_rich_markup(self, text: str) -> str:
def _update_expanded_content(self) -> None:
"""Update the expanded content for the currently selected session."""
- if (
- not self.selected_session_id
- or self.selected_session_id not in self.sessions
- ):
+ if not self.selected_session_id:
+ return
+
+ # Get session data from either current or archived sessions
+ session_data = self.sessions.get(
+ self.selected_session_id
+ ) or self.archived_sessions.get(self.selected_session_id)
+ if not session_data:
return
expanded_content = self.query_one("#expanded-content", Static)
- session_data = self.sessions[self.selected_session_id]
# Build expanded content
content_parts: list[str] = []
@@ -843,16 +1825,24 @@ def _ensure_session_file(
if not needs_regeneration:
return session_file
- # Load messages from JSONL files
+ # Load messages - from cache for archived sessions, from JSONL otherwise
try:
- messages = load_directory_transcripts(
- self.project_path, self.cache_manager, silent=True
- )
+ is_archived = session_id in self.archived_sessions
+ if is_archived:
+ # Load from cache for archived sessions
+ messages = self.cache_manager.load_session_entries(session_id)
+ else:
+ # Load from JSONL files for current sessions
+ messages = load_directory_transcripts(
+ self.project_path, self.cache_manager, silent=True
+ )
if not messages:
return None
- # Build session title
- session_data = self.sessions.get(session_id)
+ # Build session title - check both dicts
+ session_data = self.sessions.get(session_id) or self.archived_sessions.get(
+ session_id
+ )
project_cache = self.cache_manager.get_cached_project_data()
project_name = get_project_display_name(
self.project_path.name,
@@ -886,9 +1876,12 @@ def _ensure_session_file(
def action_toggle_expanded(self) -> None:
"""Toggle the expanded view for the selected session."""
+ if not self.selected_session_id:
+ return
+ # Check if session exists in either current or archived sessions
if (
- not self.selected_session_id
- or self.selected_session_id not in self.sessions
+ self.selected_session_id not in self.sessions
+ and self.selected_session_id not in self.archived_sessions
):
return
@@ -911,8 +1904,12 @@ def action_toggle_help(self) -> None:
"Claude Code Log - Session Browser\n\n"
"Navigation:\n"
"- Use arrow keys to select sessions\n"
- "- Expanded content updates automatically when visible\n\n"
+ "- Expanded content updates automatically when visible\n"
+ "- [ARCHIVED] sessions have no JSONL file (cache only)\n\n"
"Actions:\n"
+ "- a: Archive session (delete JSONL, keep in cache)\n"
+ "- d: Delete session (with options)\n"
+ "- r: Restore archived session to JSONL\n"
"- e: Toggle expanded view for session\n"
"- h: Open selected session's HTML page\n"
"- m: Open selected session's Markdown file (in browser)\n"
@@ -923,6 +1920,181 @@ def action_toggle_help(self) -> None:
)
self.notify(help_text, timeout=10)
+ def check_action(self, action: str, parameters: tuple[object, ...]) -> bool | None:
+ """Conditionally enable/disable actions based on selected session type."""
+ if not self.selected_session_id:
+ return True # Allow action, it will handle missing selection
+
+ is_archived = self.selected_session_id in self.archived_sessions
+ is_current = self.selected_session_id in self.sessions
+
+ # Archive is only available for current sessions (has JSONL file)
+ if action == "archive_session" and not is_current:
+ return False
+ # Resume is only available for current sessions
+ if action == "resume_selected" and not is_current:
+ return False
+ # Restore is only available for archived sessions
+ if action == "restore_jsonl" and not is_archived:
+ return False
+ return True
+
+ def action_restore_jsonl(self) -> None:
+ """Restore the selected archived session to a JSONL file."""
+ if not self.selected_session_id:
+ self.notify("No session selected", severity="warning")
+ return
+
+ if self.selected_session_id not in self.archived_sessions:
+ self.notify(
+ "Selected session not found in archived sessions", severity="error"
+ )
+ return
+
+ try:
+ # Export messages from cache
+ messages = self.cache_manager.export_session_to_jsonl(
+ self.selected_session_id
+ )
+ if not messages:
+ self.notify("No messages found for session", severity="error")
+ return
+
+ # Ensure project directory exists (may have been deleted)
+ self.project_path.mkdir(parents=True, exist_ok=True)
+
+ # Write to JSONL file
+ output_path = self.project_path / f"{self.selected_session_id}.jsonl"
+ with open(output_path, "w", encoding="utf-8") as f:
+ for msg in messages:
+ f.write(msg + "\n")
+
+ self.notify(
+ f"Restored {len(messages)} messages to {output_path.name}",
+ severity="information",
+ )
+
+ # Refresh to show the restored session as current
+ self._refresh_after_restore()
+
+ except Exception as e:
+ self.notify(f"Error restoring session: {e}", severity="error")
+
+ def _refresh_after_restore(self) -> None:
+ """Refresh sessions after restoring an archived session."""
+ # If this was a fully archived project, it's no longer archived
+ # since we just restored a JSONL file
+ if self.is_archived_project:
+ self.is_archived_project = False
+
+ # Reload sessions - this will now detect the restored JSONL file
+ self.load_sessions()
+
+ self.notify(
+ "Session restored! It now appears as a current session.",
+ timeout=5,
+ )
+
+ def action_archive_session(self) -> None:
+ """Archive the selected session (delete JSONL file, keep in cache)."""
+ if not self.selected_session_id:
+ self.notify("No session selected", severity="warning")
+ return
+
+ # Archive only works for current sessions (those with JSONL files)
+ if self.selected_session_id not in self.sessions:
+ self.notify(
+ "Only current sessions can be archived (already archived or not found)",
+ severity="warning",
+ )
+ return
+
+ # Push archive confirmation screen
+ self.push_screen(
+ ArchiveConfirmScreen(session_id=self.selected_session_id),
+ callback=self._on_archive_confirm,
+ )
+
+ def _on_archive_confirm(self, confirmed: Optional[bool]) -> None:
+ """Handle archive confirmation result."""
+ if not confirmed or not self.selected_session_id:
+ return
+
+ try:
+ # Delete the JSONL file
+ jsonl_path = self.project_path / f"{self.selected_session_id}.jsonl"
+ if jsonl_path.exists():
+ jsonl_path.unlink()
+ self.notify(
+ f"Session {self.selected_session_id[:8]} archived",
+ severity="information",
+ )
+ # Reload sessions - this will move the session to archived
+ self.load_sessions()
+ else:
+ self.notify("JSONL file not found", severity="error")
+ except Exception as e:
+ self.notify(f"Error archiving session: {e}", severity="error")
+
+ def action_delete_session(self) -> None:
+ """Delete the selected session with smart options."""
+ if not self.selected_session_id:
+ self.notify("No session selected", severity="warning")
+ return
+
+ # Check if session exists in either current or archived sessions
+ if (
+ self.selected_session_id not in self.sessions
+ and self.selected_session_id not in self.archived_sessions
+ ):
+ self.notify("Selected session not found", severity="error")
+ return
+
+ # Determine if this is an archived session (no JSONL to fall back on)
+ is_archived_session = self.selected_session_id in self.archived_sessions
+
+ # Push confirmation screen
+ self.push_screen(
+ DeleteConfirmScreen(
+ session_id=self.selected_session_id,
+ is_archived=is_archived_session,
+ ),
+ callback=self._on_delete_confirm,
+ )
+
+ def _on_delete_confirm(self, delete_option: Optional[str]) -> None:
+ """Handle deletion confirmation result."""
+ if not delete_option or not self.selected_session_id:
+ return
+
+ try:
+ deleted_what: list[str] = []
+
+ # Delete JSONL file if requested
+ if delete_option == "both":
+ jsonl_path = self.project_path / f"{self.selected_session_id}.jsonl"
+ if jsonl_path.exists():
+ jsonl_path.unlink()
+ deleted_what.append("JSONL file")
+
+ # Delete from cache
+ success = self.cache_manager.delete_session(self.selected_session_id)
+ if success:
+ deleted_what.append("cache")
+
+ if deleted_what:
+ self.notify(
+ f"Session {self.selected_session_id[:8]} deleted ({', '.join(deleted_what)})",
+ severity="information",
+ )
+ # Clear selection and reload
+ self.selected_session_id = None
+ self.load_sessions()
+ else:
+ self.notify("Failed to delete session", severity="error")
+ except Exception as e:
+ self.notify(f"Error deleting session: {e}", severity="error")
+
def action_back_to_projects(self) -> None:
"""Navigate to the project selector."""
# Exit with a special return value to signal we want to go to project selector
@@ -934,14 +2106,16 @@ async def action_quit(self) -> None:
def run_project_selector(
- projects: list[Path], matching_projects: list[Path]
+ projects: list[Path],
+ matching_projects: list[Path],
+ archived_projects: Optional[set[Path]] = None,
) -> Optional[Path]:
"""Run the project selector TUI and return the selected project path."""
if not projects:
print("Error: No projects provided")
return None
- app = ProjectSelector(projects, matching_projects)
+ app = ProjectSelector(projects, matching_projects, archived_projects)
try:
return app.run()
except KeyboardInterrupt:
@@ -950,9 +2124,20 @@ def run_project_selector(
return None
-def run_session_browser(project_path: Path) -> Optional[str]:
+def run_session_browser(project_path: Path, is_archived: bool = False) -> Optional[str]:
"""Run the session browser TUI for the given project path."""
if not project_path.exists():
+ # For archived projects, the directory may not exist but cache may
+ if is_archived:
+ # Try to load from cache
+ try:
+ cache_manager = CacheManager(project_path, get_library_version())
+ project_cache = cache_manager.get_cached_project_data()
+ if project_cache and project_cache.sessions:
+ app = SessionBrowser(project_path, is_archived=True)
+ return app.run()
+ except Exception:
+ pass
print(f"Error: Project path {project_path} does not exist")
return None
@@ -963,10 +2148,20 @@ def run_session_browser(project_path: Path) -> Optional[str]:
# Check if there are any JSONL files
jsonl_files = list(project_path.glob("*.jsonl"))
if not jsonl_files:
+ # For archived projects, check if we have cached sessions
+ if is_archived:
+ try:
+ cache_manager = CacheManager(project_path, get_library_version())
+ project_cache = cache_manager.get_cached_project_data()
+ if project_cache and project_cache.sessions:
+ app = SessionBrowser(project_path, is_archived=True)
+ return app.run()
+ except Exception:
+ pass
print(f"Error: No JSONL transcript files found in {project_path}")
return None
- app = SessionBrowser(project_path)
+ app = SessionBrowser(project_path, is_archived=is_archived)
try:
return app.run()
except KeyboardInterrupt:
diff --git a/claude_code_log/utils.py b/claude_code_log/utils.py
index 9fe494c3..4a7241d0 100644
--- a/claude_code_log/utils.py
+++ b/claude_code_log/utils.py
@@ -4,9 +4,11 @@
import re
from datetime import datetime, timezone
from pathlib import Path
-from typing import Optional
+from typing import TYPE_CHECKING, Any, Optional
+
+if TYPE_CHECKING:
+ from .cache import SessionCacheData
-from claude_code_log.cache import SessionCacheData
from .models import ContentItem, TextContent, TranscriptEntry, UserTranscriptEntry
from .factories import (
IDE_DIAGNOSTICS_PATTERN,
@@ -54,6 +56,16 @@ def format_timestamp_range(first_timestamp: str, last_timestamp: str) -> str:
return ""
+def _is_temp_path(path_str: str) -> bool:
+ """Check if a path is a temporary/test path that should be filtered out."""
+ temp_patterns = [
+ "/private/var/folders/", # macOS temp
+ "/tmp/", # Unix temp
+ "/var/folders/", # macOS temp (alternate)
+ ]
+ return any(pattern in path_str for pattern in temp_patterns)
+
+
def get_project_display_name(
project_dir_name: str, working_directories: Optional[list[str]] = None
) -> str:
@@ -67,8 +79,18 @@ def get_project_display_name(
The project display name (e.g., "claude-code-log")
"""
if working_directories:
+ # Filter out temporary paths (pytest, macOS temp dirs, etc.)
+ real_dirs = [wd for wd in working_directories if not _is_temp_path(wd)]
+
+ # If all directories were filtered out, fall back to project_dir_name conversion
+ if not real_dirs:
+ display_name = project_dir_name
+ if display_name.startswith("-"):
+ display_name = display_name[1:].replace("-", "/")
+ return display_name
+
# Convert to Path objects with their original indices for tracking recency
- paths_with_indices = [(Path(wd), i) for i, wd in enumerate(working_directories)]
+ paths_with_indices = [(Path(wd), i) for i, wd in enumerate(real_dirs)]
# Sort by: 1) path depth (fewer parts = less nested), 2) recency (lower index = more recent)
# This gives us the least nested path, with ties broken by recency
@@ -167,18 +189,21 @@ def extract_text_content_length(content: list[ContentItem]) -> int:
def extract_working_directories(
- entries: list[TranscriptEntry] | list[SessionCacheData],
+ entries: "list[TranscriptEntry] | list[SessionCacheData] | list[Any]",
) -> list[str]:
"""Extract unique working directories from a list of entries.
Ordered by timestamp (most recent first).
Args:
- entries: List of entries to extract working directories from
+ entries: List of TranscriptEntry or SessionCacheData to extract working directories from
Returns:
List of unique working directory paths found in the entries
"""
+ # Import here to avoid circular dependency at runtime
+ from .cache import SessionCacheData
+
working_directories: dict[str, str] = {}
for entry in entries:
@@ -201,6 +226,9 @@ def extract_working_directories(
return [path for path, _ in sorted_dirs]
+# IDE tag patterns imported from factories for compact preview rendering
+
+
def _compact_ide_tags_for_preview(text_content: str) -> str:
"""Replace verbose IDE/system tags with compact emoji indicators for previews.
diff --git a/dev-docs/restoring-archived-sessions.md b/dev-docs/restoring-archived-sessions.md
new file mode 100644
index 00000000..38582deb
--- /dev/null
+++ b/dev-docs/restoring-archived-sessions.md
@@ -0,0 +1,100 @@
+# Restoring Archived Sessions
+
+When you run `claude-code-log`, you may see output like:
+
+```sh
+project-name: cached, 3 archived (0.0s)
+```
+
+This indicates that 3 sessions exist in the cache whose source JSONL files have been deleted.
+
+## What Are Archived Sessions?
+
+Archived sessions are sessions preserved in the SQLite cache (`~/.claude/projects/cache.db`) even after their source JSONL files have been deleted. This happens when:
+
+1. Claude Code automatically deletes old JSONL files based on the `cleanupPeriodDays` setting
+2. You manually delete JSONL files from `~/.claude/projects/*/`
+
+The cache stores the complete message data, so full restoration is possible.
+
+## Preventing Automatic Deletion
+
+Claude Code automatically deletes session logs after 30 days by default. To change this, add `cleanupPeriodDays` to your `~/.claude/settings.json`:
+
+```json
+{
+ "cleanupPeriodDays": 99999
+}
+```
+
+This effectively disables automatic cleanup (99999 days is roughly 274 years). You can also set it to a specific number of days.
+
+See Claude Code's [settings documentation](https://docs.anthropic.com/en/docs/claude-code/settings) for more details.
+
+## Using the TUI to Manage Archived Sessions
+
+The easiest way to browse and restore archived sessions is through the interactive TUI.
+
+### Launch the TUI
+
+```bash
+claude-code-log --tui
+```
+
+### Toggle Archived View
+
+Press `a` to toggle between current and archived sessions. The header shows the current mode:
+
+```text
+┌─ Claude Code Log ─────────────────────────────────────────────────┐
+│ Project: my-project ARCHIVED (3) │
+│ Sessions: 3 │ Messages: 456 │ Tokens: 45,230 │
+├──────────┬───────────────────────────────────┬─────────┬──────────┤
+│ Session │ Title │ Start │ Messages │
+├──────────┼───────────────────────────────────┼─────────┼──────────┤
+│ abc123 │ Fix authentication bug │ 12-01 │ 45 │
+│ def456 │ Add user settings page │ 11-28 │ 123 │
+│ ghi789 │ Refactor database layer │ 11-15 │ 67 │
+└──────────┴───────────────────────────────────┴─────────┴──────────┘
+ [a] Current [r] Restore [h] HTML [m] Markdown [v] View [q] Quit
+```
+
+### Restore a Session
+
+1. Switch to archived view with `a`
+2. Navigate to the session you want to restore
+3. Press `r` to restore the session to a JSONL file
+4. The session will be restored to `~/.claude/projects/{project}/{session-id}.jsonl`
+5. Press `a` again to switch back to current sessions and see the restored session
+
+### View Archived Sessions
+
+You can also view archived sessions as HTML or Markdown without restoring them:
+
+- `h` - Open HTML in browser
+- `m` - Open Markdown in browser
+- `v` - View Markdown in embedded viewer
+
+## Limitations
+
+- **Message order**: Messages are ordered by timestamp, which may differ slightly from original file order for same-timestamp entries
+- **Whitespace**: Original JSON formatting is not preserved — restored entries are semantically identical to the originals but may differ byte-for-byte
+
+## Manual SQL Approach
+
+For advanced users, you can also query the cache database directly:
+
+```bash
+sqlite3 ~/.claude/projects/cache.db
+```
+
+```sql
+-- List all sessions
+SELECT p.project_path, s.session_id, s.first_timestamp, s.message_count
+FROM sessions s
+JOIN projects p ON s.project_id = p.id
+ORDER BY s.first_timestamp;
+
+-- Export a session's messages
+SELECT content FROM messages WHERE session_id = 'your-session-id' ORDER BY timestamp;
+```
diff --git a/pyproject.toml b/pyproject.toml
index dc27cb5b..ef3d53a7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -54,6 +54,10 @@ markers = [
"benchmark: Performance benchmarks that output to GitHub Job Summary",
]
+[tool.ty.environment]
+# Use custom stubs for untyped libraries
+extra-paths = ["stubs"]
+
[tool.pyright]
# Pyright configuration with strict settings
include = ["claude_code_log"] # TODO: , "test"
@@ -112,7 +116,7 @@ dev = [
"pytest-xdist[psutil]>=3.6.1",
"pyright>=1.1.350",
"vulture>=2.14",
- "ty>=0.0.1a12",
+ "ty>=0.0.11",
"pytest-playwright>=0.7.0",
"syrupy>=5.0.0",
]
diff --git a/stubs/pygments/__init__.pyi b/stubs/pygments/__init__.pyi
new file mode 100644
index 00000000..06d21785
--- /dev/null
+++ b/stubs/pygments/__init__.pyi
@@ -0,0 +1,5 @@
+"""Type stubs for pygments - minimal stubs for functions used in this project."""
+
+from typing import Any
+
+def highlight(code: str, lexer: Any, formatter: Any, outfile: Any = None) -> str: ...
diff --git a/stubs/pygments/formatter.pyi b/stubs/pygments/formatter.pyi
new file mode 100644
index 00000000..fdf8cc00
--- /dev/null
+++ b/stubs/pygments/formatter.pyi
@@ -0,0 +1,7 @@
+"""Type stubs for pygments.formatter - base formatter class."""
+
+from typing import Any
+
+class Formatter:
+ """Base class for formatters."""
+ def __init__(self, **options: Any) -> None: ...
diff --git a/stubs/pygments/formatters/__init__.pyi b/stubs/pygments/formatters/__init__.pyi
new file mode 100644
index 00000000..8f181ab2
--- /dev/null
+++ b/stubs/pygments/formatters/__init__.pyi
@@ -0,0 +1,16 @@
+"""Type stubs for pygments.formatters - minimal stubs for functions used in this project."""
+
+from typing import Any, Literal
+
+from ..formatter import Formatter
+
+class HtmlFormatter(Formatter):
+ """HTML formatter for syntax highlighted code."""
+ def __init__(
+ self,
+ linenos: bool | Literal["table", "inline"] = False,
+ cssclass: str = "highlight",
+ wrapcode: bool = False,
+ linenostart: int = 1,
+ **options: Any,
+ ) -> None: ...
diff --git a/stubs/pygments/lexer.pyi b/stubs/pygments/lexer.pyi
new file mode 100644
index 00000000..16f50b37
--- /dev/null
+++ b/stubs/pygments/lexer.pyi
@@ -0,0 +1,7 @@
+"""Type stubs for pygments.lexer - base lexer class."""
+
+from typing import Any
+
+class Lexer:
+ """Base class for lexers."""
+ def __init__(self, **options: Any) -> None: ...
diff --git a/stubs/pygments/lexers/__init__.pyi b/stubs/pygments/lexers/__init__.pyi
new file mode 100644
index 00000000..ad20345f
--- /dev/null
+++ b/stubs/pygments/lexers/__init__.pyi
@@ -0,0 +1,20 @@
+"""Type stubs for pygments.lexers - minimal stubs for functions used in this project."""
+
+from typing import Any, Iterator
+
+from ..lexer import Lexer
+
+class TextLexer(Lexer):
+ """Plain text lexer."""
+ def __init__(self, **options: Any) -> None: ...
+
+def get_lexer_by_name(name: str, **options: Any) -> Lexer: ...
+def get_all_lexers() -> Iterator[
+ tuple[str, tuple[str, ...], tuple[str, ...], tuple[str, ...]]
+]:
+ """Get all registered lexers.
+
+ Returns:
+ Iterator of (name, aliases, patterns, mimetypes) tuples
+ """
+ ...
diff --git a/stubs/pygments/util.pyi b/stubs/pygments/util.pyi
new file mode 100644
index 00000000..a2d52e07
--- /dev/null
+++ b/stubs/pygments/util.pyi
@@ -0,0 +1,6 @@
+"""Type stubs for pygments.util - minimal stubs for functions used in this project."""
+
+class ClassNotFound(Exception):
+ """Exception raised when a lexer or formatter class is not found."""
+
+ ...
diff --git a/test/__snapshots__/test_snapshot_html.ambr b/test/__snapshots__/test_snapshot_html.ambr
index bb6c3651..83cee00c 100644
--- a/test/__snapshots__/test_snapshot_html.ambr
+++ b/test/__snapshots__/test_snapshot_html.ambr
@@ -461,6 +461,30 @@
.project-sessions details[open] summary {
margin-bottom: 10px;
}
+
+ /* Archived project styling */
+ .project-card.archived {
+ opacity: 0.6;
+ background-color: #f5f5f522;
+ }
+
+ .project-card.archived:hover {
+ opacity: 0.8;
+ }
+
+ .archived-badge {
+ display: inline-block;
+ background-color: #888;
+ color: white;
+ font-size: 0.65em;
+ font-weight: 600;
+ padding: 2px 8px;
+ border-radius: 4px;
+ margin-left: 10px;
+ vertical-align: middle;
+ text-transform: uppercase;
+ letter-spacing: 0.5px;
+ }
/* Search Bar Styles */
.search-container {
position: relative;
@@ -1655,7 +1679,9 @@