From b313fd3253253f73971263bbc65a0205c5160cc9 Mon Sep 17 00:00:00 2001 From: Derek Parent Date: Thu, 5 Feb 2026 21:41:04 -0800 Subject: [PATCH 1/3] feat: add LLM-powered manuals assistant with SSE streaming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Conversational troubleshooting assistant for CAT engine manuals using Claude Sonnet 4.5 + existing FTS5 RAG search. Streams responses via SSE with source citations. Graceful fallback to search when API unavailable. New files: - src/services/llm_service.py — Anthropic SDK wrapper (sync, retry) - src/services/chat_service.py — RAG pipeline orchestration - src/prompts/manuals_assistant.py — system prompt + context formatting - src/routes/chat.py — chat endpoints with SSE streaming - templates/manuals/chat.html — mobile-first chat UI - tests/test_chat.py — 21 unit + integration tests Modified: - src/config.py — Anthropic API config vars - src/models.py — ChatSession model - src/services/manuals_service.py — get_context_for_llm() - src/app.py — blueprint registration + LLM init - requirements.txt — anthropic SDK Co-Authored-By: Claude Opus 4.6 --- docs/LLM-Powered-Manuals-Assistant-Plan.md | 231 ++++++ .../d4db138494c9_add_chat_sessions_table.py | 36 + requirements.txt | 3 + src/app.py | 8 +- src/config.py | 7 + src/models.py | 37 + src/prompts/__init__.py | 0 src/prompts/manuals_assistant.py | 96 +++ src/routes/chat.py | 185 +++++ src/services/chat_service.py | 161 ++++ src/services/llm_service.py | 177 +++++ src/services/manuals_service.py | 88 +++ templates/manuals/chat.html | 703 ++++++++++++++++++ tests/test_chat.py | 354 +++++++++ 14 files changed, 2085 insertions(+), 1 deletion(-) create mode 100644 docs/LLM-Powered-Manuals-Assistant-Plan.md create mode 100644 migrations/versions/d4db138494c9_add_chat_sessions_table.py create mode 100644 src/prompts/__init__.py create mode 100644 src/prompts/manuals_assistant.py create mode 100644 src/routes/chat.py create mode 100644 src/services/chat_service.py create mode 100644 src/services/llm_service.py create mode 100644 templates/manuals/chat.html create mode 100644 tests/test_chat.py diff --git a/docs/LLM-Powered-Manuals-Assistant-Plan.md b/docs/LLM-Powered-Manuals-Assistant-Plan.md new file mode 100644 index 0000000..6299cf3 --- /dev/null +++ b/docs/LLM-Powered-Manuals-Assistant-Plan.md @@ -0,0 +1,231 @@ +# LLM-Powered Manuals Assistant + +**Overview:** Add a conversational troubleshooting assistant to the manuals section, leveraging the existing FTS5 RAG infrastructure. Optimized for quality over cost — on-demand expert use at low volume, not high-throughput cheap queries. The assistant retrieves relevant manual excerpts, cites sources, and streams responses via SSE. + +**Model:** Claude Sonnet 4.5 (~$7/mo at 10 queries/day). Best technical reasoning, excellent instruction-following for RAG grounding, familiar SDK. + +**Usage profile:** On-demand, ~5-10 queries/day when actively troubleshooting. Cost is negligible at this volume — optimize for best possible answers. 
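The whole pipeline is small enough to sketch before any infrastructure exists — roughly what the Phase 0 spike below validates. A minimal sketch, assuming the existing `search_manuals()` returns `filename`/`page_num`/`snippet` dicts (as the rest of this patch relies on) and that `ANTHROPIC_API_KEY` is set in the environment:

```python
# spike_manuals_llm.py — Phase 0 throwaway script (a sketch, not part of this patch).
# Assumptions: search_manuals() returns dicts with filename/page_num/snippet keys,
# and ANTHROPIC_API_KEY is exported. Streams the answer to stdout.
import os
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent / "src"))

import anthropic
from services.manuals_service import search_manuals

SYSTEM = (
    "You are a marine engineering assistant for CAT engines. "
    "Answer only from the provided excerpts and cite [Document, p.XX]. "
    "If the excerpts do not contain the answer, say so."
)


def run(query: str) -> None:
    # RAG step: top FTS5 hits, primary manuals boosted
    results = search_manuals(query, limit=5, boost_primary=True)
    context = "\n\n".join(
        f"[{r['filename']}, p.{r['page_num']}]\n{r['snippet']}" for r in results
    )

    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
    # Same streaming SDK call the llm_service wrapper in this patch uses
    with client.messages.stream(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        system=f"{SYSTEM}\n\nManual excerpts:\n{context}",
        messages=[{"role": "user", "content": query}],
    ) as stream:
        for text in stream.text_stream:
            print(text, end="", flush=True)
    print()


if __name__ == "__main__":
    run(" ".join(sys.argv[1:]) or "3516 fuel rack actuator troubleshooting")
```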
+ +--- + +## Todos + +- [ ] **Phase 0:** Spike — validate Claude Sonnet 4.5 + FTS5 RAG pipeline with real manual queries +- [ ] **Phase 1.1:** Create `llm_service.py` (Anthropic SDK wrapper, sync, Sonnet 4.5) +- [ ] **Phase 1.2:** Create system prompt in `src/prompts/manuals_assistant.py` +- [ ] **Phase 1.3:** Add `get_context_for_llm()` to `manuals_service.py` +- [ ] **Phase 1.4:** Create `chat_service.py` (context assembly, conversation history, token management) +- [ ] **Phase 1.5:** Create chat routes + mobile-first chat UI with SSE streaming +- [ ] **Phase 1.6:** Error handling, tests + +--- + +## Architecture Overview + +```mermaid +flowchart TB + subgraph ui [Chat Interface] + ChatUI[Chat Component] + SearchUI[Existing Search] + end + + subgraph services [Service Layer] + ChatService[Chat Service] + LLMService[LLM Service] + ManualsService[Existing Manuals Service] + end + + subgraph storage [Data Layer] + ChatHistory[(chat_sessions)] + FTS5[(engine_search.db)] + end + + ChatUI --> ChatService + ChatService --> LLMService + ChatService --> ManualsService + LLMService -->|Claude Sonnet 4.5| AnthropicAPI + ManualsService --> FTS5 + ChatService --> ChatHistory +``` + +--- + +## Phase 0: Spike (1 session) + +**Goal:** Validate Claude Sonnet 4.5 with real manual content before building infrastructure. + +Build a standalone script that: +1. Takes a query (e.g., "3516 fuel rack actuator troubleshooting") +2. Calls `search_manuals()` to get top 5 FTS5 results +3. Formats results as structured context with citations +4. Sends to Claude Sonnet 4.5 via the Anthropic SDK with the marine engineering system prompt +5. Streams the response + +**Validate:** +- Response quality on technical content (torque specs, clearances, diagnostic codes) +- Citation accuracy (does it reference the right manual sections?) +- Handling of multi-step procedures (step-by-step clarity) +- Behavior when RAG context doesn't contain the answer (does it say so or hallucinate?) +- Streaming latency (time to first token) +- Anthropic SDK reliability (streaming, error messages, token counting) + +**Kill decision:** If response quality is poor with real manual content, reassess model choice before building infrastructure. + +--- + +## Phase 1: Core Chat + RAG + +### 1.1 LLM Service + +Create `src/services/llm_service.py`: + +- Wrapper around the Anthropic Python SDK (`anthropic`) +- **Synchronous** — Flask is sync, keep it simple +- Claude Sonnet 4.5 (`claude-sonnet-4-5-20250929`) +- Retry with exponential backoff (3 attempts) +- Configurable timeout (30s default) + +```python +class LLMService: + def complete(self, messages: list[dict], context: str) -> str + def stream(self, messages: list[dict], context: str) -> Iterator[str] + def count_tokens(self, text: str) -> int +``` + +One clean wrapper around the Anthropic SDK. If you ever need to swap providers, refactor one file. + +### 1.2 System Prompt + +Create `src/prompts/manuals_assistant.py`: + +This is where the feature quality lives. The prompt must handle: + +- **Identity:** Marine engineering assistant for CAT engines (3516, C18, C32, C4.4) +- **Grounding:** Use retrieved manual excerpts as authoritative source. Always cite document name and page number. +- **Honesty:** If the retrieved context doesn't contain the answer, say so explicitly. Never hallucinate specs, clearances, or procedures. +- **Safety:** For safety-critical values (torque specs, valve clearances, pressure limits), quote the manual verbatim and recommend verifying against the physical manual. 
+- **Clarification:** Ask about specific equipment model, symptoms, and operating conditions before diagnosing. +- **Scope:** Decline questions outside the indexed manual content. Redirect to search. + +```python +SYSTEM_PROMPT = """...""" + +def format_context(results: list[dict]) -> str: + """Format RAG results into structured context for the LLM.""" + ... + +def build_messages(system: str, context: str, history: list, query: str) -> list: + """Assemble full message list within token budget.""" + ... +``` + +### 1.3 RAG Integration + +Enhance `src/services/manuals_service.py`: + +- Add `get_context_for_llm(query: str, limit: int = 5) -> list[dict]` +- Return structured results with: content, source document, page number, authority level +- Leverage existing `search_manuals()` and `search_cards()` +- Format with clear citation markers for the LLM + +### 1.4 Chat Service + +Create `src/services/chat_service.py`: + +- Context assembly from RAG search results +- Conversation history (in-memory, max 10 turns per session) +- Token budget management (system prompt + RAG context + history fits within model limits) +- Formats the full prompt: system + context + history + user query + +### 1.5 Chat Routes + UI + +Create `src/routes/chat.py` and `templates/manuals/chat.html`: + +**Routes:** +- `GET /manuals/chat` — Chat interface +- `POST /api/chat/message` — Send message, get streamed response (SSE) + +**UI:** +- Mobile-first chat interface matching existing design system +- Streaming response display (tokens appear as they arrive) +- Source citations as tappable links to manual sections +- Clear conversation / new chat button + +**Streaming:** Use Flask's `Response(stream_with_context(generator))` with `text/event-stream` content type. Sync generator from `LLMService.stream()`. + +### 1.6 Error Handling + Tests + +**Degradation path:** + +| Condition | Behavior | +|-----------|----------| +| API works | Stream LLM response with citations | +| API slow (>30s) | Show "Thinking..." 
with cancel button, timeout | +| API fails (500) | Show error + FTS5 search results as fallback | +| API rate-limited (429) | Retry once after backoff, then show error | + +**Tests:** +- Unit tests for `get_context_for_llm()` — context assembly and formatting +- Unit tests for system prompt building and token budget management +- Integration tests with mocked LLM responses — full pipeline without API calls +- One end-to-end test hitting real API (marked `@pytest.mark.slow`) + +--- + +## Key Files to Create/Modify + +**New Files:** + +- `src/services/llm_service.py` — LLM wrapper +- `src/services/chat_service.py` — Chat logic + context assembly +- `src/prompts/manuals_assistant.py` — System prompt + formatting +- `src/routes/chat.py` — Chat endpoints +- `templates/manuals/chat.html` — Chat UI + +**Modified Files:** + +- `src/services/manuals_service.py` — Add `get_context_for_llm()` +- `src/models.py` — Add `ChatSession` model +- `src/app.py` — Register chat blueprint +- `src/config.py` — Add LLM API key + chat settings +- `requirements.txt` — Add `anthropic` + +--- + +## Security Considerations + +- Never send PII to LLM APIs +- Sanitize manual content before sending (strip any crew names from context) +- Rate limiting on chat endpoints (existing infrastructure) +- API key stored in environment variable, never in code + +--- + +## Database Migration + +New table: + +- `chat_sessions` — Conversation history (user_id, messages JSON, created_at, updated_at) + +--- + +## Session Plan + +| Session | Deliverable | Output | +|---------|-------------|--------| +| **1** | Spike | Validate Sonnet 4.5 + FTS5 RAG with real queries. Go/no-go. | +| **2** | Services | `llm_service.py`, `chat_service.py`, `get_context_for_llm()`, system prompt | +| **3** | UI + Routes | Chat route, SSE streaming, mobile chat UI | +| **4** | Hardening | Error handling, fallback behavior, tests | + +--- + +## Future Considerations (Post-Ship) + +Evaluate after using the assistant on real troubleshooting scenarios: + +- Conversation persistence across sessions +- Response caching for repeated questions +- User pattern tracking (search history, preferred docs) +- Guided troubleshooting workflows with step-by-step diagnosis +- Upgrade to Opus if deeper reasoning needed on complex diagnostics diff --git a/migrations/versions/d4db138494c9_add_chat_sessions_table.py b/migrations/versions/d4db138494c9_add_chat_sessions_table.py new file mode 100644 index 0000000..a6c59d8 --- /dev/null +++ b/migrations/versions/d4db138494c9_add_chat_sessions_table.py @@ -0,0 +1,36 @@ +"""add chat_sessions table + +Revision ID: d4db138494c9 +Revises: 2e194345a0a0 +Create Date: 2026-02-05 21:39:40.892152 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'd4db138494c9' +down_revision = '2e194345a0a0' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('chat_sessions', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('user_id', sa.Integer(), nullable=False), + sa.Column('messages', sa.Text(), nullable=False), + sa.Column('created_at', sa.DateTime(), nullable=False), + sa.Column('updated_at', sa.DateTime(), nullable=False), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), + sa.PrimaryKeyConstraint('id') + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('chat_sessions') + # ### end Alembic commands ### diff --git a/requirements.txt b/requirements.txt index 3a5ab85..34ad1b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,6 +25,9 @@ reportlab==4.2.5 # OCR / Google Cloud Vision google-cloud-vision==3.8.1 +# LLM +anthropic>=0.39.0 + # Production Server gunicorn==23.0.0 diff --git a/src/app.py b/src/app.py index aff3e60..63da172 100644 --- a/src/app.py +++ b/src/app.py @@ -169,16 +169,22 @@ def health_check(): from routes.auth import auth_bp from routes.secure_api import secure_api_bp, init_secure_api from routes.manuals import manuals_bp + from routes.chat import chat_bp # Register all APIs app.register_blueprint(api_bp, url_prefix="/api") app.register_blueprint(auth_bp, url_prefix="/auth") app.register_blueprint(secure_api_bp, url_prefix="/api/v1") app.register_blueprint(manuals_bp) # url_prefix already set in blueprint - + app.register_blueprint(chat_bp) # url_prefix set in blueprint (/manuals/chat) + # Initialize secure API rate limiter with app init_secure_api(app) + # Initialize LLM service (graceful if no API key) + from services.llm_service import create_llm_service + create_llm_service(app) + # Main routes @app.route("/") @login_required diff --git a/src/config.py b/src/config.py index c68da03..b7ab6ed 100644 --- a/src/config.py +++ b/src/config.py @@ -46,6 +46,13 @@ class Config: SESSION_COOKIE_HTTPONLY = True SESSION_COOKIE_SAMESITE = "Lax" + # LLM / Chat Assistant + ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "") + ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-5-20250929") + CHAT_MAX_TURNS = int(os.environ.get("CHAT_MAX_TURNS", "10")) + CHAT_TIMEOUT = int(os.environ.get("CHAT_TIMEOUT", "30")) + CHAT_MAX_CONTEXT_TOKENS = int(os.environ.get("CHAT_MAX_CONTEXT_TOKENS", "4000")) + # Logging configuration LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO") LOG_DIR = BASE_DIR / "logs" diff --git a/src/models.py b/src/models.py index 554d317..ff9089c 100644 --- a/src/models.py +++ b/src/models.py @@ -1,5 +1,6 @@ """Database models for Oil Record Book Tool.""" +import json from datetime import datetime, timezone from flask_sqlalchemy import SQLAlchemy from flask_login import UserMixin @@ -507,3 +508,39 @@ def to_dict(self) -> dict: "end_date": self.end_date.isoformat() if self.end_date else None, "created_at": self.created_at.isoformat(), } + + +class ChatSession(db.Model): + """LLM chat conversation session.""" + + __tablename__ = "chat_sessions" + + id: int = db.Column(db.Integer, primary_key=True) + user_id: int = db.Column(db.Integer, db.ForeignKey("users.id"), nullable=False) + messages: str = db.Column(db.Text, nullable=False, default="[]") + created_at: datetime = db.Column( + db.DateTime, nullable=False, default=lambda: datetime.now(UTC) + ) + updated_at: datetime = db.Column( + db.DateTime, nullable=False, default=lambda: datetime.now(UTC), + onupdate=lambda: datetime.now(UTC) + ) + + user = db.relationship("User") + + def get_messages(self) -> list[dict]: + """Deserialize messages JSON.""" + return json.loads(self.messages) if self.messages else [] + + def set_messages(self, msgs: list[dict]) -> None: + """Serialize messages to JSON.""" + self.messages = json.dumps(msgs) + + def to_dict(self) -> dict: + return { + "id": self.id, + "user_id": self.user_id, + "messages": self.get_messages(), + "created_at": self.created_at.isoformat(), + "updated_at": self.updated_at.isoformat(), + } diff --git a/src/prompts/__init__.py b/src/prompts/__init__.py new file mode 100644 
index 0000000..e69de29 diff --git a/src/prompts/manuals_assistant.py b/src/prompts/manuals_assistant.py new file mode 100644 index 0000000..4a86cb5 --- /dev/null +++ b/src/prompts/manuals_assistant.py @@ -0,0 +1,96 @@ +""" +System prompt and message building for the Manuals Assistant. + +This is where response quality lives. The prompt grounds the LLM +in retrieved manual content, enforces citation discipline, and +prevents hallucination of safety-critical specs. +""" + +SYSTEM_PROMPT = """\ +You are a marine engineering assistant specializing in Caterpillar diesel engines \ +(3516, C18, C32, C4.4) aboard commercial vessels. You help engineers troubleshoot \ +problems, find procedures, and look up specifications using the ship's indexed \ +technical manuals. + +## Rules + +1. **Use only the provided manual excerpts.** Every factual claim must reference \ +a specific source document and page number. Format citations as [Document Name, p.XX]. + +2. **Never hallucinate specifications.** If a torque value, clearance, pressure limit, \ +or any safety-critical number is not in the provided context, say so explicitly. \ +Do not guess or recall from training data. + +3. **Quote safety-critical values verbatim.** When citing torque specs, valve clearances, \ +pressure limits, or temperature thresholds, reproduce the exact wording from the manual \ +and add: "Verify against your physical manual before performing this procedure." + +4. **Ask for clarification when needed.** If the question is ambiguous, ask about: \ +the specific engine model, symptoms, operating conditions, or which system is affected. + +5. **Stay in scope.** Only answer questions related to the indexed manual content. \ +For questions outside this scope, say: "That's outside the manuals I have indexed. \ +Try searching the manuals directly for [suggested terms]." + +6. **Structure multi-step procedures clearly.** Use numbered steps. Include warnings \ +and cautions inline where the manual specifies them. + +7. **Be direct.** Engineers need answers, not disclaimers. Lead with the answer, \ +then provide supporting detail. + +## Context format + +You will receive manual excerpts in tags. Each excerpt includes the source \ +document name, page number, equipment model, and document type. Use these for citations.\ +""" + + +def format_context(results: list[dict]) -> str: + """Format RAG search results into structured context for the LLM. + + Args: + results: List of dicts from get_context_for_llm() with keys: + content, filename, page_num, equipment, doc_type, authority + + Returns: + Formatted context string with citation markers. + """ + if not results: + return "\nNo relevant manual excerpts found for this query.\n" + + parts = [""] + for i, r in enumerate(results, 1): + authority_note = f" [{r.get('authority', 'unset').upper()}]" if r.get("authority") not in ("unset", None) else "" + parts.append( + f"--- Excerpt {i}{authority_note} ---\n" + f"Source: {r['filename']}, Page {r['page_num']}\n" + f"Equipment: {r['equipment']} | Type: {r['doc_type']}\n\n" + f"{r['content']}\n" + ) + parts.append("") + return "\n".join(parts) + + +def build_messages( + context: str, + history: list[dict], + query: str, +) -> tuple[str, list[dict]]: + """Assemble system prompt + context and message list for the LLM. 
+ + Args: + context: Formatted context string from format_context() + history: Previous conversation turns as [{"role": "user"|"assistant", "content": "..."}] + query: Current user query + + Returns: + Tuple of (system_prompt_with_context, messages_list) + """ + system = f"{SYSTEM_PROMPT}\n\n{context}" + + messages = [] + for msg in history: + messages.append({"role": msg["role"], "content": msg["content"]}) + messages.append({"role": "user", "content": query}) + + return system, messages diff --git a/src/routes/chat.py b/src/routes/chat.py new file mode 100644 index 0000000..7d731a0 --- /dev/null +++ b/src/routes/chat.py @@ -0,0 +1,185 @@ +""" +Chat Blueprint — LLM-powered manuals assistant endpoints. + +Provides the chat UI and SSE streaming endpoint. +""" + +import json + +from flask import ( + Blueprint, + Response, + current_app, + render_template, + request, + jsonify, + stream_with_context, +) +from flask_login import login_required, current_user + +from models import db, ChatSession +from services.chat_service import ( + stream_chat_response, + get_fallback_results, + ChatServiceError, +) +from services.llm_service import get_llm_service + +chat_bp = Blueprint("chat", __name__, url_prefix="/manuals/chat") + + +@chat_bp.route("/") +@login_required +def chat_page(): + """Chat interface.""" + llm_available = get_llm_service() is not None + return render_template("manuals/chat.html", llm_available=llm_available) + + +@chat_bp.route("/api/message", methods=["POST"]) +@login_required +def send_message(): + """Send a chat message and get a streamed SSE response. + + Request JSON: + query: str — the user's question + session_id: int|null — existing session ID, or null for new + equipment: str|null — optional equipment filter + + Response: SSE stream with events: + data: {"type": "token", "content": "..."} — streaming text deltas + data: {"type": "done", "session_id": 123} — stream complete + data: {"type": "error", "message": "..."} — error occurred + data: {"type": "fallback", "results": [...]} — FTS5 fallback results + """ + data = request.get_json(silent=True) + if not data or not data.get("query", "").strip(): + return jsonify({"error": "Query is required"}), 400 + + query = data["query"].strip() + session_id = data.get("session_id") + equipment = data.get("equipment") + + # Load or create session + session = None + history = [] + if session_id: + session = ChatSession.query.filter_by( + id=session_id, user_id=current_user.id + ).first() + if session: + history = session.get_messages() + + def generate(): + nonlocal session + + full_response = [] + + try: + for token in stream_chat_response( + query=query, + history=history, + equipment=equipment, + ): + full_response.append(token) + yield f"data: {json.dumps({'type': 'token', 'content': token})}\n\n" + + # Save to session + response_text = "".join(full_response) + new_messages = history + [ + {"role": "user", "content": query}, + {"role": "assistant", "content": response_text}, + ] + + if session: + session.set_messages(new_messages) + else: + session = ChatSession(user_id=current_user.id) + session.set_messages(new_messages) + db.session.add(session) + + db.session.commit() + + yield f"data: {json.dumps({'type': 'done', 'session_id': session.id})}\n\n" + + except ChatServiceError as e: + current_app.logger_instance.error(f"Chat error: {e}") + + # Fallback: return FTS5 search results + fallback = get_fallback_results(query, equipment=equipment) + yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n" + if fallback: + 
yield f"data: {json.dumps({'type': 'fallback', 'results': fallback})}\n\n" + + except Exception as e: + current_app.logger_instance.exception(f"Unexpected chat error: {e}") + yield f"data: {json.dumps({'type': 'error', 'message': 'An unexpected error occurred'})}\n\n" + + return Response( + stream_with_context(generate()), + mimetype="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "X-Accel-Buffering": "no", + }, + ) + + +@chat_bp.route("/api/sessions", methods=["GET"]) +@login_required +def list_sessions(): + """List user's chat sessions.""" + sessions = ChatSession.query.filter_by( + user_id=current_user.id + ).order_by(ChatSession.updated_at.desc()).limit(20).all() + + return jsonify([ + { + "id": s.id, + "preview": _session_preview(s), + "updated_at": s.updated_at.isoformat(), + } + for s in sessions + ]) + + +@chat_bp.route("/api/sessions/", methods=["GET"]) +@login_required +def get_session(session_id: int): + """Get a specific chat session.""" + session = ChatSession.query.filter_by( + id=session_id, user_id=current_user.id + ).first() + + if not session: + return jsonify({"error": "Session not found"}), 404 + + return jsonify(session.to_dict()) + + +@chat_bp.route("/api/sessions/", methods=["DELETE"]) +@login_required +def delete_session(session_id: int): + """Delete a chat session.""" + session = ChatSession.query.filter_by( + id=session_id, user_id=current_user.id + ).first() + + if not session: + return jsonify({"error": "Session not found"}), 404 + + db.session.delete(session) + db.session.commit() + + return jsonify({"status": "ok"}) + + +def _session_preview(session: ChatSession) -> str: + """Get a short preview of the session's first message.""" + messages = session.get_messages() + if messages: + first_user_msg = next( + (m["content"] for m in messages if m["role"] == "user"), "" + ) + return first_user_msg[:80] + ("..." if len(first_user_msg) > 80 else "") + return "Empty conversation" diff --git a/src/services/chat_service.py b/src/services/chat_service.py new file mode 100644 index 0000000..ae59593 --- /dev/null +++ b/src/services/chat_service.py @@ -0,0 +1,161 @@ +""" +Chat Service — Context assembly and conversation management. + +Orchestrates the RAG pipeline: query → search → context → LLM → response. +Manages conversation history and token budgets. +""" + +import logging +from collections.abc import Iterator +from typing import Optional + +from services.llm_service import LLMService, LLMServiceError, get_llm_service +from services.manuals_service import get_context_for_llm, search_manuals +from prompts.manuals_assistant import format_context, build_messages + +logger = logging.getLogger(__name__) + +# Token budget: Sonnet 4.5 has 200k context, but we stay lean +MAX_CONTEXT_TOKENS = 4000 # RAG excerpts budget +MAX_HISTORY_TOKENS = 3000 # Conversation history budget +MAX_TURNS = 10 # Max conversation turns kept + + +class ChatServiceError(Exception): + """Raised when the chat service encounters an error.""" + + +def get_chat_response( + query: str, + history: list[dict], + equipment: Optional[str] = None, + max_context_tokens: int = MAX_CONTEXT_TOKENS, + max_turns: int = MAX_TURNS, +) -> str: + """Get a complete (non-streaming) chat response. 
+ + Args: + query: User's question + history: Previous conversation turns + equipment: Optional equipment filter for RAG search + max_context_tokens: Token budget for RAG context + max_turns: Max history turns to include + + Returns: + Full response text + + Raises: + ChatServiceError if LLM service unavailable or fails + """ + llm = get_llm_service() + if not llm: + raise ChatServiceError("Chat assistant is not configured (missing API key)") + + # Trim history to max turns + trimmed_history = _trim_history(history, max_turns, llm) + + # Retrieve RAG context + context_results = get_context_for_llm(query, equipment=equipment, limit=5) + context_str = format_context(context_results) + + # Trim context if over budget + context_str = _trim_to_token_budget(context_str, max_context_tokens, llm) + + # Build messages + system, messages = build_messages(context_str, trimmed_history, query) + + try: + return llm.complete(system, messages) + except LLMServiceError as e: + logger.error(f"LLM error: {e}") + raise ChatServiceError(str(e)) + + +def stream_chat_response( + query: str, + history: list[dict], + equipment: Optional[str] = None, + max_context_tokens: int = MAX_CONTEXT_TOKENS, + max_turns: int = MAX_TURNS, +) -> Iterator[str]: + """Stream a chat response token by token. + + Same pipeline as get_chat_response() but yields text deltas. + + Yields: + Text delta strings + + Raises: + ChatServiceError if LLM service unavailable or fails + """ + llm = get_llm_service() + if not llm: + raise ChatServiceError("Chat assistant is not configured (missing API key)") + + trimmed_history = _trim_history(history, max_turns, llm) + + context_results = get_context_for_llm(query, equipment=equipment, limit=5) + context_str = format_context(context_results) + context_str = _trim_to_token_budget(context_str, max_context_tokens, llm) + + system, messages = build_messages(context_str, trimmed_history, query) + + try: + yield from llm.stream(system, messages) + except LLMServiceError as e: + logger.error(f"LLM streaming error: {e}") + raise ChatServiceError(str(e)) + + +def get_fallback_results(query: str, equipment: Optional[str] = None) -> list[dict]: + """Get FTS5 search results as fallback when LLM is unavailable. + + Returns results formatted for display in the chat UI. + """ + results = search_manuals(query, equipment=equipment, limit=10, boost_primary=True) + return [ + { + "filename": r["filename"], + "page_num": r["page_num"], + "equipment": r["equipment"], + "doc_type": r["doc_type"], + "snippet": r["snippet"], + } + for r in results + ] + + +def _trim_history( + history: list[dict], + max_turns: int, + llm: LLMService, +) -> list[dict]: + """Trim conversation history to fit within turn and token limits. + + Keeps the most recent turns. Each turn is a user+assistant pair. 
+ """ + if not history: + return [] + + # Keep last N messages (each turn = 2 messages: user + assistant) + max_messages = max_turns * 2 + trimmed = history[-max_messages:] + + # Further trim if over token budget + total_tokens = sum(llm.count_tokens(m["content"]) for m in trimmed) + while trimmed and total_tokens > MAX_HISTORY_TOKENS: + removed = trimmed.pop(0) + total_tokens -= llm.count_tokens(removed["content"]) + + return trimmed + + +def _trim_to_token_budget(text: str, max_tokens: int, llm: LLMService) -> str: + """Trim text to approximate token budget.""" + estimated = llm.count_tokens(text) + if estimated <= max_tokens: + return text + + # Rough trim: 4 chars per token + max_chars = max_tokens * 4 + return text[:max_chars] + "\n\n[Context truncated to fit token budget]" diff --git a/src/services/llm_service.py b/src/services/llm_service.py new file mode 100644 index 0000000..6899d7c --- /dev/null +++ b/src/services/llm_service.py @@ -0,0 +1,177 @@ +""" +LLM Service — Anthropic SDK wrapper for the Manuals Assistant. + +Sync-only (Flask is sync). Handles retries, streaming, and basic cost tracking. +If you ever swap providers, refactor this one file. +""" + +import logging +import time +from collections.abc import Iterator +from typing import Optional + +import anthropic + +logger = logging.getLogger(__name__) + + +class LLMServiceError(Exception): + """Raised when the LLM service encounters an error.""" + + +class LLMService: + """Thin wrapper around the Anthropic Python SDK.""" + + def __init__( + self, + api_key: str, + model: str = "claude-sonnet-4-5-20250929", + timeout: int = 30, + max_retries: int = 3, + ): + if not api_key: + raise LLMServiceError("ANTHROPIC_API_KEY is not set") + self.client = anthropic.Anthropic(api_key=api_key, timeout=timeout) + self.model = model + self.max_retries = max_retries + # Simple cost tracking (per-session, not persisted) + self.total_input_tokens = 0 + self.total_output_tokens = 0 + + def complete( + self, + system: str, + messages: list[dict], + max_tokens: int = 2048, + ) -> str: + """Send a message and return the full response text. 
+ + Args: + system: System prompt (includes RAG context) + messages: Conversation messages + max_tokens: Max tokens in response + + Returns: + Response text + + Raises: + LLMServiceError on failure after retries + """ + for attempt in range(1, self.max_retries + 1): + try: + response = self.client.messages.create( + model=self.model, + max_tokens=max_tokens, + system=system, + messages=messages, + ) + self.total_input_tokens += response.usage.input_tokens + self.total_output_tokens += response.usage.output_tokens + return response.content[0].text + + except anthropic.RateLimitError: + if attempt < self.max_retries: + wait = 2 ** attempt + logger.warning(f"Rate limited, retrying in {wait}s (attempt {attempt})") + time.sleep(wait) + continue + raise LLMServiceError("Rate limited by Anthropic API after retries") + + except anthropic.APIStatusError as e: + if attempt < self.max_retries and e.status_code >= 500: + wait = 2 ** attempt + logger.warning(f"API error {e.status_code}, retrying in {wait}s") + time.sleep(wait) + continue + raise LLMServiceError(f"Anthropic API error: {e.message}") + + except anthropic.APIConnectionError as e: + if attempt < self.max_retries: + wait = 2 ** attempt + logger.warning(f"Connection error, retrying in {wait}s") + time.sleep(wait) + continue + raise LLMServiceError(f"Cannot reach Anthropic API: {e}") + + raise LLMServiceError("Max retries exceeded") + + def stream( + self, + system: str, + messages: list[dict], + max_tokens: int = 2048, + ) -> Iterator[str]: + """Stream response tokens as they arrive. + + Yields individual text delta strings. The caller is responsible + for assembling these into the full response. + + Raises: + LLMServiceError on failure + """ + try: + with self.client.messages.stream( + model=self.model, + max_tokens=max_tokens, + system=system, + messages=messages, + ) as stream: + for text in stream.text_stream: + yield text + + # Update token counts from final message + response = stream.get_final_message() + self.total_input_tokens += response.usage.input_tokens + self.total_output_tokens += response.usage.output_tokens + + except anthropic.RateLimitError: + raise LLMServiceError("Rate limited by Anthropic API") + except anthropic.APIStatusError as e: + raise LLMServiceError(f"Anthropic API error: {e.message}") + except anthropic.APIConnectionError as e: + raise LLMServiceError(f"Cannot reach Anthropic API: {e}") + + def count_tokens(self, text: str) -> int: + """Estimate token count for text. + + Uses a simple heuristic (~4 chars per token) since the Anthropic + SDK doesn't expose a standalone tokenizer. Good enough for budget + management. + """ + return len(text) // 4 + + @property + def cost_summary(self) -> dict: + """Return cumulative token usage for this service instance.""" + return { + "input_tokens": self.total_input_tokens, + "output_tokens": self.total_output_tokens, + } + + +# Module-level singleton, initialized by create_llm_service() +_service: Optional[LLMService] = None + + +def create_llm_service(app) -> Optional[LLMService]: + """Initialize the LLM service from Flask app config. + + Returns None if API key is not configured (graceful degradation). 
+ """ + global _service + api_key = app.config.get("ANTHROPIC_API_KEY", "") + if not api_key: + logger.warning("ANTHROPIC_API_KEY not set — chat assistant disabled") + return None + + _service = LLMService( + api_key=api_key, + model=app.config.get("ANTHROPIC_MODEL", "claude-sonnet-4-5-20250929"), + timeout=app.config.get("CHAT_TIMEOUT", 30), + ) + return _service + + +def get_llm_service() -> Optional[LLMService]: + """Get the module-level LLM service instance.""" + return _service diff --git a/src/services/manuals_service.py b/src/services/manuals_service.py index aa0d48f..470d6e9 100644 --- a/src/services/manuals_service.py +++ b/src/services/manuals_service.py @@ -613,6 +613,94 @@ def search_manuals( conn.close() +# ============================================================================= +# LLM Context Functions +# ============================================================================= + +def get_context_for_llm( + query: str, + equipment: Optional[str] = None, + limit: int = 5, +) -> list[dict]: + """ + Retrieve structured manual context for the LLM assistant. + + Returns top search results with full page content (not snippets) + formatted for RAG context injection. + + Args: + query: User's question + equipment: Optional equipment filter + limit: Max excerpts to return + + Returns: + List of dicts with: content, filename, page_num, equipment, doc_type, authority + """ + conn = load_manuals_database() + if not conn: + return [] + + try: + cursor = conn.cursor() + fts_query = prepare_search_query(query) + + where_parts = [] + params = [] + + if equipment: + where_parts.append("p.equipment = ?") + params.append(equipment) + + where_clause = " AND ".join(where_parts) if where_parts else "1=1" + + sql = f""" + SELECT + p.filepath, + p.filename, + p.equipment, + p.doc_type, + p.page_num, + p.content, + bm25(pages_fts) as score + FROM pages_fts + JOIN pages p ON pages_fts.rowid = p.id + WHERE pages_fts MATCH ? + AND {where_clause} + ORDER BY bm25(pages_fts) + LIMIT ? + """ + params = [fts_query] + params + [limit * 2] + + cursor.execute(sql, params) + rows = cursor.fetchall() + + _init_authority_table(conn) + + results = [] + for row in rows: + authority = _get_authority_for_filepath(conn, row["filepath"]) + results.append({ + "content": row["content"].strip(), + "filename": row["filename"], + "page_num": row["page_num"], + "equipment": row["equipment"], + "doc_type": row["doc_type"], + "authority": authority, + "score": abs(row["score"]), + }) + + # Sort by authority (primary first) then by score + authority_order = {"primary": 0, "secondary": 1, "unset": 2, "mention": 3} + results.sort(key=lambda r: (authority_order.get(r["authority"], 2), r["score"])) + + return results[:limit] + + except sqlite3.OperationalError: + return [] + finally: + conn.close() + + # ============================================================================= # Cards Functions # ============================================================================= diff --git a/templates/manuals/chat.html b/templates/manuals/chat.html new file mode 100644 index 0000000..6c881b6 --- /dev/null +++ b/templates/manuals/chat.html @@ -0,0 +1,703 @@ +{% extends "base.html" %} + +{% block title %}Ask the Manuals - Engine Room{% endblock %} + +{% block head %} + +{% endblock %} + +{% block content %} +
+  <header class="chat-header">
+    <h1>Ask the Manuals</h1>
+    <a href="/manuals">Search</a>
+  </header>
+
+  {% if not llm_available %}
+  <div class="chat-unavailable">
+    <h2>Assistant Not Available</h2>
+    <p>The AI assistant requires an Anthropic API key. Set ANTHROPIC_API_KEY in your environment.</p>
+    <a href="/manuals">Use Manual Search Instead</a>
+  </div>
+
+  {% else %}
+
+  <div id="chat-messages" class="chat-messages">
+    <div class="chat-welcome">
+      <h2>Ask about CAT engine manuals</h2>
+      <p>I can help you find procedures, troubleshoot problems, and look up specs from the indexed technical manuals.</p>
+      <button type="button" class="example-query">3516 fuel rack actuator troubleshooting</button>
+      <button type="button" class="example-query">C18 valve lash adjustment procedure</button>
+      <button type="button" class="example-query">3516 jacket water pump torque specs</button>
+    </div>
+  </div>
+
+  <form id="chat-form" class="chat-input-area">
+    <textarea id="chat-input" rows="1" placeholder="Ask a question..."></textarea>
+    <button type="submit" id="chat-send">Send</button>
+  </form>
+
+  {% endif %}
+</div>
+{% endblock %} + +{% block scripts %} +{% if llm_available %} + +{% endif %} +{% endblock %} diff --git a/tests/test_chat.py b/tests/test_chat.py new file mode 100644 index 0000000..55c9965 --- /dev/null +++ b/tests/test_chat.py @@ -0,0 +1,354 @@ +"""Tests for LLM Manuals Assistant feature.""" + +import json +import pytest +from unittest.mock import patch, MagicMock + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + + +# ───────────────────────────────────────────────────────────────── +# Unit Tests: Prompt building +# ───────────────────────────────────────────────────────────────── + +class TestFormatContext: + """Test context formatting for LLM.""" + + def test_empty_results(self): + from prompts.manuals_assistant import format_context + + ctx = format_context([]) + assert "" in ctx + assert "No relevant manual excerpts found" in ctx + + def test_formats_results_with_citations(self): + from prompts.manuals_assistant import format_context + + results = [ + { + "content": "Torque to 45 Nm", + "filename": "3516_testing.pdf", + "page_num": 12, + "equipment": "3516", + "doc_type": "testing", + "authority": "primary", + } + ] + ctx = format_context(results) + assert "Excerpt 1" in ctx + assert "[PRIMARY]" in ctx + assert "3516_testing.pdf" in ctx + assert "Page 12" in ctx + assert "Torque to 45 Nm" in ctx + + def test_multiple_results(self): + from prompts.manuals_assistant import format_context + + results = [ + { + "content": "Step 1", + "filename": "a.pdf", + "page_num": 1, + "equipment": "3516", + "doc_type": "testing", + "authority": "primary", + }, + { + "content": "Step 2", + "filename": "b.pdf", + "page_num": 5, + "equipment": "C18", + "doc_type": "service", + "authority": "unset", + }, + ] + ctx = format_context(results) + assert "Excerpt 1" in ctx + assert "Excerpt 2" in ctx + assert "a.pdf" in ctx + assert "b.pdf" in ctx + + def test_unset_authority_no_label(self): + from prompts.manuals_assistant import format_context + + results = [ + { + "content": "Content", + "filename": "doc.pdf", + "page_num": 1, + "equipment": "C32", + "doc_type": "O&M", + "authority": "unset", + } + ] + ctx = format_context(results) + assert "[UNSET]" not in ctx + assert "Excerpt 1 ---" in ctx + + +class TestBuildMessages: + """Test message assembly.""" + + def test_builds_system_with_context(self): + from prompts.manuals_assistant import build_messages + + system, messages = build_messages("test", [], "my query") + assert "test" in system + assert len(messages) == 1 + assert messages[0]["role"] == "user" + assert messages[0]["content"] == "my query" + + def test_includes_history(self): + from prompts.manuals_assistant import build_messages + + history = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi there"}, + ] + system, messages = build_messages("", history, "follow up") + assert len(messages) == 3 + assert messages[0]["role"] == "user" + assert messages[0]["content"] == "hello" + assert messages[2]["content"] == "follow up" + + +# ───────────────────────────────────────────────────────────────── +# Unit Tests: LLM Service +# ───────────────────────────────────────────────────────────────── + +class TestLLMService: + """Test LLM service wrapper.""" + + def test_raises_without_api_key(self): + from services.llm_service import LLMService, LLMServiceError + + with pytest.raises(LLMServiceError, match="not set"): + LLMService(api_key="") + + @patch("services.llm_service.anthropic.Anthropic") + def 
test_count_tokens_heuristic(self, mock_cls): + from services.llm_service import LLMService + + svc = LLMService(api_key="test-key") + # ~4 chars per token + assert svc.count_tokens("a" * 100) == 25 + + @patch("services.llm_service.anthropic.Anthropic") + def test_cost_summary(self, mock_cls): + from services.llm_service import LLMService + + svc = LLMService(api_key="test-key") + assert svc.cost_summary == {"input_tokens": 0, "output_tokens": 0} + + @patch("services.llm_service.anthropic.Anthropic") + def test_complete_success(self, mock_cls): + from services.llm_service import LLMService + + mock_client = MagicMock() + mock_cls.return_value = mock_client + + mock_response = MagicMock() + mock_response.content = [MagicMock(text="Test response")] + mock_response.usage.input_tokens = 100 + mock_response.usage.output_tokens = 50 + mock_client.messages.create.return_value = mock_response + + svc = LLMService(api_key="test-key") + result = svc.complete("system", [{"role": "user", "content": "test"}]) + + assert result == "Test response" + assert svc.total_input_tokens == 100 + assert svc.total_output_tokens == 50 + + +# ───────────────────────────────────────────────────────────────── +# Unit Tests: get_context_for_llm +# ───────────────────────────────────────────────────────────────── + +class TestGetContextForLLM: + """Test RAG context retrieval.""" + + @patch("services.manuals_service.load_manuals_database") + def test_returns_empty_when_no_db(self, mock_db): + from services.manuals_service import get_context_for_llm + + mock_db.return_value = None + results = get_context_for_llm("test query") + assert results == [] + + @patch("services.manuals_service.load_manuals_database") + def test_returns_structured_results(self, mock_db): + from services.manuals_service import get_context_for_llm + + # Set up mock DB + mock_conn = MagicMock() + mock_db.return_value = mock_conn + mock_cursor = MagicMock() + mock_conn.cursor.return_value = mock_cursor + + # Mock row data + mock_row = { + "filepath": "/path/to/doc.pdf", + "filename": "doc.pdf", + "equipment": "3516", + "doc_type": "testing", + "page_num": 5, + "content": "Torque value is 45 Nm", + "score": -2.5, + } + mock_cursor.fetchall.return_value = [mock_row] + + # Mock authority lookup + mock_cursor.fetchone.side_effect = [ + {"name": "doc_authority"}, # table exists check + {"authority_level": "primary"}, # authority lookup + ] + + results = get_context_for_llm("torque specs") + + assert len(results) == 1 + assert results[0]["content"] == "Torque value is 45 Nm" + assert results[0]["filename"] == "doc.pdf" + assert results[0]["equipment"] == "3516" + + +# ───────────────────────────────────────────────────────────────── +# Integration Tests: Chat routes (mocked LLM) +# ───────────────────────────────────────────────────────────────── + +class TestChatRoutes: + """Test chat endpoints with mocked LLM service. + + Uses inline user/login to avoid pre-existing conftest DetachedInstanceError. 
+ """ + + @staticmethod + def _login(app, client): + """Create and login a test user, returning user_id.""" + from models import db, User, UserRole + + with app.app_context(): + u = User(username="chatroute_user", role=UserRole.CHIEF_ENGINEER) + u.set_password("pass") + db.session.add(u) + db.session.commit() + uid = u.id + + with client.session_transaction() as sess: + sess["_user_id"] = str(uid) + sess["_fresh"] = True + return uid + + def test_chat_page_loads(self, app, client): + """Chat page should load for authenticated users.""" + self._login(app, client) + response = client.get("/manuals/chat/") + assert response.status_code == 200 + + def test_chat_page_requires_auth(self, client): + """Chat page should redirect unauthenticated users.""" + response = client.get("/manuals/chat/") + assert response.status_code == 302 + + def test_send_message_requires_query(self, app, client): + """POST without query should return 400.""" + self._login(app, client) + response = client.post( + "/manuals/chat/api/message", + json={}, + content_type="application/json", + ) + assert response.status_code == 400 + + @patch("routes.chat.get_llm_service") + @patch("routes.chat.stream_chat_response") + def test_send_message_streams(self, mock_stream, mock_get_llm, app, client): + """POST with query should return SSE stream.""" + self._login(app, client) + mock_get_llm.return_value = MagicMock() + mock_stream.return_value = iter(["Hello", " world"]) + + response = client.post( + "/manuals/chat/api/message", + json={"query": "test question"}, + content_type="application/json", + ) + assert response.status_code == 200 + assert response.content_type.startswith("text/event-stream") + + def test_list_sessions_empty(self, app, client): + """Should return empty list when no sessions exist.""" + self._login(app, client) + response = client.get("/manuals/chat/api/sessions") + assert response.status_code == 200 + data = json.loads(response.data) + assert data == [] + + def test_get_nonexistent_session(self, app, client): + """Should return 404 for missing session.""" + self._login(app, client) + response = client.get("/manuals/chat/api/sessions/999") + assert response.status_code == 404 + + def test_delete_nonexistent_session(self, app, client): + """Should return 404 for missing session.""" + self._login(app, client) + response = client.delete("/manuals/chat/api/sessions/999") + assert response.status_code == 404 + + +# ───────────────────────────────────────────────────────────────── +# Integration Test: ChatSession model +# ───────────────────────────────────────────────────────────────── + +class TestChatSessionModel: + """Test ChatSession model.""" + + def test_create_session(self, app): + """Should create a chat session.""" + from models import db, ChatSession, User, UserRole + + with app.app_context(): + user = User(username="chattest", role=UserRole.ENGINEER) + user.set_password("pass123") + db.session.add(user) + db.session.commit() + uid = user.id + + session = ChatSession(user_id=uid) + session.set_messages([ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ]) + db.session.add(session) + db.session.commit() + + fetched = ChatSession.query.first() + assert fetched is not None + msgs = fetched.get_messages() + assert len(msgs) == 2 + assert msgs[0]["role"] == "user" + + def test_session_to_dict(self, app): + """Should serialize session to dict.""" + from models import db, ChatSession, User, UserRole + + with app.app_context(): + user = User(username="chattest2", 
role=UserRole.ENGINEER) + user.set_password("pass123") + db.session.add(user) + db.session.commit() + uid = user.id + + session = ChatSession(user_id=uid) + session.set_messages([{"role": "user", "content": "test"}]) + db.session.add(session) + db.session.commit() + + d = session.to_dict() + assert "id" in d + assert "messages" in d + assert len(d["messages"]) == 1 From a9224b75d3e83b96d2c46822b6f341174a84dea2 Mon Sep 17 00:00:00 2001 From: Derek Parent Date: Thu, 5 Feb 2026 23:09:17 -0800 Subject: [PATCH 2/3] refactor: rewire chat to use search_manuals() with two-phase context Replace duplicate FTS5 query in get_context_for_llm() with thin wrapper around search_manuals(). Both search UI and LLM now share one search path with equipment filters, authority boost, and tag-aware ranking. - Rewrite system prompt for collaborative guide role (triage, not oracle) - Add format_search_results() and format_page_content() context formats - Add detect_equipment() auto-detection from query text - Add equipment filter dropdown to chat UI - Add get_pages_content() for future deep-dive phase - Expand tests from 21 to 40 covering all new functions Co-authored-by: Cursor --- src/prompts/manuals_assistant.py | 123 ++++++++-- src/services/chat_service.py | 85 ++++++- src/services/manuals_service.py | 111 ++++----- templates/base.html | 1 + templates/manuals/chat.html | 41 +++- tests/test_chat.py | 373 +++++++++++++++++++++++++------ 6 files changed, 587 insertions(+), 147 deletions(-) diff --git a/src/prompts/manuals_assistant.py b/src/prompts/manuals_assistant.py index 4a86cb5..3f7d929 100644 --- a/src/prompts/manuals_assistant.py +++ b/src/prompts/manuals_assistant.py @@ -4,15 +4,40 @@ This is where response quality lives. The prompt grounds the LLM in retrieved manual content, enforces citation discipline, and prevents hallucination of safety-critical specs. + +Two context formats: + - : Triage phase — snippets + page refs for grouping + - : Deep-dive phase — full page text for walkthrough """ +from typing import Optional + + SYSTEM_PROMPT = """\ -You are a marine engineering assistant specializing in Caterpillar diesel engines \ -(3516, C18, C32, C4.4) aboard commercial vessels. You help engineers troubleshoot \ -problems, find procedures, and look up specifications using the ship's indexed \ -technical manuals. +You are a marine engineering assistant helping an experienced Chief Engineer \ +navigate CAT engine manuals (3516, C18, C32, C4.4). The engineer knows these \ +engines well — your job is to help them find the right section quickly and \ +interpret technical content together. + +## How You Work + +1. **When given search results:** TRIAGE them. Group by topic (procedure vs \ +troubleshooting vs specifications vs parts). Identify the most relevant \ +pages and suggest directions: "Pages 48-49 cover the adjustment procedure, \ +pages 52-54 cover bridge adjustment specs. Which do you need?" + +2. **When given full page content:** WALK THROUGH it collaboratively. Summarize \ +the key steps or specs, highlight safety-critical values (torque, clearances, \ +pressures), and be ready to explain or clarify. Reference step numbers. + +3. **The engineer drives, you guide.** Suggest directions, don't decide. \ +Reference specific page numbers so they can follow along in their physical \ +manual. -## Rules +4. **Be specific about what you see.** Say "I found 13 results, 8 are from \ +the Testing & Adjusting manual" not just "I found some results." + +## Citation Rules 1. 
**Use only the provided manual excerpts.** Every factual claim must reference \ a specific source document and page number. Format citations as [Document Name, p.XX]. @@ -25,6 +50,8 @@ pressure limits, or temperature thresholds, reproduce the exact wording from the manual \ and add: "Verify against your physical manual before performing this procedure." +## Scope Rules + 4. **Ask for clarification when needed.** If the question is ambiguous, ask about: \ the specific engine model, symptoms, operating conditions, or which system is affected. @@ -38,36 +65,86 @@ 7. **Be direct.** Engineers need answers, not disclaimers. Lead with the answer, \ then provide supporting detail. -## Context format +## Context Format -You will receive manual excerpts in tags. Each excerpt includes the source \ -document name, page number, equipment model, and document type. Use these for citations.\ +You receive context in two formats: +- : Summary list with snippets. Use for triage — suggest directions. +- : Full page text. Use for deep-dive — walk through together.\ """ -def format_context(results: list[dict]) -> str: - """Format RAG search results into structured context for the LLM. +def format_search_results( + results: list[dict], + query: str, + equipment: Optional[str] = None, +) -> str: + """Format search results as triage context for the LLM. + + Produces a numbered list inside tags. The LLM + groups these by topic and suggests directions to the engineer. Args: results: List of dicts from get_context_for_llm() with keys: - content, filename, page_num, equipment, doc_type, authority + filename, page_num, equipment, doc_type, snippet, authority, score + query: Original search query + equipment: Equipment filter used (if any) Returns: - Formatted context string with citation markers. + Formatted search results string with numbered entries. """ if not results: - return "\nNo relevant manual excerpts found for this query.\n" + return ( + f'\n' + "No results found.\n" + "" + ) + + equip_attr = f' equipment="{equipment}"' if equipment else "" + parts = [f''] - parts = [""] for i, r in enumerate(results, 1): - authority_note = f" [{r.get('authority', 'unset').upper()}]" if r.get("authority") not in ("unset", None) else "" + authority_tag = "" + if r.get("authority") not in ("unset", None): + authority_tag = f" [{r['authority'].upper()}]" + + # Strip HTML tags from snippet — LLM gets plain text + snippet = r.get("snippet", "").replace("", "").replace("", "") + doc_type_label = r.get("doc_type", "").upper() + + parts.append( + f"{i}. {r['filename']} | Page {r['page_num']}" + f" | {doc_type_label}{authority_tag}\n" + f' "{snippet}"' + ) + + parts.append("") + return "\n".join(parts) + + +def format_page_content(pages: list[dict]) -> str: + """Format full page content for deep-dive context. + + Used when the engineer picks specific pages after triage. + The LLM walks through the content collaboratively. + + Args: + pages: List of dicts from get_pages_content() with keys: + content, filename, page_num, equipment, doc_type + + Returns: + Formatted page content string inside tags. 
+ """ + if not pages: + return "\nNo page content available.\n" + + parts = [""] + for p in pages: parts.append( - f"--- Excerpt {i}{authority_note} ---\n" - f"Source: {r['filename']}, Page {r['page_num']}\n" - f"Equipment: {r['equipment']} | Type: {r['doc_type']}\n\n" - f"{r['content']}\n" + f"--- {p['filename']}, Page {p['page_num']} " + f"({p['equipment']} | {p['doc_type']}) ---\n\n" + f"{p['content']}\n" ) - parts.append("") + parts.append("") return "\n".join(parts) @@ -79,8 +156,10 @@ def build_messages( """Assemble system prompt + context and message list for the LLM. Args: - context: Formatted context string from format_context() - history: Previous conversation turns as [{"role": "user"|"assistant", "content": "..."}] + context: Formatted context string from format_search_results() + or format_page_content() + history: Previous conversation turns as + [{"role": "user"|"assistant", "content": "..."}] query: Current user query Returns: diff --git a/src/services/chat_service.py b/src/services/chat_service.py index ae59593..18f0a4c 100644 --- a/src/services/chat_service.py +++ b/src/services/chat_service.py @@ -3,28 +3,82 @@ Orchestrates the RAG pipeline: query → search → context → LLM → response. Manages conversation history and token budgets. + +Two-phase context: + Phase 1 (triage): search_manuals() snippets → LLM groups & suggests directions + Phase 2 (follow-up): re-search with refined query → LLM narrows focus """ import logging +import re from collections.abc import Iterator from typing import Optional from services.llm_service import LLMService, LLMServiceError, get_llm_service from services.manuals_service import get_context_for_llm, search_manuals -from prompts.manuals_assistant import format_context, build_messages +from prompts.manuals_assistant import format_search_results, build_messages logger = logging.getLogger(__name__) -# Token budget: Sonnet 4.5 has 200k context, but we stay lean +# Token budget: Sonnet has 200k context, but we stay lean MAX_CONTEXT_TOKENS = 4000 # RAG excerpts budget MAX_HISTORY_TOKENS = 3000 # Conversation history budget MAX_TURNS = 10 # Max conversation turns kept +# Equipment detection: case-insensitive match for known engine models +_EQUIPMENT_PATTERN = re.compile(r"\b(3516|C18|C32|C4\.4)\b", re.IGNORECASE) + +# Canonical equipment names (normalize case from regex matches) +_EQUIPMENT_CANONICAL = { + "3516": "3516", + "c18": "C18", + "c32": "C32", + "c4.4": "C4.4", +} + class ChatServiceError(Exception): """Raised when the chat service encounters an error.""" +def detect_equipment(query: str) -> Optional[str]: + """Auto-detect equipment model from query text. + + Scans for known CAT engine identifiers. Returns the first match + normalized to canonical form, or None if no equipment found. + + Known values: 3516, C18, C32, C4.4 + + Args: + query: User's question text + + Returns: + Canonical equipment string or None + """ + match = _EQUIPMENT_PATTERN.search(query) + if match: + return _EQUIPMENT_CANONICAL.get(match.group(1).lower(), match.group(1)) + return None + + +def _resolve_equipment( + explicit: Optional[str], + query: str, +) -> Optional[str]: + """Resolve equipment filter: explicit dropdown wins, then auto-detect. 
+ + Args: + explicit: Equipment value from UI dropdown (None or empty = not set) + query: User's query text for auto-detection fallback + + Returns: + Equipment filter string or None + """ + if explicit: + return explicit + return detect_equipment(query) + + def get_chat_response( query: str, history: list[dict], @@ -37,7 +91,7 @@ def get_chat_response( Args: query: User's question history: Previous conversation turns - equipment: Optional equipment filter for RAG search + equipment: Optional equipment filter from UI dropdown max_context_tokens: Token budget for RAG context max_turns: Max history turns to include @@ -51,12 +105,19 @@ def get_chat_response( if not llm: raise ChatServiceError("Chat assistant is not configured (missing API key)") + # Resolve equipment: dropdown wins, then auto-detect from query + resolved_equipment = _resolve_equipment(equipment, query) + # Trim history to max turns trimmed_history = _trim_history(history, max_turns, llm) - # Retrieve RAG context - context_results = get_context_for_llm(query, equipment=equipment, limit=5) - context_str = format_context(context_results) + # Retrieve RAG context via search_manuals() (single search path) + context_results = get_context_for_llm( + query, equipment=resolved_equipment, limit=10 + ) + context_str = format_search_results( + context_results, query, equipment=resolved_equipment + ) # Trim context if over budget context_str = _trim_to_token_budget(context_str, max_context_tokens, llm) @@ -92,10 +153,18 @@ def stream_chat_response( if not llm: raise ChatServiceError("Chat assistant is not configured (missing API key)") + # Resolve equipment: dropdown wins, then auto-detect from query + resolved_equipment = _resolve_equipment(equipment, query) + trimmed_history = _trim_history(history, max_turns, llm) - context_results = get_context_for_llm(query, equipment=equipment, limit=5) - context_str = format_context(context_results) + # Retrieve RAG context via search_manuals() (single search path) + context_results = get_context_for_llm( + query, equipment=resolved_equipment, limit=10 + ) + context_str = format_search_results( + context_results, query, equipment=resolved_equipment + ) context_str = _trim_to_token_budget(context_str, max_context_tokens, llm) system, messages = build_messages(context_str, trimmed_history, query) diff --git a/src/services/manuals_service.py b/src/services/manuals_service.py index 470d6e9..0637317 100644 --- a/src/services/manuals_service.py +++ b/src/services/manuals_service.py @@ -620,81 +620,88 @@ def search_manuals( def get_context_for_llm( query: str, equipment: Optional[str] = None, - limit: int = 5, + limit: int = 10, ) -> list[dict]: """ - Retrieve structured manual context for the LLM assistant. + Retrieve search results for LLM context via search_manuals(). - Returns top search results with full page content (not snippets) - formatted for RAG context injection. + Single search path: both the search UI and LLM use the same + ranking (authority boost, phrase boost, tag-aware boost). + Returns summaries (snippets) for triage, not full page content. 
Args: query: User's question equipment: Optional equipment filter - limit: Max excerpts to return + limit: Max results to return Returns: - List of dicts with: content, filename, page_num, equipment, doc_type, authority + List of dicts with: filename, page_num, equipment, doc_type, + snippet, authority, score """ - conn = load_manuals_database() - if not conn: - return [] + results = search_manuals( + query, equipment=equipment, boost_primary=True, limit=limit + ) + return [ + { + "filename": r["filename"], + "page_num": r["page_num"], + "equipment": r["equipment"], + "doc_type": r["doc_type"], + "snippet": r["snippet"], + "authority": r["authority"], + "score": r["score"], + } + for r in results + ] - try: - cursor = conn.cursor() - fts_query = prepare_search_query(query) - where_parts = [] - params = [] +def get_pages_content( + filename: str, + page_nums: list[int], +) -> list[dict]: + """ + Fetch full page content by filename and page numbers. - if equipment: - where_parts.append("p.equipment = ?") - params.append(equipment) + Used for deep-dive phase: after the LLM triages search results + and the user picks specific pages to examine. - where_clause = " AND ".join(where_parts) if where_parts else "1=1" + Args: + filename: Document filename (e.g. 'kenr5403-00_3516-testing-&-adjusting') + page_nums: List of page numbers to fetch - sql = f""" - SELECT - p.filepath, - p.filename, - p.equipment, - p.doc_type, - p.page_num, - p.content, - bm25(pages_fts) as score - FROM pages_fts - JOIN pages p ON pages_fts.rowid = p.id - WHERE pages_fts MATCH ? - AND {where_clause} - ORDER BY bm25(pages_fts) - LIMIT ? - """ - params = [fts_query] + params + [limit * 2] + Returns: + List of dicts with: content, filename, page_num, equipment, doc_type + """ + if not page_nums: + return [] - cursor.execute(sql, params) - rows = cursor.fetchall() + conn = load_manuals_database() + if not conn: + return [] - _init_authority_table(conn) + try: + cursor = conn.cursor() + placeholders = ",".join("?" * len(page_nums)) + cursor.execute( + f""" + SELECT filepath, filename, equipment, doc_type, page_num, content + FROM pages + WHERE filename = ? 
AND page_num IN ({placeholders}) + ORDER BY page_num + """, + [filename] + list(page_nums), + ) - results = [] - for row in rows: - authority = _get_authority_for_filepath(conn, row["filepath"]) - results.append({ + return [ + { "content": row["content"].strip(), "filename": row["filename"], "page_num": row["page_num"], "equipment": row["equipment"], "doc_type": row["doc_type"], - "authority": authority, - "score": abs(row["score"]), - }) - - # Sort by authority (primary first) then by score - authority_order = {"primary": 0, "secondary": 1, "unset": 2, "mention": 3} - results.sort(key=lambda r: (authority_order.get(r["authority"], 2), r["score"])) - - return results[:limit] - + } + for row in cursor.fetchall() + ] except sqlite3.OperationalError: return [] finally: diff --git a/templates/base.html b/templates/base.html index c791982..376d459 100644 --- a/templates/base.html +++ b/templates/base.html @@ -5,6 +5,7 @@ + {% block title %}Engine Room Status{% endblock %} diff --git a/templates/manuals/chat.html b/templates/manuals/chat.html index 6c881b6..ba19ab7 100644 --- a/templates/manuals/chat.html +++ b/templates/manuals/chat.html @@ -221,6 +221,34 @@ border-top: 1px solid var(--border-default); } +.chat-filter-row { + display: flex; + align-items: center; + gap: var(--space-sm); + margin-bottom: var(--space-sm); +} + +.chat-filter-row label { + font-size: 0.8rem; + color: var(--text-muted, #484f58); + white-space: nowrap; +} + +.equipment-filter { + padding: var(--space-xs) var(--space-sm); + font-size: 0.85rem; + background: var(--bg-input, #0d1117); + color: var(--text-primary); + border: 1px solid var(--border-default); + border-radius: var(--radius-sm); + cursor: pointer; +} + +.equipment-filter:focus { + outline: none; + border-color: var(--accent-primary); +} + .chat-input-row { display: flex; gap: var(--space-sm); @@ -387,6 +415,16 @@

Ask about CAT engine manuals

+
+ + +
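
Below is a minimal sketch of how the two context phases introduced in this patch fit together. It assumes the patched modules under `src/` are importable and the manuals database is present; the query, equipment value, and page selection are illustrative only and not part of the patch.

```python
# Illustrative two-phase RAG flow: triage with snippets, then deep-dive on
# specific pages. Assumes the patched src/ modules are on the import path.
from services.manuals_service import get_context_for_llm, get_pages_content
from prompts.manuals_assistant import (
    build_messages,
    format_page_content,
    format_search_results,
)

query = "3516 fuel rack actuator troubleshooting"  # example query

# Phase 1 (triage): ranked snippets from the shared search path, wrapped in
# <search_results> tags so the LLM can group them and suggest directions.
results = get_context_for_llm(query, equipment="3516", limit=10)
triage_context = format_search_results(results, query, equipment="3516")
system, messages = build_messages(triage_context, [], query)

# Phase 2 (deep-dive): once the engineer picks a document and pages, fetch
# full page text and wrap it in <page_content> tags for a walkthrough.
if results:
    picked = results[0]
    pages = get_pages_content(picked["filename"], [picked["page_num"]])
    deep_dive_context = format_page_content(pages)
    print(deep_dive_context[:500])
```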